/*	$NetBSD: vfs_vnode.c,v 1.45 2015/07/12 08:11:28 hannken Exp $	*/

/*-
 * Copyright (c) 1997-2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * The vnode cache subsystem.
 *
 * Life-cycle
 *
 *	Normally, there are two points where new vnodes are created:
 *	VOP_CREATE(9) and VOP_LOOKUP(9).  The life-cycle of a vnode
 *	starts in one of the following ways:
 *
 *	- Allocation, via vcache_get(9) or vcache_new(9).
 *	- Reclamation of an inactive vnode, via vget(9).
 *
 *	Recycling from a free list, via getnewvnode(9) -> getcleanvnode(9),
 *	was another, traditional way.  Currently, only the draining thread
 *	recycles vnodes.  This behaviour might be revisited.
 *
 *	The life-cycle ends when the last reference is dropped, usually
 *	in VOP_REMOVE(9).  In such a case, VOP_INACTIVE(9) is called to
 *	inform the file system that the vnode is inactive.  Via this call,
 *	the file system indicates whether the vnode can be recycled
 *	(usually, it checks its own references, e.g. the link count and
 *	whether the file was removed).
 *
 *	Depending on that indication, the vnode can be put onto a free list
 *	(cache), or cleaned via vclean(9), which calls VOP_RECLAIM(9) to
 *	disassociate the underlying file system from the vnode, and finally
 *	destroyed.
 *
 * Reference counting
 *
 *	A vnode is considered active if its reference count
 *	(vnode_t::v_usecount) is non-zero.  The count is maintained using
 *	the vref(9) and vrele(9) routines, as well as vput(9).  Common
 *	points holding references are e.g. open files, the current working
 *	directory, mount points, etc.
 *
 * Note on v_usecount and its locking
 *
 *	At nearly all points where it is known that v_usecount could be
 *	zero, vnode_t::v_interlock will be held.  To change v_usecount away
 *	from zero, the interlock must be held.  To change from a non-zero
 *	value to zero, again the interlock must be held.
 *
 *	Changing the usecount from one non-zero value to another non-zero
 *	value can safely be done using atomic operations, without the
 *	interlock held.
 *
 *	Note: if VI_CLEAN is set, vnode_t::v_interlock will be released while
 *	mntvnode_lock is still held.
 *
 *	See PR 41374.
 */
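
/*
 * Illustrative sketch (not part of the build): the typical way a consumer
 * obtains and drops a reference, tying the life-cycle and reference
 * counting notes above together.  The bare inode number used as the cache
 * key here is only an example; each file system defines its own key layout
 * for vcache_get(9) and VFS_LOADVNODE().
 *
 *	struct vnode *vp;
 *	ino_t ino = ...;
 *	int error;
 *
 *	error = vcache_get(mp, &ino, sizeof(ino), &vp);	// referenced, unlocked
 *	if (error)
 *		return error;
 *	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 *	if (error) {
 *		vrele(vp);				// drop the reference
 *		return error;
 *	}
 *	// ... operate on the locked, referenced vnode ...
 *	vput(vp);					// VOP_UNLOCK() + vrele()
 */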

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.45 2015/07/12 08:11:28 hannken Exp $");

#define _VFS_VNODE_PRIVATE

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/device.h>
#include <sys/hash.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/kthread.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h>

#include <uvm/uvm.h>
#include <uvm/uvm_readahead.h>

/* Flags to vrelel. */
#define	VRELEL_ASYNC_RELE	0x0001	/* Always defer to vrele thread. */
#define	VRELEL_CHANGING_SET	0x0002	/* VI_CHANGING set by caller. */

struct vcache_key {
	struct mount *vk_mount;
	const void *vk_key;
	size_t vk_key_len;
};
struct vcache_node {
	SLIST_ENTRY(vcache_node) vn_hash;
	struct vnode *vn_vnode;
	struct vcache_key vn_key;
};

u_int			numvnodes		__cacheline_aligned;

static pool_cache_t	vnode_cache		__read_mostly;

/*
 * There are two free lists: one is for vnodes which have no buffer/page
 * references and one for those which do (i.e. v_holdcnt is non-zero).
 * The vnode recycling mechanism first looks at the former list.
 */
static kmutex_t		vnode_free_list_lock	__cacheline_aligned;
static vnodelst_t	vnode_free_list		__cacheline_aligned;
static vnodelst_t	vnode_hold_list		__cacheline_aligned;
static kcondvar_t	vdrain_cv		__cacheline_aligned;

static vnodelst_t	vrele_list		__cacheline_aligned;
static kmutex_t		vrele_lock		__cacheline_aligned;
static kcondvar_t	vrele_cv		__cacheline_aligned;
static lwp_t *		vrele_lwp		__cacheline_aligned;
static int		vrele_pending		__cacheline_aligned;
static int		vrele_gen		__cacheline_aligned;

SLIST_HEAD(hashhead, vcache_node);
static struct {
	kmutex_t	lock;
	u_long		hashmask;
	struct hashhead	*hashtab;
	pool_cache_t	pool;
}			vcache			__cacheline_aligned;

static int		cleanvnode(void);
static void		vcache_init(void);
static void		vcache_reinit(void);
static void		vclean(vnode_t *);
static void		vrelel(vnode_t *, int);
static void		vdrain_thread(void *);
static void		vrele_thread(void *);
static void		vnpanic(vnode_t *, const char *, ...)
    __printflike(2, 3);
static void		vwait(vnode_t *, int);

/* Routines having to do with the management of the vnode table. */
extern struct mount	*dead_rootmount;
extern int		(**dead_vnodeop_p)(void *);
extern struct vfsops	dead_vfsops;

void
vfs_vnode_sysinit(void)
{
	int error __diagused;

	vnode_cache = pool_cache_init(sizeof(vnode_t), 0, 0, 0, "vnodepl",
	    NULL, IPL_NONE, NULL, NULL, NULL);
	KASSERT(vnode_cache != NULL);

	dead_rootmount = vfs_mountalloc(&dead_vfsops, NULL);
	KASSERT(dead_rootmount != NULL);
	dead_rootmount->mnt_iflag = IMNT_MPSAFE;

	mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE);
	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&vnode_hold_list);
	TAILQ_INIT(&vrele_list);

	vcache_init();

	mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&vdrain_cv, "vdrain");
	cv_init(&vrele_cv, "vrele");
	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vdrain_thread,
	    NULL, NULL, "vdrain");
	KASSERT(error == 0);
	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
	    NULL, &vrele_lwp, "vrele");
	KASSERT(error == 0);
}

/*
 * Allocate a new, uninitialized vnode.  If 'mp' is non-NULL, this is a
 * marker vnode.
 */
vnode_t *
vnalloc(struct mount *mp)
{
	vnode_t *vp;

	vp = pool_cache_get(vnode_cache, PR_WAITOK);
	KASSERT(vp != NULL);

	memset(vp, 0, sizeof(*vp));
	uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0);
	cv_init(&vp->v_cv, "vnode");
	/*
	 * Done by memset() above.
	 *	LIST_INIT(&vp->v_nclist);
	 *	LIST_INIT(&vp->v_dnclist);
	 */

	if (mp != NULL) {
		vp->v_mount = mp;
		vp->v_type = VBAD;
		vp->v_iflag = VI_MARKER;
		return vp;
	}

	mutex_enter(&vnode_free_list_lock);
	numvnodes++;
	if (numvnodes > desiredvnodes + desiredvnodes / 10)
		cv_signal(&vdrain_cv);
	mutex_exit(&vnode_free_list_lock);

	rw_init(&vp->v_lock);
	vp->v_usecount = 1;
	vp->v_type = VNON;
	vp->v_size = vp->v_writesize = VSIZENOTSET;

	return vp;
}

/*
 * Free an unused, unreferenced vnode.
 */
void
vnfree(vnode_t *vp)
{

	KASSERT(vp->v_usecount == 0);

	if ((vp->v_iflag & VI_MARKER) == 0) {
		rw_destroy(&vp->v_lock);
		mutex_enter(&vnode_free_list_lock);
		numvnodes--;
		mutex_exit(&vnode_free_list_lock);
	}

	uvm_obj_destroy(&vp->v_uobj, true);
	cv_destroy(&vp->v_cv);
	pool_cache_put(vnode_cache, vp);
}

/*
 * cleanvnode: grab a vnode from freelist, clean and free it.
 *
 * => Releases vnode_free_list_lock.
 */
static int
cleanvnode(void)
{
	vnode_t *vp;
	vnodelst_t *listhd;
	struct mount *mp;

	KASSERT(mutex_owned(&vnode_free_list_lock));

	listhd = &vnode_free_list;
try_nextlist:
	TAILQ_FOREACH(vp, listhd, v_freelist) {
		/*
		 * It's safe to test v_usecount and v_iflag
		 * without holding the interlock here, since
		 * these vnodes should never appear on the
		 * lists.
		 */
		KASSERT(vp->v_usecount == 0);
		KASSERT((vp->v_iflag & VI_CLEAN) == 0);
		KASSERT(vp->v_freelisthd == listhd);

		if (!mutex_tryenter(vp->v_interlock))
			continue;
		if ((vp->v_iflag & VI_XLOCK) != 0) {
			mutex_exit(vp->v_interlock);
			continue;
		}
		mp = vp->v_mount;
		if (fstrans_start_nowait(mp, FSTRANS_SHARED) != 0) {
			mutex_exit(vp->v_interlock);
			continue;
		}
		break;
	}

	if (vp == NULL) {
		if (listhd == &vnode_free_list) {
			listhd = &vnode_hold_list;
			goto try_nextlist;
		}
		mutex_exit(&vnode_free_list_lock);
		return EBUSY;
	}

	/* Remove it from the freelist. */
	TAILQ_REMOVE(listhd, vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);

	KASSERT(vp->v_usecount == 0);

	/*
	 * The vnode is still associated with a file system, so we must
	 * clean it out before freeing it.  We need to add a reference
	 * before doing this.
	 */
	vp->v_usecount = 1;
	KASSERT((vp->v_iflag & VI_CHANGING) == 0);
	vp->v_iflag |= VI_CHANGING;
	vclean(vp);
	vrelel(vp, VRELEL_CHANGING_SET);
	fstrans_done(mp);

	return 0;
}

/*
 * Helper thread to keep the number of vnodes below desiredvnodes.
 */
static void
vdrain_thread(void *cookie)
{
	int error;

	mutex_enter(&vnode_free_list_lock);

	for (;;) {
		cv_timedwait(&vdrain_cv, &vnode_free_list_lock, hz);
		while (numvnodes > desiredvnodes) {
			error = cleanvnode();
			if (error)
				kpause("vndsbusy", false, hz, NULL);
			mutex_enter(&vnode_free_list_lock);
			if (error)
				break;
		}
	}
}

/*
 * Remove a vnode from its freelist.
 */
void
vremfree(vnode_t *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_usecount == 0);

	/*
	 * Note that the reference count must not change until
	 * the vnode is removed.
	 */
	mutex_enter(&vnode_free_list_lock);
	if (vp->v_holdcnt > 0) {
		KASSERT(vp->v_freelisthd == &vnode_hold_list);
	} else {
		KASSERT(vp->v_freelisthd == &vnode_free_list);
	}
	TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);
}

/*
 * vget: get a particular vnode from the free list and increment its
 * reference count.
 *
 * => Should be called with v_interlock held.
 *
 * If VI_CHANGING is set, the vnode may be eliminated in vgone()/vclean().
 * In that case, we cannot grab the vnode, so the process is awakened when
 * the transition is completed, and an error returned to indicate that the
 * vnode is no longer usable.
 */
int
vget(vnode_t *vp, int flags, bool waitok)
{
	int error = 0;

	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((flags & ~LK_NOWAIT) == 0);
	KASSERT(waitok == ((flags & LK_NOWAIT) == 0));

	/*
	 * Before adding a reference, we must remove the vnode
	 * from its freelist.
	 */
	if (vp->v_usecount == 0) {
		vremfree(vp);
		vp->v_usecount = 1;
	} else {
		atomic_inc_uint(&vp->v_usecount);
	}

	/*
	 * If the vnode is in the process of changing state we wait
	 * for the change to complete and take care not to return
	 * a clean vnode.
	 */
	if ((vp->v_iflag & VI_CHANGING) != 0) {
		if ((flags & LK_NOWAIT) != 0) {
			vrelel(vp, 0);
			return EBUSY;
		}
		vwait(vp, VI_CHANGING);
		if ((vp->v_iflag & VI_CLEAN) != 0) {
			vrelel(vp, 0);
			return ENOENT;
		}
	}

	/*
	 * Ok, we got it in good shape.
	 */
	KASSERT((vp->v_iflag & VI_CLEAN) == 0);
	mutex_exit(vp->v_interlock);
	return error;
}
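
/*
 * Illustrative sketch (not part of the build): callers of vget(9) already
 * hold v_interlock, typically after finding the vnode in some index, and
 * must be prepared for the vnode to die underneath them.  vcache_get()
 * below uses exactly this pattern.
 *
 *	mutex_enter(vp->v_interlock);
 *	error = vget(vp, 0, true);	// releases v_interlock in all cases
 *	if (error == ENOENT)
 *		// the vnode was reclaimed; repeat the lookup
 *	else if (error == 0)
 *		// we now hold a reference; vrele() it when done
 */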

/*
 * vput: unlock and release the reference.
 */
void
vput(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Try to drop reference on a vnode.  Abort if we are releasing the
 * last reference.  Note: this _must_ succeed if not the last reference.
 */
static inline bool
vtryrele(vnode_t *vp)
{
	u_int use, next;

	for (use = vp->v_usecount;; use = next) {
		if (use == 1) {
			return false;
		}
		KASSERT(use > 1);
		next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
		if (__predict_true(next == use)) {
			return true;
		}
	}
}

/*
 * Vnode release.  If reference count drops to zero, call inactive
 * routine and either return to freelist or free to the pool.
 */
static void
vrelel(vnode_t *vp, int flags)
{
	bool recycle, defer;
	int error;

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT(vp->v_freelisthd == NULL);

	if (__predict_false(vp->v_op == dead_vnodeop_p &&
	    (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) {
		vnpanic(vp, "dead but not clean");
	}

	/*
	 * If not the last reference, just drop the reference count
	 * and unlock.
	 */
	if (vtryrele(vp)) {
		if ((flags & VRELEL_CHANGING_SET) != 0) {
			KASSERT((vp->v_iflag & VI_CHANGING) != 0);
			vp->v_iflag &= ~VI_CHANGING;
			cv_broadcast(&vp->v_cv);
		}
		mutex_exit(vp->v_interlock);
		return;
	}
	if (vp->v_usecount <= 0 || vp->v_writecount != 0) {
		vnpanic(vp, "%s: bad ref count", __func__);
	}

	KASSERT((vp->v_iflag & VI_XLOCK) == 0);

#ifdef DIAGNOSTIC
	if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
	    vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
		vprint("vrelel: missing VOP_CLOSE()", vp);
	}
#endif

	/*
	 * If not clean, deactivate the vnode, but preserve
	 * our reference across the call to VOP_INACTIVE().
	 */
	if ((vp->v_iflag & VI_CLEAN) == 0) {
		recycle = false;

		/*
		 * XXX This ugly block can be largely eliminated if
		 * locking is pushed down into the file systems.
		 *
		 * Defer vnode release to vrele_thread if caller
		 * requests it explicitly or is the pagedaemon.
		 */
		if ((curlwp == uvm.pagedaemon_lwp) ||
		    (flags & VRELEL_ASYNC_RELE) != 0) {
			defer = true;
		} else if (curlwp == vrele_lwp) {
			/*
			 * We have to try harder.
			 */
			mutex_exit(vp->v_interlock);
			error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			KASSERT(error == 0);
			mutex_enter(vp->v_interlock);
			defer = false;
		} else {
			/* If we can't acquire the lock, then defer. */
			mutex_exit(vp->v_interlock);
			error = vn_lock(vp,
			    LK_EXCLUSIVE | LK_RETRY | LK_NOWAIT);
			defer = (error != 0);
			mutex_enter(vp->v_interlock);
		}

		KASSERT(mutex_owned(vp->v_interlock));
		KASSERT(! (curlwp == vrele_lwp && defer));

		if (defer) {
			/*
			 * Defer reclaim to the kthread; it's not safe to
			 * clean it here.  We donate it our last reference.
			 */
			if ((flags & VRELEL_CHANGING_SET) != 0) {
				KASSERT((vp->v_iflag & VI_CHANGING) != 0);
				vp->v_iflag &= ~VI_CHANGING;
				cv_broadcast(&vp->v_cv);
			}
			mutex_enter(&vrele_lock);
			TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
			if (++vrele_pending > (desiredvnodes >> 8))
				cv_signal(&vrele_cv);
			mutex_exit(&vrele_lock);
			mutex_exit(vp->v_interlock);
			return;
		}

		/*
		 * If the node got another reference while we
		 * released the interlock, don't try to inactivate it yet.
		 */
		if (__predict_false(vtryrele(vp))) {
			VOP_UNLOCK(vp);
			if ((flags & VRELEL_CHANGING_SET) != 0) {
				KASSERT((vp->v_iflag & VI_CHANGING) != 0);
				vp->v_iflag &= ~VI_CHANGING;
				cv_broadcast(&vp->v_cv);
			}
			mutex_exit(vp->v_interlock);
			return;
		}

		if ((flags & VRELEL_CHANGING_SET) == 0) {
			KASSERT((vp->v_iflag & VI_CHANGING) == 0);
			vp->v_iflag |= VI_CHANGING;
		}
		mutex_exit(vp->v_interlock);

		/*
		 * The vnode can gain another reference while being
		 * deactivated.  If VOP_INACTIVE() indicates that
		 * the described file has been deleted, then recycle
		 * the vnode irrespective of additional references.
		 * Another thread may be waiting to re-use the on-disk
		 * inode.
		 *
		 * Note that VOP_INACTIVE() will drop the vnode lock.
		 */
		VOP_INACTIVE(vp, &recycle);
		mutex_enter(vp->v_interlock);
		if (!recycle) {
			if (vtryrele(vp)) {
				KASSERT((vp->v_iflag & VI_CHANGING) != 0);
				vp->v_iflag &= ~VI_CHANGING;
				cv_broadcast(&vp->v_cv);
				mutex_exit(vp->v_interlock);
				return;
			}
		}

		/* Take care of space accounting. */
		if (vp->v_iflag & VI_EXECMAP) {
			atomic_add_int(&uvmexp.execpages,
			    -vp->v_uobj.uo_npages);
			atomic_add_int(&uvmexp.filepages,
			    vp->v_uobj.uo_npages);
		}
		vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP);
		vp->v_vflag &= ~VV_MAPPED;

		/*
		 * Recycle the vnode if the file is now unused (unlinked),
		 * otherwise just free it.
		 */
		if (recycle) {
			vclean(vp);
		}
		KASSERT(vp->v_usecount > 0);
	} else { /* vnode was already clean */
		if ((flags & VRELEL_CHANGING_SET) == 0) {
			KASSERT((vp->v_iflag & VI_CHANGING) == 0);
			vp->v_iflag |= VI_CHANGING;
		}
	}

	if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
		/* Gained another reference while being reclaimed. */
		KASSERT((vp->v_iflag & VI_CHANGING) != 0);
		vp->v_iflag &= ~VI_CHANGING;
		cv_broadcast(&vp->v_cv);
		mutex_exit(vp->v_interlock);
		return;
	}

	if ((vp->v_iflag & VI_CLEAN) != 0) {
		/*
		 * It's clean so destroy it.  It isn't referenced
		 * anywhere since it has been reclaimed.
		 */
		KASSERT(vp->v_holdcnt == 0);
		KASSERT(vp->v_writecount == 0);
		mutex_exit(vp->v_interlock);
		vfs_insmntque(vp, NULL);
		if (vp->v_type == VBLK || vp->v_type == VCHR) {
			spec_node_destroy(vp);
		}
		vnfree(vp);
	} else {
		/*
		 * Otherwise, put it back onto the freelist.  It
		 * can't be destroyed while still associated with
		 * a file system.
		 */
		mutex_enter(&vnode_free_list_lock);
		if (vp->v_holdcnt > 0) {
			vp->v_freelisthd = &vnode_hold_list;
		} else {
			vp->v_freelisthd = &vnode_free_list;
		}
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
		KASSERT((vp->v_iflag & VI_CHANGING) != 0);
		vp->v_iflag &= ~VI_CHANGING;
		cv_broadcast(&vp->v_cv);
		mutex_exit(vp->v_interlock);
	}
}

void
vrele(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vtryrele(vp)) {
		return;
	}
	mutex_enter(vp->v_interlock);
	vrelel(vp, 0);
}

/*
 * Asynchronous vnode release: the vnode is released in a different context.
 */
void
vrele_async(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vtryrele(vp)) {
		return;
	}
	mutex_enter(vp->v_interlock);
	vrelel(vp, VRELEL_ASYNC_RELE);
}
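
/*
 * Illustrative note (not part of the build): vrele_async(9) is the safe
 * choice when the caller may hold locks that conflict with the vnode lock
 * taken by VOP_INACTIVE()/VOP_RECLAIM(); the final release is then handed
 * to the vrele thread instead of happening in the caller's context.
 *
 *	// e.g. while iterating some list with its lock held:
 *	vrele_async(vp);	// never blocks on the vnode lock
 */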

static void
vrele_thread(void *cookie)
{
	vnodelst_t skip_list;
	vnode_t *vp;
	struct mount *mp;

	TAILQ_INIT(&skip_list);

	mutex_enter(&vrele_lock);
	for (;;) {
		while (TAILQ_EMPTY(&vrele_list)) {
			vrele_gen++;
			cv_broadcast(&vrele_cv);
			cv_timedwait(&vrele_cv, &vrele_lock, hz);
			TAILQ_CONCAT(&vrele_list, &skip_list, v_freelist);
		}
		vp = TAILQ_FIRST(&vrele_list);
		mp = vp->v_mount;
		TAILQ_REMOVE(&vrele_list, vp, v_freelist);
		if (fstrans_start_nowait(mp, FSTRANS_LAZY) != 0) {
			TAILQ_INSERT_TAIL(&skip_list, vp, v_freelist);
			continue;
		}
		vrele_pending--;
		mutex_exit(&vrele_lock);

		/*
		 * If not the last reference, then ignore the vnode
		 * and look for more work.
		 */
		mutex_enter(vp->v_interlock);
		vrelel(vp, 0);
		fstrans_done(mp);
		mutex_enter(&vrele_lock);
	}
}

void
vrele_flush(void)
{
	int gen;

	mutex_enter(&vrele_lock);
	gen = vrele_gen;
	while (vrele_pending && gen == vrele_gen) {
		cv_broadcast(&vrele_cv);
		cv_wait(&vrele_cv, &vrele_lock);
	}
	mutex_exit(&vrele_lock);
}

/*
 * Vnode reference, where a reference is already held by some other
 * object (for example, a file structure).
 */
void
vref(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT(vp->v_usecount != 0);

	atomic_inc_uint(&vp->v_usecount);
}

/*
 * Page or buffer structure gets a reference.
 * Called with v_interlock held.
 */
void
vholdl(vnode_t *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) {
		mutex_enter(&vnode_free_list_lock);
		KASSERT(vp->v_freelisthd == &vnode_free_list);
		TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
		vp->v_freelisthd = &vnode_hold_list;
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
	}
}

/*
 * Page or buffer structure frees a reference.
 * Called with v_interlock held.
 */
void
holdrelel(vnode_t *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vp->v_holdcnt <= 0) {
		vnpanic(vp, "%s: holdcnt vp %p", __func__, vp);
	}

	vp->v_holdcnt--;
	if (vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		mutex_enter(&vnode_free_list_lock);
		KASSERT(vp->v_freelisthd == &vnode_hold_list);
		TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
		vp->v_freelisthd = &vnode_free_list;
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
	}
}

/*
 * Disassociate the underlying file system from a vnode.
 *
 * Must be called with the interlock held, and will return with it held.
 */
static void
vclean(vnode_t *vp)
{
	lwp_t *l = curlwp;
	bool recycle, active;
	int error;

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT(vp->v_usecount != 0);

	/* If already clean, nothing to do. */
	if ((vp->v_iflag & VI_CLEAN) != 0) {
		return;
	}

	active = (vp->v_usecount > 1);
	mutex_exit(vp->v_interlock);

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Prevent the vnode from being recycled or brought into use
	 * while we clean it out.
	 */
	mutex_enter(vp->v_interlock);
	KASSERT((vp->v_iflag & (VI_XLOCK | VI_CLEAN)) == 0);
	vp->v_iflag |= VI_XLOCK;
	if (vp->v_iflag & VI_EXECMAP) {
		atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
		atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
	}
	vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
	mutex_exit(vp->v_interlock);

	/*
	 * Clean out any cached data associated with the vnode.
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.  Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
	if (error != 0) {
		if (wapbl_vphaswapbl(vp))
			WAPBL_DISCARD(wapbl_vptomp(vp));
		error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
	}
	KASSERT(error == 0);
	KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
	if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
		spec_node_revoke(vp);
	}
	if (active) {
		VOP_INACTIVE(vp, &recycle);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VI_XLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp);
	}

	/* Disassociate the underlying file system from the vnode. */
	if (VOP_RECLAIM(vp)) {
		vnpanic(vp, "%s: cannot reclaim", __func__);
	}

	KASSERT(vp->v_data == NULL);
	KASSERT(vp->v_uobj.uo_npages == 0);

	if (vp->v_type == VREG && vp->v_ractx != NULL) {
		uvm_ra_freectx(vp->v_ractx);
		vp->v_ractx = NULL;
	}

	/* Purge name cache. */
	cache_purge(vp);

	/* Move to dead mount. */
	vp->v_vflag &= ~VV_ROOT;
	atomic_inc_uint(&dead_rootmount->mnt_refcnt);
	vfs_insmntque(vp, dead_rootmount);

	/* Done with purge, notify sleepers of the grim news. */
	mutex_enter(vp->v_interlock);
	vp->v_op = dead_vnodeop_p;
	vp->v_vflag |= VV_LOCKSWORK;
	vp->v_iflag |= VI_CLEAN;
	vp->v_tag = VT_NON;
	KNOTE(&vp->v_klist, NOTE_REVOKE);
	vp->v_iflag &= ~VI_XLOCK;
	cv_broadcast(&vp->v_cv);

	KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
}

/*
 * Recycle an unused vnode if caller holds the last reference.
 */
bool
vrecycle(vnode_t *vp)
{

	mutex_enter(vp->v_interlock);

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vp->v_usecount != 1) {
		mutex_exit(vp->v_interlock);
		return false;
	}
	if ((vp->v_iflag & VI_CHANGING) != 0)
		vwait(vp, VI_CHANGING);
	if (vp->v_usecount != 1) {
		mutex_exit(vp->v_interlock);
		return false;
	} else if ((vp->v_iflag & VI_CLEAN) != 0) {
		mutex_exit(vp->v_interlock);
		return true;
	}
	vp->v_iflag |= VI_CHANGING;
	vclean(vp);
	vrelel(vp, VRELEL_CHANGING_SET);
	return true;
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vrevoke(vnode_t *vp)
{
	vnode_t *vq;
	enum vtype type;
	dev_t dev;

	KASSERT(vp->v_usecount > 0);

	mutex_enter(vp->v_interlock);
	if ((vp->v_iflag & VI_CLEAN) != 0) {
		mutex_exit(vp->v_interlock);
		return;
	} else if (vp->v_type != VBLK && vp->v_type != VCHR) {
		atomic_inc_uint(&vp->v_usecount);
		mutex_exit(vp->v_interlock);
		vgone(vp);
		return;
	} else {
		dev = vp->v_rdev;
		type = vp->v_type;
		mutex_exit(vp->v_interlock);
	}

	while (spec_node_lookup_by_dev(type, dev, &vq) == 0) {
		vgone(vq);
	}
}

/*
 * Eliminate all activity associated with a vnode in preparation for
 * reuse.  Drops a reference from the vnode.
 */
void
vgone(vnode_t *vp)
{

	mutex_enter(vp->v_interlock);
	if ((vp->v_iflag & VI_CHANGING) != 0)
		vwait(vp, VI_CHANGING);
	vp->v_iflag |= VI_CHANGING;
	vclean(vp);
	vrelel(vp, VRELEL_CHANGING_SET);
}

static inline uint32_t
vcache_hash(const struct vcache_key *key)
{
	uint32_t hash = HASH32_BUF_INIT;

	hash = hash32_buf(&key->vk_mount, sizeof(struct mount *), hash);
	hash = hash32_buf(key->vk_key, key->vk_key_len, hash);
	return hash;
}

static void
vcache_init(void)
{

	vcache.pool = pool_cache_init(sizeof(struct vcache_node), 0, 0, 0,
	    "vcachepl", NULL, IPL_NONE, NULL, NULL, NULL);
	KASSERT(vcache.pool != NULL);
	mutex_init(&vcache.lock, MUTEX_DEFAULT, IPL_NONE);
	vcache.hashtab = hashinit(desiredvnodes, HASH_SLIST, true,
	    &vcache.hashmask);
}

static void
vcache_reinit(void)
{
	int i;
	uint32_t hash;
	u_long oldmask, newmask;
	struct hashhead *oldtab, *newtab;
	struct vcache_node *node;

	newtab = hashinit(desiredvnodes, HASH_SLIST, true, &newmask);
	mutex_enter(&vcache.lock);
	oldtab = vcache.hashtab;
	oldmask = vcache.hashmask;
	vcache.hashtab = newtab;
	vcache.hashmask = newmask;
	for (i = 0; i <= oldmask; i++) {
		while ((node = SLIST_FIRST(&oldtab[i])) != NULL) {
			SLIST_REMOVE(&oldtab[i], node, vcache_node, vn_hash);
			hash = vcache_hash(&node->vn_key);
			SLIST_INSERT_HEAD(&newtab[hash & vcache.hashmask],
			    node, vn_hash);
		}
	}
	mutex_exit(&vcache.lock);
	hashdone(oldtab, HASH_SLIST, oldmask);
}

static inline struct vcache_node *
vcache_hash_lookup(const struct vcache_key *key, uint32_t hash)
{
	struct hashhead *hashp;
	struct vcache_node *node;

	KASSERT(mutex_owned(&vcache.lock));

	hashp = &vcache.hashtab[hash & vcache.hashmask];
	SLIST_FOREACH(node, hashp, vn_hash) {
		if (key->vk_mount != node->vn_key.vk_mount)
			continue;
		if (key->vk_key_len != node->vn_key.vk_key_len)
			continue;
		if (memcmp(key->vk_key, node->vn_key.vk_key, key->vk_key_len))
			continue;
		return node;
	}
	return NULL;
}

/*
 * Get a vnode / fs node pair by key and return it referenced through vpp.
 */
int
vcache_get(struct mount *mp, const void *key, size_t key_len,
    struct vnode **vpp)
{
	int error;
	uint32_t hash;
	const void *new_key;
	struct vnode *vp;
	struct vcache_key vcache_key;
	struct vcache_node *node, *new_node;

	new_key = NULL;
	*vpp = NULL;

	vcache_key.vk_mount = mp;
	vcache_key.vk_key = key;
	vcache_key.vk_key_len = key_len;
	hash = vcache_hash(&vcache_key);

again:
	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&vcache_key, hash);

	/* If found, take a reference or retry. */
	if (__predict_true(node != NULL && node->vn_vnode != NULL)) {
		vp = node->vn_vnode;
		mutex_enter(vp->v_interlock);
		mutex_exit(&vcache.lock);
		error = vget(vp, 0, true /* wait */);
		if (error == ENOENT)
			goto again;
		if (error == 0)
			*vpp = vp;
		KASSERT((error != 0) == (*vpp == NULL));
		return error;
	}

	/* If another thread loads this node, wait and retry. */
	if (node != NULL) {
		KASSERT(node->vn_vnode == NULL);
		mutex_exit(&vcache.lock);
		kpause("vcache", false, mstohz(20), NULL);
		goto again;
	}
	mutex_exit(&vcache.lock);

	/* Allocate and initialize a new vcache / vnode pair. */
	error = vfs_busy(mp, NULL);
	if (error)
		return error;
	new_node = pool_cache_get(vcache.pool, PR_WAITOK);
	new_node->vn_vnode = NULL;
	new_node->vn_key = vcache_key;
	vp = vnalloc(NULL);
	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&vcache_key, hash);
	if (node == NULL) {
		SLIST_INSERT_HEAD(&vcache.hashtab[hash & vcache.hashmask],
		    new_node, vn_hash);
		node = new_node;
	}
	mutex_exit(&vcache.lock);

	/* If another thread beat us inserting this node, retry. */
	if (node != new_node) {
		pool_cache_put(vcache.pool, new_node);
		KASSERT(vp->v_usecount == 1);
		vp->v_usecount = 0;
		vnfree(vp);
		vfs_unbusy(mp, false, NULL);
		goto again;
	}

	/* Load the fs node.  Exclusive as new_node->vn_vnode is NULL. */
	vp->v_iflag |= VI_CHANGING;
	error = VFS_LOADVNODE(mp, vp, key, key_len, &new_key);
	if (error) {
		mutex_enter(&vcache.lock);
		SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask],
		    new_node, vcache_node, vn_hash);
		mutex_exit(&vcache.lock);
		pool_cache_put(vcache.pool, new_node);
		KASSERT(vp->v_usecount == 1);
		vp->v_usecount = 0;
		vnfree(vp);
		vfs_unbusy(mp, false, NULL);
		KASSERT(*vpp == NULL);
		return error;
	}
	KASSERT(new_key != NULL);
	KASSERT(memcmp(key, new_key, key_len) == 0);
	KASSERT(vp->v_op != NULL);
	vfs_insmntque(vp, mp);
	if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
		vp->v_vflag |= VV_MPSAFE;
	vfs_unbusy(mp, true, NULL);

	/* Finished loading, finalize node. */
	mutex_enter(&vcache.lock);
	new_node->vn_key.vk_key = new_key;
	new_node->vn_vnode = vp;
	mutex_exit(&vcache.lock);
	mutex_enter(vp->v_interlock);
	vp->v_iflag &= ~VI_CHANGING;
	cv_broadcast(&vp->v_cv);
	mutex_exit(vp->v_interlock);
	*vpp = vp;
	return 0;
}
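
/*
 * Illustrative sketch (not part of the build) of what vcache_get() expects
 * from a file system's loadvnode hook, based on the assertions above: on
 * success the hook must have set vp->v_op (and normally v_data, v_type and
 * the size), and *new_key must point at a copy of the key that compares
 * equal to the one passed in and stays valid for the life of the vnode.
 * All "myfs" names and fields below are hypothetical.
 *
 *	static int
 *	myfs_loadvnode(struct mount *mp, struct vnode *vp,
 *	    const void *key, size_t key_len, const void **new_key)
 *	{
 *		struct myfs_node *mnp;
 *		ino_t ino;
 *
 *		KASSERT(key_len == sizeof(ino));
 *		memcpy(&ino, key, key_len);
 *		mnp = myfs_read_node(mp, ino);		// hypothetical helper
 *		if (mnp == NULL)
 *			return ENOENT;
 *		vp->v_op = myfs_vnodeop_p;		// hypothetical vnode ops
 *		vp->v_data = mnp;
 *		vp->v_type = mnp->mn_vtype;
 *		uvm_vnp_setsize(vp, mnp->mn_size);
 *		*new_key = &mnp->mn_ino;		// stable copy of the key
 *		return 0;
 *	}
 */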

/*
 * Create a new vnode / fs node pair and return it referenced through vpp.
 */
int
vcache_new(struct mount *mp, struct vnode *dvp, struct vattr *vap,
    kauth_cred_t cred, struct vnode **vpp)
{
	int error;
	uint32_t hash;
	struct vnode *vp;
	struct vcache_node *new_node;
	struct vcache_node *old_node __diagused;

	*vpp = NULL;

	/* Allocate and initialize a new vcache / vnode pair. */
	error = vfs_busy(mp, NULL);
	if (error)
		return error;
	new_node = pool_cache_get(vcache.pool, PR_WAITOK);
	new_node->vn_key.vk_mount = mp;
	new_node->vn_vnode = NULL;
	vp = vnalloc(NULL);

	/* Create and load the fs node. */
	vp->v_iflag |= VI_CHANGING;
	error = VFS_NEWVNODE(mp, dvp, vp, vap, cred,
	    &new_node->vn_key.vk_key_len, &new_node->vn_key.vk_key);
	if (error) {
		pool_cache_put(vcache.pool, new_node);
		KASSERT(vp->v_usecount == 1);
		vp->v_usecount = 0;
		vnfree(vp);
		vfs_unbusy(mp, false, NULL);
		KASSERT(*vpp == NULL);
		return error;
	}
	KASSERT(new_node->vn_key.vk_key != NULL);
	KASSERT(vp->v_op != NULL);
	hash = vcache_hash(&new_node->vn_key);

	/* Wait for previous instance to be reclaimed, then insert new node. */
	mutex_enter(&vcache.lock);
	while ((old_node = vcache_hash_lookup(&new_node->vn_key, hash))) {
#ifdef DIAGNOSTIC
		if (old_node->vn_vnode != NULL)
			mutex_enter(old_node->vn_vnode->v_interlock);
		KASSERT(old_node->vn_vnode == NULL ||
		    (old_node->vn_vnode->v_iflag & (VI_XLOCK | VI_CLEAN)) != 0);
		if (old_node->vn_vnode != NULL)
			mutex_exit(old_node->vn_vnode->v_interlock);
#endif
		mutex_exit(&vcache.lock);
		kpause("vcache", false, mstohz(20), NULL);
		mutex_enter(&vcache.lock);
	}
	SLIST_INSERT_HEAD(&vcache.hashtab[hash & vcache.hashmask],
	    new_node, vn_hash);
	mutex_exit(&vcache.lock);
	vfs_insmntque(vp, mp);
	if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
		vp->v_vflag |= VV_MPSAFE;
	vfs_unbusy(mp, true, NULL);

	/* Finished loading, finalize node. */
	mutex_enter(&vcache.lock);
	new_node->vn_vnode = vp;
	mutex_exit(&vcache.lock);
	mutex_enter(vp->v_interlock);
	vp->v_iflag &= ~VI_CHANGING;
	cv_broadcast(&vp->v_cv);
	mutex_exit(vp->v_interlock);
	*vpp = vp;
	return 0;
}

/*
 * Prepare key change: lock old and new cache node.
 * Return an error if the new node already exists.
 */
int
vcache_rekey_enter(struct mount *mp, struct vnode *vp,
    const void *old_key, size_t old_key_len,
    const void *new_key, size_t new_key_len)
{
	uint32_t old_hash, new_hash;
	struct vcache_key old_vcache_key, new_vcache_key;
	struct vcache_node *node, *new_node;

	old_vcache_key.vk_mount = mp;
	old_vcache_key.vk_key = old_key;
	old_vcache_key.vk_key_len = old_key_len;
	old_hash = vcache_hash(&old_vcache_key);

	new_vcache_key.vk_mount = mp;
	new_vcache_key.vk_key = new_key;
	new_vcache_key.vk_key_len = new_key_len;
	new_hash = vcache_hash(&new_vcache_key);

	new_node = pool_cache_get(vcache.pool, PR_WAITOK);
	new_node->vn_vnode = NULL;
	new_node->vn_key = new_vcache_key;

	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&new_vcache_key, new_hash);
	if (node != NULL) {
		mutex_exit(&vcache.lock);
		pool_cache_put(vcache.pool, new_node);
		return EEXIST;
	}
	SLIST_INSERT_HEAD(&vcache.hashtab[new_hash & vcache.hashmask],
	    new_node, vn_hash);
	node = vcache_hash_lookup(&old_vcache_key, old_hash);
	KASSERT(node != NULL);
	KASSERT(node->vn_vnode == vp);
	node->vn_vnode = NULL;
	node->vn_key = old_vcache_key;
	mutex_exit(&vcache.lock);
	return 0;
}

/*
 * Key change complete: remove old node and unlock new node.
 */
void
vcache_rekey_exit(struct mount *mp, struct vnode *vp,
    const void *old_key, size_t old_key_len,
    const void *new_key, size_t new_key_len)
{
	uint32_t old_hash, new_hash;
	struct vcache_key old_vcache_key, new_vcache_key;
	struct vcache_node *node;

	old_vcache_key.vk_mount = mp;
	old_vcache_key.vk_key = old_key;
	old_vcache_key.vk_key_len = old_key_len;
	old_hash = vcache_hash(&old_vcache_key);

	new_vcache_key.vk_mount = mp;
	new_vcache_key.vk_key = new_key;
	new_vcache_key.vk_key_len = new_key_len;
	new_hash = vcache_hash(&new_vcache_key);

	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&new_vcache_key, new_hash);
	KASSERT(node != NULL && node->vn_vnode == NULL);
	KASSERT(node->vn_key.vk_key_len == new_key_len);
	node->vn_vnode = vp;
	node->vn_key = new_vcache_key;
	node = vcache_hash_lookup(&old_vcache_key, old_hash);
	KASSERT(node != NULL);
	KASSERT(node->vn_vnode == NULL);
	SLIST_REMOVE(&vcache.hashtab[old_hash & vcache.hashmask],
	    node, vcache_node, vn_hash);
	mutex_exit(&vcache.lock);
	pool_cache_put(vcache.pool, node);
}
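
/*
 * Illustrative sketch (not part of the build): a file system that changes
 * a node's cache key brackets the change with the two calls above, so that
 * lookups under either key wait until the change is complete.  The key
 * variables below are placeholders.
 *
 *	error = vcache_rekey_enter(mp, vp, &old_id, sizeof(old_id),
 *	    &new_id, sizeof(new_id));
 *	if (error)
 *		return error;		// EEXIST: new key already cached
 *	// ... update the fs node so it now answers to new_id ...
 *	vcache_rekey_exit(mp, vp, &old_id, sizeof(old_id),
 *	    &new_id, sizeof(new_id));
 */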

/*
 * Remove a vnode / fs node pair from the cache.
 */
void
vcache_remove(struct mount *mp, const void *key, size_t key_len)
{
	uint32_t hash;
	struct vcache_key vcache_key;
	struct vcache_node *node;

	vcache_key.vk_mount = mp;
	vcache_key.vk_key = key;
	vcache_key.vk_key_len = key_len;
	hash = vcache_hash(&vcache_key);

	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&vcache_key, hash);
	KASSERT(node != NULL);
	SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask],
	    node, vcache_node, vn_hash);
	mutex_exit(&vcache.lock);
	pool_cache_put(vcache.pool, node);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(struct buf *bp)
{
	vnode_t *vp;

	if ((vp = bp->b_vp) == NULL)
		return;

	KASSERT(bp->b_objlock == vp->v_interlock);
	KASSERT(mutex_owned(bp->b_objlock));

	if (--vp->v_numoutput < 0)
		vnpanic(vp, "%s: neg numoutput, vp %p", __func__, vp);
	if (vp->v_numoutput == 0)
		cv_broadcast(&vp->v_cv);
}

/*
 * Test a vnode for being or becoming dead.  Returns one of:
 * EBUSY:  vnode is becoming dead, with "flags == VDEAD_NOWAIT" only.
 * ENOENT: vnode is dead.
 * 0:      otherwise.
 *
 * Whenever this function returns a non-zero value all future
 * calls will also return a non-zero value.
 */
int
vdead_check(struct vnode *vp, int flags)
{

	KASSERT(mutex_owned(vp->v_interlock));
	if (ISSET(vp->v_iflag, VI_XLOCK)) {
		if (ISSET(flags, VDEAD_NOWAIT))
			return EBUSY;
		vwait(vp, VI_XLOCK);
		KASSERT(ISSET(vp->v_iflag, VI_CLEAN));
	}
	if (ISSET(vp->v_iflag, VI_CLEAN))
		return ENOENT;
	return 0;
}
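
/*
 * Illustrative sketch (not part of the build): vdead_check(9) is called
 * with v_interlock held, e.g. before committing to an operation that must
 * not run on a revoked vnode.  Without VDEAD_NOWAIT it may sleep until a
 * vnode that is being cleaned has finished dying.
 *
 *	mutex_enter(vp->v_interlock);
 *	error = vdead_check(vp, VDEAD_NOWAIT);
 *	mutex_exit(vp->v_interlock);
 *	if (error != 0)
 *		return error;		// EBUSY: dying, ENOENT: dead
 */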

/*
 * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or
 * recycled.
 */
static void
vwait(vnode_t *vp, int flags)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_usecount != 0);

	while ((vp->v_iflag & flags) != 0)
		cv_wait(&vp->v_cv, vp->v_interlock);
}

int
vfs_drainvnodes(long target)
{
	int error;

	mutex_enter(&vnode_free_list_lock);

	while (numvnodes > target) {
		error = cleanvnode();
		if (error != 0)
			return error;
		mutex_enter(&vnode_free_list_lock);
	}

	mutex_exit(&vnode_free_list_lock);

	vcache_reinit();

	return 0;
}

void
vnpanic(vnode_t *vp, const char *fmt, ...)
{
	va_list ap;

#ifdef DIAGNOSTIC
	vprint(NULL, vp);
#endif
	va_start(ap, fmt);
	vpanic(fmt, ap);
	va_end(ap);
}