1 /* $NetBSD: vfs_vnode.c,v 1.38 2014/09/05 05:57:21 matt Exp $ */ 2 3 /*- 4 * Copyright (c) 1997-2011 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1989, 1993 35 * The Regents of the University of California. All rights reserved. 36 * (c) UNIX System Laboratories, Inc. 37 * All or some portions of this file are derived from material licensed 38 * to the University of California by American Telephone and Telegraph 39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 40 * the permission of UNIX System Laboratories, Inc. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 67 */ 68 69 /* 70 * The vnode cache subsystem. 71 * 72 * Life-cycle 73 * 74 * Normally, there are two points where new vnodes are created: 75 * VOP_CREATE(9) and VOP_LOOKUP(9). The life-cycle of a vnode 76 * starts in one of the following ways: 77 * 78 * - Allocation, via getnewvnode(9) and/or vnalloc(9). 79 * - Reclamation of inactive vnode, via vget(9). 80 * 81 * Recycle from a free list, via getnewvnode(9) -> getcleanvnode(9) 82 * was another, traditional way. Currently, only the draining thread 83 * recycles the vnodes. This behaviour might be revisited. 84 * 85 * The life-cycle ends when the last reference is dropped, usually 86 * in VOP_REMOVE(9). In such case, VOP_INACTIVE(9) is called to inform 87 * the file system that vnode is inactive. Via this call, file system 88 * indicates whether vnode can be recycled (usually, it checks its own 89 * references, e.g. count of links, whether the file was removed). 90 * 91 * Depending on indication, vnode can be put into a free list (cache), 92 * or cleaned via vclean(9), which calls VOP_RECLAIM(9) to disassociate 93 * underlying file system from the vnode, and finally destroyed. 94 * 95 * Reference counting 96 * 97 * Vnode is considered active, if reference count (vnode_t::v_usecount) 98 * is non-zero. It is maintained using: vref(9) and vrele(9), as well 99 * as vput(9), routines. Common points holding references are e.g. 100 * file openings, current working directory, mount points, etc. 101 * 102 * Note on v_usecount and its locking 103 * 104 * At nearly all points it is known that v_usecount could be zero, 105 * the vnode_t::v_interlock will be held. To change v_usecount away 106 * from zero, the interlock must be held. To change from a non-zero 107 * value to zero, again the interlock must be held. 108 * 109 * Changing the usecount from a non-zero value to a non-zero value can 110 * safely be done using atomic operations, without the interlock held. 111 * 112 * Note: if VI_CLEAN is set, vnode_t::v_interlock will be released while 113 * mntvnode_lock is still held. 114 * 115 * See PR 41374. 116 */ 117 118 #include <sys/cdefs.h> 119 __KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.38 2014/09/05 05:57:21 matt Exp $"); 120 121 #define _VFS_VNODE_PRIVATE 122 123 #include <sys/param.h> 124 #include <sys/kernel.h> 125 126 #include <sys/atomic.h> 127 #include <sys/buf.h> 128 #include <sys/conf.h> 129 #include <sys/device.h> 130 #include <sys/hash.h> 131 #include <sys/kauth.h> 132 #include <sys/kmem.h> 133 #include <sys/kthread.h> 134 #include <sys/module.h> 135 #include <sys/mount.h> 136 #include <sys/namei.h> 137 #include <sys/syscallargs.h> 138 #include <sys/sysctl.h> 139 #include <sys/systm.h> 140 #include <sys/vnode.h> 141 #include <sys/wapbl.h> 142 #include <sys/fstrans.h> 143 144 #include <uvm/uvm.h> 145 #include <uvm/uvm_readahead.h> 146 147 /* Flags to vrelel. */ 148 #define VRELEL_ASYNC_RELE 0x0001 /* Always defer to vrele thread. */ 149 #define VRELEL_CHANGING_SET 0x0002 /* VI_CHANGING set by caller. */ 150 151 struct vcache_key { 152 struct mount *vk_mount; 153 const void *vk_key; 154 size_t vk_key_len; 155 }; 156 struct vcache_node { 157 SLIST_ENTRY(vcache_node) vn_hash; 158 struct vnode *vn_vnode; 159 struct vcache_key vn_key; 160 }; 161 162 u_int numvnodes __cacheline_aligned; 163 164 static pool_cache_t vnode_cache __read_mostly; 165 static struct mount *dead_mount; 166 167 /* 168 * There are two free lists: one is for vnodes which have no buffer/page 169 * references and one for those which do (i.e. v_holdcnt is non-zero). 170 * Vnode recycling mechanism first attempts to look into the former list. 171 */ 172 static kmutex_t vnode_free_list_lock __cacheline_aligned; 173 static vnodelst_t vnode_free_list __cacheline_aligned; 174 static vnodelst_t vnode_hold_list __cacheline_aligned; 175 static kcondvar_t vdrain_cv __cacheline_aligned; 176 177 static vnodelst_t vrele_list __cacheline_aligned; 178 static kmutex_t vrele_lock __cacheline_aligned; 179 static kcondvar_t vrele_cv __cacheline_aligned; 180 static lwp_t * vrele_lwp __cacheline_aligned; 181 static int vrele_pending __cacheline_aligned; 182 static int vrele_gen __cacheline_aligned; 183 184 SLIST_HEAD(hashhead, vcache_node); 185 static struct { 186 kmutex_t lock; 187 u_long hashmask; 188 struct hashhead *hashtab; 189 pool_cache_t pool; 190 } vcache __cacheline_aligned; 191 192 static int cleanvnode(void); 193 static void vcache_init(void); 194 static void vcache_reinit(void); 195 static void vclean(vnode_t *); 196 static void vrelel(vnode_t *, int); 197 static void vdrain_thread(void *); 198 static void vrele_thread(void *); 199 static void vnpanic(vnode_t *, const char *, ...) 200 __printflike(2, 3); 201 static void vwait(vnode_t *, int); 202 203 /* Routines having to do with the management of the vnode table. */ 204 extern int (**dead_vnodeop_p)(void *); 205 extern struct vfsops dead_vfsops; 206 207 void 208 vfs_vnode_sysinit(void) 209 { 210 int error __diagused; 211 212 vnode_cache = pool_cache_init(sizeof(vnode_t), 0, 0, 0, "vnodepl", 213 NULL, IPL_NONE, NULL, NULL, NULL); 214 KASSERT(vnode_cache != NULL); 215 216 dead_mount = vfs_mountalloc(&dead_vfsops, NULL); 217 KASSERT(dead_mount != NULL); 218 dead_mount->mnt_iflag = IMNT_MPSAFE; 219 220 mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE); 221 TAILQ_INIT(&vnode_free_list); 222 TAILQ_INIT(&vnode_hold_list); 223 TAILQ_INIT(&vrele_list); 224 225 vcache_init(); 226 227 mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE); 228 cv_init(&vdrain_cv, "vdrain"); 229 cv_init(&vrele_cv, "vrele"); 230 error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vdrain_thread, 231 NULL, NULL, "vdrain"); 232 KASSERT(error == 0); 233 error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread, 234 NULL, &vrele_lwp, "vrele"); 235 KASSERT(error == 0); 236 } 237 238 /* 239 * Allocate a new, uninitialized vnode. If 'mp' is non-NULL, this is a 240 * marker vnode. 241 */ 242 vnode_t * 243 vnalloc(struct mount *mp) 244 { 245 vnode_t *vp; 246 247 vp = pool_cache_get(vnode_cache, PR_WAITOK); 248 KASSERT(vp != NULL); 249 250 memset(vp, 0, sizeof(*vp)); 251 uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0); 252 cv_init(&vp->v_cv, "vnode"); 253 /* 254 * Done by memset() above. 255 * LIST_INIT(&vp->v_nclist); 256 * LIST_INIT(&vp->v_dnclist); 257 */ 258 259 if (mp != NULL) { 260 vp->v_mount = mp; 261 vp->v_type = VBAD; 262 vp->v_iflag = VI_MARKER; 263 return vp; 264 } 265 266 mutex_enter(&vnode_free_list_lock); 267 numvnodes++; 268 if (numvnodes > desiredvnodes + desiredvnodes / 10) 269 cv_signal(&vdrain_cv); 270 mutex_exit(&vnode_free_list_lock); 271 272 rw_init(&vp->v_lock); 273 vp->v_usecount = 1; 274 vp->v_type = VNON; 275 vp->v_size = vp->v_writesize = VSIZENOTSET; 276 277 return vp; 278 } 279 280 /* 281 * Free an unused, unreferenced vnode. 282 */ 283 void 284 vnfree(vnode_t *vp) 285 { 286 287 KASSERT(vp->v_usecount == 0); 288 289 if ((vp->v_iflag & VI_MARKER) == 0) { 290 rw_destroy(&vp->v_lock); 291 mutex_enter(&vnode_free_list_lock); 292 numvnodes--; 293 mutex_exit(&vnode_free_list_lock); 294 } 295 296 /* 297 * Note: the vnode interlock will either be freed, of reference 298 * dropped (if VI_LOCKSHARE was in use). 299 */ 300 uvm_obj_destroy(&vp->v_uobj, true); 301 cv_destroy(&vp->v_cv); 302 pool_cache_put(vnode_cache, vp); 303 } 304 305 /* 306 * cleanvnode: grab a vnode from freelist, clean and free it. 307 * 308 * => Releases vnode_free_list_lock. 309 */ 310 static int 311 cleanvnode(void) 312 { 313 vnode_t *vp; 314 vnodelst_t *listhd; 315 struct mount *mp; 316 317 KASSERT(mutex_owned(&vnode_free_list_lock)); 318 319 listhd = &vnode_free_list; 320 try_nextlist: 321 TAILQ_FOREACH(vp, listhd, v_freelist) { 322 /* 323 * It's safe to test v_usecount and v_iflag 324 * without holding the interlock here, since 325 * these vnodes should never appear on the 326 * lists. 327 */ 328 KASSERT(vp->v_usecount == 0); 329 KASSERT((vp->v_iflag & VI_CLEAN) == 0); 330 KASSERT(vp->v_freelisthd == listhd); 331 332 if (!mutex_tryenter(vp->v_interlock)) 333 continue; 334 if ((vp->v_iflag & VI_XLOCK) != 0) { 335 mutex_exit(vp->v_interlock); 336 continue; 337 } 338 mp = vp->v_mount; 339 if (fstrans_start_nowait(mp, FSTRANS_SHARED) != 0) { 340 mutex_exit(vp->v_interlock); 341 continue; 342 } 343 break; 344 } 345 346 if (vp == NULL) { 347 if (listhd == &vnode_free_list) { 348 listhd = &vnode_hold_list; 349 goto try_nextlist; 350 } 351 mutex_exit(&vnode_free_list_lock); 352 return EBUSY; 353 } 354 355 /* Remove it from the freelist. */ 356 TAILQ_REMOVE(listhd, vp, v_freelist); 357 vp->v_freelisthd = NULL; 358 mutex_exit(&vnode_free_list_lock); 359 360 KASSERT(vp->v_usecount == 0); 361 362 /* 363 * The vnode is still associated with a file system, so we must 364 * clean it out before freeing it. We need to add a reference 365 * before doing this. 366 */ 367 vp->v_usecount = 1; 368 KASSERT((vp->v_iflag & VI_CHANGING) == 0); 369 vp->v_iflag |= VI_CHANGING; 370 vclean(vp); 371 vrelel(vp, VRELEL_CHANGING_SET); 372 fstrans_done(mp); 373 374 return 0; 375 } 376 377 /* 378 * getnewvnode: return a fresh vnode. 379 * 380 * => Returns referenced vnode, moved into the mount queue. 381 * => Shares the interlock specified by 'slock', if it is not NULL. 382 */ 383 int 384 getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *), 385 kmutex_t *slock, vnode_t **vpp) 386 { 387 struct uvm_object *uobj __diagused; 388 vnode_t *vp; 389 int error = 0; 390 391 if (mp != NULL) { 392 /* 393 * Mark filesystem busy while we are creating a vnode. 394 * If unmount is in progress, this will fail. 395 */ 396 error = vfs_busy(mp, NULL); 397 if (error) 398 return error; 399 } 400 401 vp = NULL; 402 403 /* Allocate a new vnode. */ 404 vp = vnalloc(NULL); 405 406 KASSERT(vp->v_freelisthd == NULL); 407 KASSERT(LIST_EMPTY(&vp->v_nclist)); 408 KASSERT(LIST_EMPTY(&vp->v_dnclist)); 409 KASSERT(vp->v_data == NULL); 410 411 /* Initialize vnode. */ 412 vp->v_tag = tag; 413 vp->v_op = vops; 414 415 uobj = &vp->v_uobj; 416 KASSERT(uobj->pgops == &uvm_vnodeops); 417 KASSERT(uobj->uo_npages == 0); 418 KASSERT(TAILQ_FIRST(&uobj->memq) == NULL); 419 420 /* Share the vnode_t::v_interlock, if requested. */ 421 if (slock) { 422 /* Set the interlock and mark that it is shared. */ 423 KASSERT(vp->v_mount == NULL); 424 mutex_obj_hold(slock); 425 uvm_obj_setlock(&vp->v_uobj, slock); 426 KASSERT(vp->v_interlock == slock); 427 vp->v_iflag |= VI_LOCKSHARE; 428 } 429 430 /* Finally, move vnode into the mount queue. */ 431 vfs_insmntque(vp, mp); 432 433 if (mp != NULL) { 434 if ((mp->mnt_iflag & IMNT_MPSAFE) != 0) 435 vp->v_vflag |= VV_MPSAFE; 436 vfs_unbusy(mp, true, NULL); 437 } 438 439 *vpp = vp; 440 return 0; 441 } 442 443 /* 444 * This is really just the reverse of getnewvnode(). Needed for 445 * VFS_VGET functions who may need to push back a vnode in case 446 * of a locking race. 447 */ 448 void 449 ungetnewvnode(vnode_t *vp) 450 { 451 452 KASSERT(vp->v_usecount == 1); 453 KASSERT(vp->v_data == NULL); 454 KASSERT(vp->v_freelisthd == NULL); 455 456 mutex_enter(vp->v_interlock); 457 vp->v_iflag |= VI_CLEAN; 458 vrelel(vp, 0); 459 } 460 461 /* 462 * Helper thread to keep the number of vnodes below desiredvnodes. 463 */ 464 static void 465 vdrain_thread(void *cookie) 466 { 467 int error; 468 469 mutex_enter(&vnode_free_list_lock); 470 471 for (;;) { 472 cv_timedwait(&vdrain_cv, &vnode_free_list_lock, hz); 473 while (numvnodes > desiredvnodes) { 474 error = cleanvnode(); 475 if (error) 476 kpause("vndsbusy", false, hz, NULL); 477 mutex_enter(&vnode_free_list_lock); 478 if (error) 479 break; 480 } 481 } 482 } 483 484 /* 485 * Remove a vnode from its freelist. 486 */ 487 void 488 vremfree(vnode_t *vp) 489 { 490 491 KASSERT(mutex_owned(vp->v_interlock)); 492 KASSERT(vp->v_usecount == 0); 493 494 /* 495 * Note that the reference count must not change until 496 * the vnode is removed. 497 */ 498 mutex_enter(&vnode_free_list_lock); 499 if (vp->v_holdcnt > 0) { 500 KASSERT(vp->v_freelisthd == &vnode_hold_list); 501 } else { 502 KASSERT(vp->v_freelisthd == &vnode_free_list); 503 } 504 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); 505 vp->v_freelisthd = NULL; 506 mutex_exit(&vnode_free_list_lock); 507 } 508 509 /* 510 * vget: get a particular vnode from the free list, increment its reference 511 * count and lock it. 512 * 513 * => Should be called with v_interlock held. 514 * 515 * If VI_CHANGING is set, the vnode may be eliminated in vgone()/vclean(). 516 * In that case, we cannot grab the vnode, so the process is awakened when 517 * the transition is completed, and an error returned to indicate that the 518 * vnode is no longer usable. 519 */ 520 int 521 vget(vnode_t *vp, int flags) 522 { 523 int error = 0; 524 525 KASSERT((vp->v_iflag & VI_MARKER) == 0); 526 KASSERT(mutex_owned(vp->v_interlock)); 527 KASSERT((flags & ~(LK_SHARED|LK_EXCLUSIVE|LK_NOWAIT)) == 0); 528 529 /* 530 * Before adding a reference, we must remove the vnode 531 * from its freelist. 532 */ 533 if (vp->v_usecount == 0) { 534 vremfree(vp); 535 vp->v_usecount = 1; 536 } else { 537 atomic_inc_uint(&vp->v_usecount); 538 } 539 540 /* 541 * If the vnode is in the process of changing state we wait 542 * for the change to complete and take care not to return 543 * a clean vnode. 544 */ 545 if ((vp->v_iflag & VI_CHANGING) != 0) { 546 if ((flags & LK_NOWAIT) != 0) { 547 vrelel(vp, 0); 548 return EBUSY; 549 } 550 vwait(vp, VI_CHANGING); 551 if ((vp->v_iflag & VI_CLEAN) != 0) { 552 vrelel(vp, 0); 553 return ENOENT; 554 } 555 } 556 557 /* 558 * Ok, we got it in good shape. Just locking left. 559 */ 560 KASSERT((vp->v_iflag & VI_CLEAN) == 0); 561 mutex_exit(vp->v_interlock); 562 if (flags & (LK_EXCLUSIVE | LK_SHARED)) { 563 error = vn_lock(vp, flags); 564 if (error != 0) { 565 vrele(vp); 566 } 567 } 568 return error; 569 } 570 571 /* 572 * vput: unlock and release the reference. 573 */ 574 void 575 vput(vnode_t *vp) 576 { 577 578 KASSERT((vp->v_iflag & VI_MARKER) == 0); 579 580 VOP_UNLOCK(vp); 581 vrele(vp); 582 } 583 584 /* 585 * Try to drop reference on a vnode. Abort if we are releasing the 586 * last reference. Note: this _must_ succeed if not the last reference. 587 */ 588 static inline bool 589 vtryrele(vnode_t *vp) 590 { 591 u_int use, next; 592 593 for (use = vp->v_usecount;; use = next) { 594 if (use == 1) { 595 return false; 596 } 597 KASSERT(use > 1); 598 next = atomic_cas_uint(&vp->v_usecount, use, use - 1); 599 if (__predict_true(next == use)) { 600 return true; 601 } 602 } 603 } 604 605 /* 606 * Vnode release. If reference count drops to zero, call inactive 607 * routine and either return to freelist or free to the pool. 608 */ 609 static void 610 vrelel(vnode_t *vp, int flags) 611 { 612 bool recycle, defer; 613 int error; 614 615 KASSERT(mutex_owned(vp->v_interlock)); 616 KASSERT((vp->v_iflag & VI_MARKER) == 0); 617 KASSERT(vp->v_freelisthd == NULL); 618 619 if (__predict_false(vp->v_op == dead_vnodeop_p && 620 (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) { 621 vnpanic(vp, "dead but not clean"); 622 } 623 624 /* 625 * If not the last reference, just drop the reference count 626 * and unlock. 627 */ 628 if (vtryrele(vp)) { 629 if ((flags & VRELEL_CHANGING_SET) != 0) { 630 KASSERT((vp->v_iflag & VI_CHANGING) != 0); 631 vp->v_iflag &= ~VI_CHANGING; 632 cv_broadcast(&vp->v_cv); 633 } 634 mutex_exit(vp->v_interlock); 635 return; 636 } 637 if (vp->v_usecount <= 0 || vp->v_writecount != 0) { 638 vnpanic(vp, "%s: bad ref count", __func__); 639 } 640 641 KASSERT((vp->v_iflag & VI_XLOCK) == 0); 642 643 #ifdef DIAGNOSTIC 644 if ((vp->v_type == VBLK || vp->v_type == VCHR) && 645 vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) { 646 vprint("vrelel: missing VOP_CLOSE()", vp); 647 } 648 #endif 649 650 /* 651 * If not clean, deactivate the vnode, but preserve 652 * our reference across the call to VOP_INACTIVE(). 653 */ 654 if ((vp->v_iflag & VI_CLEAN) == 0) { 655 recycle = false; 656 657 /* 658 * XXX This ugly block can be largely eliminated if 659 * locking is pushed down into the file systems. 660 * 661 * Defer vnode release to vrele_thread if caller 662 * requests it explicitly or is the pagedaemon. 663 */ 664 if ((curlwp == uvm.pagedaemon_lwp) || 665 (flags & VRELEL_ASYNC_RELE) != 0) { 666 defer = true; 667 } else if (curlwp == vrele_lwp) { 668 /* 669 * We have to try harder. 670 */ 671 mutex_exit(vp->v_interlock); 672 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 673 KASSERT(error == 0); 674 mutex_enter(vp->v_interlock); 675 defer = false; 676 } else { 677 /* If we can't acquire the lock, then defer. */ 678 mutex_exit(vp->v_interlock); 679 error = vn_lock(vp, 680 LK_EXCLUSIVE | LK_RETRY | LK_NOWAIT); 681 defer = (error != 0); 682 mutex_enter(vp->v_interlock); 683 } 684 685 KASSERT(mutex_owned(vp->v_interlock)); 686 KASSERT(! (curlwp == vrele_lwp && defer)); 687 688 if (defer) { 689 /* 690 * Defer reclaim to the kthread; it's not safe to 691 * clean it here. We donate it our last reference. 692 */ 693 if ((flags & VRELEL_CHANGING_SET) != 0) { 694 KASSERT((vp->v_iflag & VI_CHANGING) != 0); 695 vp->v_iflag &= ~VI_CHANGING; 696 cv_broadcast(&vp->v_cv); 697 } 698 mutex_enter(&vrele_lock); 699 TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist); 700 if (++vrele_pending > (desiredvnodes >> 8)) 701 cv_signal(&vrele_cv); 702 mutex_exit(&vrele_lock); 703 mutex_exit(vp->v_interlock); 704 return; 705 } 706 707 /* 708 * If the node got another reference while we 709 * released the interlock, don't try to inactivate it yet. 710 */ 711 if (__predict_false(vtryrele(vp))) { 712 VOP_UNLOCK(vp); 713 if ((flags & VRELEL_CHANGING_SET) != 0) { 714 KASSERT((vp->v_iflag & VI_CHANGING) != 0); 715 vp->v_iflag &= ~VI_CHANGING; 716 cv_broadcast(&vp->v_cv); 717 } 718 mutex_exit(vp->v_interlock); 719 return; 720 } 721 722 if ((flags & VRELEL_CHANGING_SET) == 0) { 723 KASSERT((vp->v_iflag & VI_CHANGING) == 0); 724 vp->v_iflag |= VI_CHANGING; 725 } 726 mutex_exit(vp->v_interlock); 727 728 /* 729 * The vnode can gain another reference while being 730 * deactivated. If VOP_INACTIVE() indicates that 731 * the described file has been deleted, then recycle 732 * the vnode irrespective of additional references. 733 * Another thread may be waiting to re-use the on-disk 734 * inode. 735 * 736 * Note that VOP_INACTIVE() will drop the vnode lock. 737 */ 738 VOP_INACTIVE(vp, &recycle); 739 mutex_enter(vp->v_interlock); 740 if (!recycle) { 741 if (vtryrele(vp)) { 742 KASSERT((vp->v_iflag & VI_CHANGING) != 0); 743 vp->v_iflag &= ~VI_CHANGING; 744 cv_broadcast(&vp->v_cv); 745 mutex_exit(vp->v_interlock); 746 return; 747 } 748 } 749 750 /* Take care of space accounting. */ 751 if (vp->v_iflag & VI_EXECMAP) { 752 atomic_add_int(&uvmexp.execpages, 753 -vp->v_uobj.uo_npages); 754 atomic_add_int(&uvmexp.filepages, 755 vp->v_uobj.uo_npages); 756 } 757 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP); 758 vp->v_vflag &= ~VV_MAPPED; 759 760 /* 761 * Recycle the vnode if the file is now unused (unlinked), 762 * otherwise just free it. 763 */ 764 if (recycle) { 765 vclean(vp); 766 } 767 KASSERT(vp->v_usecount > 0); 768 } else { /* vnode was already clean */ 769 if ((flags & VRELEL_CHANGING_SET) == 0) { 770 KASSERT((vp->v_iflag & VI_CHANGING) == 0); 771 vp->v_iflag |= VI_CHANGING; 772 } 773 } 774 775 if (atomic_dec_uint_nv(&vp->v_usecount) != 0) { 776 /* Gained another reference while being reclaimed. */ 777 KASSERT((vp->v_iflag & VI_CHANGING) != 0); 778 vp->v_iflag &= ~VI_CHANGING; 779 cv_broadcast(&vp->v_cv); 780 mutex_exit(vp->v_interlock); 781 return; 782 } 783 784 if ((vp->v_iflag & VI_CLEAN) != 0) { 785 /* 786 * It's clean so destroy it. It isn't referenced 787 * anywhere since it has been reclaimed. 788 */ 789 KASSERT(vp->v_holdcnt == 0); 790 KASSERT(vp->v_writecount == 0); 791 mutex_exit(vp->v_interlock); 792 vfs_insmntque(vp, NULL); 793 if (vp->v_type == VBLK || vp->v_type == VCHR) { 794 spec_node_destroy(vp); 795 } 796 vnfree(vp); 797 } else { 798 /* 799 * Otherwise, put it back onto the freelist. It 800 * can't be destroyed while still associated with 801 * a file system. 802 */ 803 mutex_enter(&vnode_free_list_lock); 804 if (vp->v_holdcnt > 0) { 805 vp->v_freelisthd = &vnode_hold_list; 806 } else { 807 vp->v_freelisthd = &vnode_free_list; 808 } 809 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); 810 mutex_exit(&vnode_free_list_lock); 811 KASSERT((vp->v_iflag & VI_CHANGING) != 0); 812 vp->v_iflag &= ~VI_CHANGING; 813 cv_broadcast(&vp->v_cv); 814 mutex_exit(vp->v_interlock); 815 } 816 } 817 818 void 819 vrele(vnode_t *vp) 820 { 821 822 KASSERT((vp->v_iflag & VI_MARKER) == 0); 823 824 if (vtryrele(vp)) { 825 return; 826 } 827 mutex_enter(vp->v_interlock); 828 vrelel(vp, 0); 829 } 830 831 /* 832 * Asynchronous vnode release, vnode is released in different context. 833 */ 834 void 835 vrele_async(vnode_t *vp) 836 { 837 838 KASSERT((vp->v_iflag & VI_MARKER) == 0); 839 840 if (vtryrele(vp)) { 841 return; 842 } 843 mutex_enter(vp->v_interlock); 844 vrelel(vp, VRELEL_ASYNC_RELE); 845 } 846 847 static void 848 vrele_thread(void *cookie) 849 { 850 vnodelst_t skip_list; 851 vnode_t *vp; 852 struct mount *mp; 853 854 TAILQ_INIT(&skip_list); 855 856 mutex_enter(&vrele_lock); 857 for (;;) { 858 while (TAILQ_EMPTY(&vrele_list)) { 859 vrele_gen++; 860 cv_broadcast(&vrele_cv); 861 cv_timedwait(&vrele_cv, &vrele_lock, hz); 862 TAILQ_CONCAT(&vrele_list, &skip_list, v_freelist); 863 } 864 vp = TAILQ_FIRST(&vrele_list); 865 mp = vp->v_mount; 866 TAILQ_REMOVE(&vrele_list, vp, v_freelist); 867 if (fstrans_start_nowait(mp, FSTRANS_LAZY) != 0) { 868 TAILQ_INSERT_TAIL(&skip_list, vp, v_freelist); 869 continue; 870 } 871 vrele_pending--; 872 mutex_exit(&vrele_lock); 873 874 /* 875 * If not the last reference, then ignore the vnode 876 * and look for more work. 877 */ 878 mutex_enter(vp->v_interlock); 879 vrelel(vp, 0); 880 fstrans_done(mp); 881 mutex_enter(&vrele_lock); 882 } 883 } 884 885 void 886 vrele_flush(void) 887 { 888 int gen; 889 890 mutex_enter(&vrele_lock); 891 gen = vrele_gen; 892 while (vrele_pending && gen == vrele_gen) { 893 cv_broadcast(&vrele_cv); 894 cv_wait(&vrele_cv, &vrele_lock); 895 } 896 mutex_exit(&vrele_lock); 897 } 898 899 /* 900 * Vnode reference, where a reference is already held by some other 901 * object (for example, a file structure). 902 */ 903 void 904 vref(vnode_t *vp) 905 { 906 907 KASSERT((vp->v_iflag & VI_MARKER) == 0); 908 KASSERT(vp->v_usecount != 0); 909 910 atomic_inc_uint(&vp->v_usecount); 911 } 912 913 /* 914 * Page or buffer structure gets a reference. 915 * Called with v_interlock held. 916 */ 917 void 918 vholdl(vnode_t *vp) 919 { 920 921 KASSERT(mutex_owned(vp->v_interlock)); 922 KASSERT((vp->v_iflag & VI_MARKER) == 0); 923 924 if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) { 925 mutex_enter(&vnode_free_list_lock); 926 KASSERT(vp->v_freelisthd == &vnode_free_list); 927 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); 928 vp->v_freelisthd = &vnode_hold_list; 929 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); 930 mutex_exit(&vnode_free_list_lock); 931 } 932 } 933 934 /* 935 * Page or buffer structure frees a reference. 936 * Called with v_interlock held. 937 */ 938 void 939 holdrelel(vnode_t *vp) 940 { 941 942 KASSERT(mutex_owned(vp->v_interlock)); 943 KASSERT((vp->v_iflag & VI_MARKER) == 0); 944 945 if (vp->v_holdcnt <= 0) { 946 vnpanic(vp, "%s: holdcnt vp %p", __func__, vp); 947 } 948 949 vp->v_holdcnt--; 950 if (vp->v_holdcnt == 0 && vp->v_usecount == 0) { 951 mutex_enter(&vnode_free_list_lock); 952 KASSERT(vp->v_freelisthd == &vnode_hold_list); 953 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); 954 vp->v_freelisthd = &vnode_free_list; 955 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); 956 mutex_exit(&vnode_free_list_lock); 957 } 958 } 959 960 /* 961 * Disassociate the underlying file system from a vnode. 962 * 963 * Must be called with the interlock held, and will return with it held. 964 */ 965 static void 966 vclean(vnode_t *vp) 967 { 968 lwp_t *l = curlwp; 969 bool recycle, active, doclose; 970 int error; 971 972 KASSERT(mutex_owned(vp->v_interlock)); 973 KASSERT((vp->v_iflag & VI_MARKER) == 0); 974 KASSERT(vp->v_usecount != 0); 975 976 /* If already clean, nothing to do. */ 977 if ((vp->v_iflag & VI_CLEAN) != 0) { 978 return; 979 } 980 981 active = (vp->v_usecount > 1); 982 doclose = ! (active && vp->v_type == VBLK && 983 spec_node_getmountedfs(vp) != NULL); 984 mutex_exit(vp->v_interlock); 985 986 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 987 988 /* 989 * Prevent the vnode from being recycled or brought into use 990 * while we clean it out. 991 */ 992 mutex_enter(vp->v_interlock); 993 KASSERT((vp->v_iflag & (VI_XLOCK | VI_CLEAN)) == 0); 994 vp->v_iflag |= VI_XLOCK; 995 if (vp->v_iflag & VI_EXECMAP) { 996 atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages); 997 atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages); 998 } 999 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP); 1000 mutex_exit(vp->v_interlock); 1001 1002 /* 1003 * Clean out any cached data associated with the vnode. 1004 * If purging an active vnode, it must be closed and 1005 * deactivated before being reclaimed. Note that the 1006 * VOP_INACTIVE will unlock the vnode. 1007 */ 1008 if (doclose) { 1009 error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0); 1010 if (error != 0) { 1011 if (wapbl_vphaswapbl(vp)) 1012 WAPBL_DISCARD(wapbl_vptomp(vp)); 1013 error = vinvalbuf(vp, 0, NOCRED, l, 0, 0); 1014 } 1015 KASSERT(error == 0); 1016 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); 1017 if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) { 1018 spec_node_revoke(vp); 1019 } 1020 } 1021 if (active) { 1022 VOP_INACTIVE(vp, &recycle); 1023 } else { 1024 /* 1025 * Any other processes trying to obtain this lock must first 1026 * wait for VI_XLOCK to clear, then call the new lock operation. 1027 */ 1028 VOP_UNLOCK(vp); 1029 } 1030 1031 /* Disassociate the underlying file system from the vnode. */ 1032 if (VOP_RECLAIM(vp)) { 1033 vnpanic(vp, "%s: cannot reclaim", __func__); 1034 } 1035 1036 KASSERT(vp->v_data == NULL); 1037 KASSERT(vp->v_uobj.uo_npages == 0); 1038 1039 if (vp->v_type == VREG && vp->v_ractx != NULL) { 1040 uvm_ra_freectx(vp->v_ractx); 1041 vp->v_ractx = NULL; 1042 } 1043 1044 /* Purge name cache. */ 1045 cache_purge(vp); 1046 1047 /* Move to dead mount. */ 1048 vp->v_vflag &= ~VV_ROOT; 1049 atomic_inc_uint(&dead_mount->mnt_refcnt); 1050 vfs_insmntque(vp, dead_mount); 1051 1052 /* Done with purge, notify sleepers of the grim news. */ 1053 mutex_enter(vp->v_interlock); 1054 if (doclose) { 1055 vp->v_op = dead_vnodeop_p; 1056 vp->v_vflag |= VV_LOCKSWORK; 1057 vp->v_iflag |= VI_CLEAN; 1058 } else { 1059 vp->v_op = spec_vnodeop_p; 1060 vp->v_vflag &= ~VV_LOCKSWORK; 1061 } 1062 vp->v_tag = VT_NON; 1063 KNOTE(&vp->v_klist, NOTE_REVOKE); 1064 vp->v_iflag &= ~VI_XLOCK; 1065 cv_broadcast(&vp->v_cv); 1066 1067 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); 1068 } 1069 1070 /* 1071 * Recycle an unused vnode if caller holds the last reference. 1072 */ 1073 bool 1074 vrecycle(vnode_t *vp) 1075 { 1076 1077 mutex_enter(vp->v_interlock); 1078 1079 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1080 1081 if (vp->v_usecount != 1) { 1082 mutex_exit(vp->v_interlock); 1083 return false; 1084 } 1085 if ((vp->v_iflag & VI_CHANGING) != 0) 1086 vwait(vp, VI_CHANGING); 1087 if (vp->v_usecount != 1) { 1088 mutex_exit(vp->v_interlock); 1089 return false; 1090 } else if ((vp->v_iflag & VI_CLEAN) != 0) { 1091 mutex_exit(vp->v_interlock); 1092 return true; 1093 } 1094 vp->v_iflag |= VI_CHANGING; 1095 vclean(vp); 1096 vrelel(vp, VRELEL_CHANGING_SET); 1097 return true; 1098 } 1099 1100 /* 1101 * Eliminate all activity associated with the requested vnode 1102 * and with all vnodes aliased to the requested vnode. 1103 */ 1104 void 1105 vrevoke(vnode_t *vp) 1106 { 1107 vnode_t *vq; 1108 enum vtype type; 1109 dev_t dev; 1110 1111 KASSERT(vp->v_usecount > 0); 1112 1113 mutex_enter(vp->v_interlock); 1114 if ((vp->v_iflag & VI_CLEAN) != 0) { 1115 mutex_exit(vp->v_interlock); 1116 return; 1117 } else if (vp->v_type != VBLK && vp->v_type != VCHR) { 1118 atomic_inc_uint(&vp->v_usecount); 1119 mutex_exit(vp->v_interlock); 1120 vgone(vp); 1121 return; 1122 } else { 1123 dev = vp->v_rdev; 1124 type = vp->v_type; 1125 mutex_exit(vp->v_interlock); 1126 } 1127 1128 while (spec_node_lookup_by_dev(type, dev, &vq) == 0) { 1129 vgone(vq); 1130 } 1131 } 1132 1133 /* 1134 * Eliminate all activity associated with a vnode in preparation for 1135 * reuse. Drops a reference from the vnode. 1136 */ 1137 void 1138 vgone(vnode_t *vp) 1139 { 1140 1141 mutex_enter(vp->v_interlock); 1142 if ((vp->v_iflag & VI_CHANGING) != 0) 1143 vwait(vp, VI_CHANGING); 1144 vp->v_iflag |= VI_CHANGING; 1145 vclean(vp); 1146 vrelel(vp, VRELEL_CHANGING_SET); 1147 } 1148 1149 static inline uint32_t 1150 vcache_hash(const struct vcache_key *key) 1151 { 1152 uint32_t hash = HASH32_BUF_INIT; 1153 1154 hash = hash32_buf(&key->vk_mount, sizeof(struct mount *), hash); 1155 hash = hash32_buf(key->vk_key, key->vk_key_len, hash); 1156 return hash; 1157 } 1158 1159 static void 1160 vcache_init(void) 1161 { 1162 1163 vcache.pool = pool_cache_init(sizeof(struct vcache_node), 0, 0, 0, 1164 "vcachepl", NULL, IPL_NONE, NULL, NULL, NULL); 1165 KASSERT(vcache.pool != NULL); 1166 mutex_init(&vcache.lock, MUTEX_DEFAULT, IPL_NONE); 1167 vcache.hashtab = hashinit(desiredvnodes, HASH_SLIST, true, 1168 &vcache.hashmask); 1169 } 1170 1171 static void 1172 vcache_reinit(void) 1173 { 1174 int i; 1175 uint32_t hash; 1176 u_long oldmask, newmask; 1177 struct hashhead *oldtab, *newtab; 1178 struct vcache_node *node; 1179 1180 newtab = hashinit(desiredvnodes, HASH_SLIST, true, &newmask); 1181 mutex_enter(&vcache.lock); 1182 oldtab = vcache.hashtab; 1183 oldmask = vcache.hashmask; 1184 vcache.hashtab = newtab; 1185 vcache.hashmask = newmask; 1186 for (i = 0; i <= oldmask; i++) { 1187 while ((node = SLIST_FIRST(&oldtab[i])) != NULL) { 1188 SLIST_REMOVE(&oldtab[i], node, vcache_node, vn_hash); 1189 hash = vcache_hash(&node->vn_key); 1190 SLIST_INSERT_HEAD(&newtab[hash & vcache.hashmask], 1191 node, vn_hash); 1192 } 1193 } 1194 mutex_exit(&vcache.lock); 1195 hashdone(oldtab, HASH_SLIST, oldmask); 1196 } 1197 1198 static inline struct vcache_node * 1199 vcache_hash_lookup(const struct vcache_key *key, uint32_t hash) 1200 { 1201 struct hashhead *hashp; 1202 struct vcache_node *node; 1203 1204 KASSERT(mutex_owned(&vcache.lock)); 1205 1206 hashp = &vcache.hashtab[hash & vcache.hashmask]; 1207 SLIST_FOREACH(node, hashp, vn_hash) { 1208 if (key->vk_mount != node->vn_key.vk_mount) 1209 continue; 1210 if (key->vk_key_len != node->vn_key.vk_key_len) 1211 continue; 1212 if (memcmp(key->vk_key, node->vn_key.vk_key, key->vk_key_len)) 1213 continue; 1214 return node; 1215 } 1216 return NULL; 1217 } 1218 1219 /* 1220 * Get a vnode / fs node pair by key and return it referenced through vpp. 1221 */ 1222 int 1223 vcache_get(struct mount *mp, const void *key, size_t key_len, 1224 struct vnode **vpp) 1225 { 1226 int error; 1227 uint32_t hash; 1228 const void *new_key; 1229 struct vnode *vp; 1230 struct vcache_key vcache_key; 1231 struct vcache_node *node, *new_node; 1232 1233 new_key = NULL; 1234 *vpp = NULL; 1235 1236 vcache_key.vk_mount = mp; 1237 vcache_key.vk_key = key; 1238 vcache_key.vk_key_len = key_len; 1239 hash = vcache_hash(&vcache_key); 1240 1241 again: 1242 mutex_enter(&vcache.lock); 1243 node = vcache_hash_lookup(&vcache_key, hash); 1244 1245 /* If found, take a reference or retry. */ 1246 if (__predict_true(node != NULL && node->vn_vnode != NULL)) { 1247 vp = node->vn_vnode; 1248 mutex_enter(vp->v_interlock); 1249 mutex_exit(&vcache.lock); 1250 error = vget(vp, 0); 1251 if (error == ENOENT) 1252 goto again; 1253 if (error == 0) 1254 *vpp = vp; 1255 KASSERT((error != 0) == (*vpp == NULL)); 1256 return error; 1257 } 1258 1259 /* If another thread loads this node, wait and retry. */ 1260 if (node != NULL) { 1261 KASSERT(node->vn_vnode == NULL); 1262 mutex_exit(&vcache.lock); 1263 kpause("vcache", false, mstohz(20), NULL); 1264 goto again; 1265 } 1266 mutex_exit(&vcache.lock); 1267 1268 /* Allocate and initialize a new vcache / vnode pair. */ 1269 error = vfs_busy(mp, NULL); 1270 if (error) 1271 return error; 1272 new_node = pool_cache_get(vcache.pool, PR_WAITOK); 1273 new_node->vn_vnode = NULL; 1274 new_node->vn_key = vcache_key; 1275 vp = vnalloc(NULL); 1276 mutex_enter(&vcache.lock); 1277 node = vcache_hash_lookup(&vcache_key, hash); 1278 if (node == NULL) { 1279 SLIST_INSERT_HEAD(&vcache.hashtab[hash & vcache.hashmask], 1280 new_node, vn_hash); 1281 node = new_node; 1282 } 1283 mutex_exit(&vcache.lock); 1284 1285 /* If another thread beat us inserting this node, retry. */ 1286 if (node != new_node) { 1287 pool_cache_put(vcache.pool, new_node); 1288 KASSERT(vp->v_usecount == 1); 1289 vp->v_usecount = 0; 1290 vnfree(vp); 1291 vfs_unbusy(mp, false, NULL); 1292 goto again; 1293 } 1294 1295 /* Load the fs node. Exclusive as new_node->vn_vnode is NULL. */ 1296 error = VFS_LOADVNODE(mp, vp, key, key_len, &new_key); 1297 if (error) { 1298 mutex_enter(&vcache.lock); 1299 SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask], 1300 new_node, vcache_node, vn_hash); 1301 mutex_exit(&vcache.lock); 1302 pool_cache_put(vcache.pool, new_node); 1303 KASSERT(vp->v_usecount == 1); 1304 vp->v_usecount = 0; 1305 vnfree(vp); 1306 vfs_unbusy(mp, false, NULL); 1307 KASSERT(*vpp == NULL); 1308 return error; 1309 } 1310 KASSERT(new_key != NULL); 1311 KASSERT(memcmp(key, new_key, key_len) == 0); 1312 KASSERT(vp->v_op != NULL); 1313 vfs_insmntque(vp, mp); 1314 if ((mp->mnt_iflag & IMNT_MPSAFE) != 0) 1315 vp->v_vflag |= VV_MPSAFE; 1316 vfs_unbusy(mp, true, NULL); 1317 1318 /* Finished loading, finalize node. */ 1319 mutex_enter(&vcache.lock); 1320 new_node->vn_key.vk_key = new_key; 1321 new_node->vn_vnode = vp; 1322 mutex_exit(&vcache.lock); 1323 *vpp = vp; 1324 return 0; 1325 } 1326 1327 /* 1328 * Prepare key change: lock old and new cache node. 1329 * Return an error if the new node already exists. 1330 */ 1331 int 1332 vcache_rekey_enter(struct mount *mp, struct vnode *vp, 1333 const void *old_key, size_t old_key_len, 1334 const void *new_key, size_t new_key_len) 1335 { 1336 uint32_t old_hash, new_hash; 1337 struct vcache_key old_vcache_key, new_vcache_key; 1338 struct vcache_node *node, *new_node; 1339 1340 old_vcache_key.vk_mount = mp; 1341 old_vcache_key.vk_key = old_key; 1342 old_vcache_key.vk_key_len = old_key_len; 1343 old_hash = vcache_hash(&old_vcache_key); 1344 1345 new_vcache_key.vk_mount = mp; 1346 new_vcache_key.vk_key = new_key; 1347 new_vcache_key.vk_key_len = new_key_len; 1348 new_hash = vcache_hash(&new_vcache_key); 1349 1350 new_node = pool_cache_get(vcache.pool, PR_WAITOK); 1351 new_node->vn_vnode = NULL; 1352 new_node->vn_key = new_vcache_key; 1353 1354 mutex_enter(&vcache.lock); 1355 node = vcache_hash_lookup(&new_vcache_key, new_hash); 1356 if (node != NULL) { 1357 mutex_exit(&vcache.lock); 1358 pool_cache_put(vcache.pool, new_node); 1359 return EEXIST; 1360 } 1361 SLIST_INSERT_HEAD(&vcache.hashtab[new_hash & vcache.hashmask], 1362 new_node, vn_hash); 1363 node = vcache_hash_lookup(&old_vcache_key, old_hash); 1364 KASSERT(node != NULL); 1365 KASSERT(node->vn_vnode == vp); 1366 node->vn_vnode = NULL; 1367 node->vn_key = old_vcache_key; 1368 mutex_exit(&vcache.lock); 1369 return 0; 1370 } 1371 1372 /* 1373 * Key change complete: remove old node and unlock new node. 1374 */ 1375 void 1376 vcache_rekey_exit(struct mount *mp, struct vnode *vp, 1377 const void *old_key, size_t old_key_len, 1378 const void *new_key, size_t new_key_len) 1379 { 1380 uint32_t old_hash, new_hash; 1381 struct vcache_key old_vcache_key, new_vcache_key; 1382 struct vcache_node *node; 1383 1384 old_vcache_key.vk_mount = mp; 1385 old_vcache_key.vk_key = old_key; 1386 old_vcache_key.vk_key_len = old_key_len; 1387 old_hash = vcache_hash(&old_vcache_key); 1388 1389 new_vcache_key.vk_mount = mp; 1390 new_vcache_key.vk_key = new_key; 1391 new_vcache_key.vk_key_len = new_key_len; 1392 new_hash = vcache_hash(&new_vcache_key); 1393 1394 mutex_enter(&vcache.lock); 1395 node = vcache_hash_lookup(&new_vcache_key, new_hash); 1396 KASSERT(node != NULL && node->vn_vnode == NULL); 1397 KASSERT(node->vn_key.vk_key_len == new_key_len); 1398 node->vn_vnode = vp; 1399 node->vn_key = new_vcache_key; 1400 node = vcache_hash_lookup(&old_vcache_key, old_hash); 1401 KASSERT(node != NULL); 1402 KASSERT(node->vn_vnode == NULL); 1403 SLIST_REMOVE(&vcache.hashtab[old_hash & vcache.hashmask], 1404 node, vcache_node, vn_hash); 1405 mutex_exit(&vcache.lock); 1406 pool_cache_put(vcache.pool, node); 1407 } 1408 1409 /* 1410 * Remove a vnode / fs node pair from the cache. 1411 */ 1412 void 1413 vcache_remove(struct mount *mp, const void *key, size_t key_len) 1414 { 1415 uint32_t hash; 1416 struct vcache_key vcache_key; 1417 struct vcache_node *node; 1418 1419 vcache_key.vk_mount = mp; 1420 vcache_key.vk_key = key; 1421 vcache_key.vk_key_len = key_len; 1422 hash = vcache_hash(&vcache_key); 1423 1424 mutex_enter(&vcache.lock); 1425 node = vcache_hash_lookup(&vcache_key, hash); 1426 KASSERT(node != NULL); 1427 SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask], 1428 node, vcache_node, vn_hash); 1429 mutex_exit(&vcache.lock); 1430 pool_cache_put(vcache.pool, node); 1431 } 1432 1433 /* 1434 * Update outstanding I/O count and do wakeup if requested. 1435 */ 1436 void 1437 vwakeup(struct buf *bp) 1438 { 1439 vnode_t *vp; 1440 1441 if ((vp = bp->b_vp) == NULL) 1442 return; 1443 1444 KASSERT(bp->b_objlock == vp->v_interlock); 1445 KASSERT(mutex_owned(bp->b_objlock)); 1446 1447 if (--vp->v_numoutput < 0) 1448 vnpanic(vp, "%s: neg numoutput, vp %p", __func__, vp); 1449 if (vp->v_numoutput == 0) 1450 cv_broadcast(&vp->v_cv); 1451 } 1452 1453 /* 1454 * Test a vnode for being or becoming dead. Returns one of: 1455 * EBUSY: vnode is becoming dead, with "flags == VDEAD_NOWAIT" only. 1456 * ENOENT: vnode is dead. 1457 * 0: otherwise. 1458 * 1459 * Whenever this function returns a non-zero value all future 1460 * calls will also return a non-zero value. 1461 */ 1462 int 1463 vdead_check(struct vnode *vp, int flags) 1464 { 1465 1466 KASSERT(mutex_owned(vp->v_interlock)); 1467 if (ISSET(vp->v_iflag, VI_XLOCK)) { 1468 if (ISSET(flags, VDEAD_NOWAIT)) 1469 return EBUSY; 1470 vwait(vp, VI_XLOCK); 1471 KASSERT(ISSET(vp->v_iflag, VI_CLEAN)); 1472 } 1473 if (ISSET(vp->v_iflag, VI_CLEAN)) 1474 return ENOENT; 1475 return 0; 1476 } 1477 1478 /* 1479 * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or 1480 * recycled. 1481 */ 1482 static void 1483 vwait(vnode_t *vp, int flags) 1484 { 1485 1486 KASSERT(mutex_owned(vp->v_interlock)); 1487 KASSERT(vp->v_usecount != 0); 1488 1489 while ((vp->v_iflag & flags) != 0) 1490 cv_wait(&vp->v_cv, vp->v_interlock); 1491 } 1492 1493 int 1494 vfs_drainvnodes(long target) 1495 { 1496 int error; 1497 1498 mutex_enter(&vnode_free_list_lock); 1499 1500 while (numvnodes > target) { 1501 error = cleanvnode(); 1502 if (error != 0) 1503 return error; 1504 mutex_enter(&vnode_free_list_lock); 1505 } 1506 1507 mutex_exit(&vnode_free_list_lock); 1508 1509 vcache_reinit(); 1510 1511 return 0; 1512 } 1513 1514 void 1515 vnpanic(vnode_t *vp, const char *fmt, ...) 1516 { 1517 va_list ap; 1518 1519 #ifdef DIAGNOSTIC 1520 vprint(NULL, vp); 1521 #endif 1522 va_start(ap, fmt); 1523 vpanic(fmt, ap); 1524 va_end(ap); 1525 } 1526