/*	$NetBSD: vfs_vnode.c,v 1.37 2014/07/05 09:33:15 hannken Exp $	*/

/*-
 * Copyright (c) 1997-2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * The vnode cache subsystem.
 *
 * Life-cycle
 *
 *	Normally, there are two points where new vnodes are created:
 *	VOP_CREATE(9) and VOP_LOOKUP(9).  The life-cycle of a vnode
 *	starts in one of the following ways:
 *
 *	- Allocation, via getnewvnode(9) and/or vnalloc(9).
 *	- Reclamation of an inactive vnode, via vget(9).
 *
 *	Recycling from a free list, via getnewvnode(9) -> getcleanvnode(9),
 *	was another, traditional way.  Currently, only the draining thread
 *	recycles vnodes.  This behaviour might be revisited.
 *
 *	The life-cycle ends when the last reference is dropped, usually
 *	in VOP_REMOVE(9).  In that case, VOP_INACTIVE(9) is called to inform
 *	the file system that the vnode is inactive.  Via this call, the file
 *	system indicates whether the vnode can be recycled (usually by
 *	checking its own references, e.g. the link count, or whether the
 *	file was removed).
 *
 *	Depending on that indication, the vnode can be put onto a free list
 *	(cache), or cleaned via vclean(9), which calls VOP_RECLAIM(9) to
 *	disassociate the underlying file system from the vnode, after which
 *	it is finally destroyed.
 *
 * Reference counting
 *
 *	A vnode is considered active if its reference count
 *	(vnode_t::v_usecount) is non-zero.  It is maintained using the
 *	vref(9) and vrele(9) routines, as well as vput(9).  Common points
 *	holding references are e.g. file openings, the current working
 *	directory, mount points, etc.
 *
 * Note on v_usecount and its locking
 *
 *	At nearly all points where it is known that v_usecount could be
 *	zero, vnode_t::v_interlock will be held.  To change v_usecount away
 *	from zero, the interlock must be held.  To change from a non-zero
 *	value to zero, again the interlock must be held.
 *
 *	Changing the usecount from a non-zero value to a non-zero value can
 *	safely be done using atomic operations, without the interlock held.
 *
 *	Note: if VI_CLEAN is set, vnode_t::v_interlock will be released while
 *	mntvnode_lock is still held.
 *
 *	See PR 41374.
 */
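
/*
 * Illustrative sketch (not part of the implementation): the locking rule
 * above translates into the following pattern for taking a reference on
 * a vnode whose usecount may currently be zero.  It mirrors what vget(9)
 * below does before locking the vnode; the caller shown is hypothetical.
 *
 *	mutex_enter(vp->v_interlock);
 *	if (vp->v_usecount == 0) {
 *		vremfree(vp);		(leave the freelist first)
 *		vp->v_usecount = 1;	(0 -> 1 requires the interlock)
 *	} else {
 *		atomic_inc_uint(&vp->v_usecount);
 *	}
 *	mutex_exit(vp->v_interlock);
 */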

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_vnode.c,v 1.37 2014/07/05 09:33:15 hannken Exp $");

#define _VFS_VNODE_PRIVATE

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/device.h>
#include <sys/hash.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/kthread.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/wapbl.h>
#include <sys/fstrans.h>

#include <uvm/uvm.h>
#include <uvm/uvm_readahead.h>

/* Flags to vrelel. */
#define	VRELEL_ASYNC_RELE	0x0001	/* Always defer to vrele thread. */
#define	VRELEL_CHANGING_SET	0x0002	/* VI_CHANGING set by caller. */

struct vcache_key {
	struct mount *vk_mount;
	const void *vk_key;
	size_t vk_key_len;
};
struct vcache_node {
	SLIST_ENTRY(vcache_node) vn_hash;
	struct vnode *vn_vnode;
	struct vcache_key vn_key;
};

u_int			numvnodes		__cacheline_aligned;

static pool_cache_t	vnode_cache		__read_mostly;
static struct mount	*dead_mount;

/*
 * There are two free lists: one is for vnodes which have no buffer/page
 * references and one for those which do (i.e. v_holdcnt is non-zero).
 * Vnode recycling mechanism first attempts to look into the former list.
 */
static kmutex_t		vnode_free_list_lock	__cacheline_aligned;
static vnodelst_t	vnode_free_list		__cacheline_aligned;
static vnodelst_t	vnode_hold_list		__cacheline_aligned;
static kcondvar_t	vdrain_cv		__cacheline_aligned;

static vnodelst_t	vrele_list		__cacheline_aligned;
static kmutex_t		vrele_lock		__cacheline_aligned;
static kcondvar_t	vrele_cv		__cacheline_aligned;
static lwp_t *		vrele_lwp		__cacheline_aligned;
static int		vrele_pending		__cacheline_aligned;
static int		vrele_gen		__cacheline_aligned;

static struct {
	kmutex_t	lock;
	u_long		hashmask;
	SLIST_HEAD(hashhead, vcache_node)	*hashtab;
	pool_cache_t	pool;
}			vcache			__cacheline_aligned;

static int		cleanvnode(void);
static void		vcache_init(void);
static void		vcache_reinit(void);
static void		vclean(vnode_t *);
static void		vrelel(vnode_t *, int);
static void		vdrain_thread(void *);
static void		vrele_thread(void *);
static void		vnpanic(vnode_t *, const char *, ...)
    __printflike(2, 3);
static void		vwait(vnode_t *, int);

/* Routines having to do with the management of the vnode table. */
extern int		(**dead_vnodeop_p)(void *);
extern struct vfsops	dead_vfsops;

void
vfs_vnode_sysinit(void)
{
	int error __diagused;

	vnode_cache = pool_cache_init(sizeof(vnode_t), 0, 0, 0, "vnodepl",
	    NULL, IPL_NONE, NULL, NULL, NULL);
	KASSERT(vnode_cache != NULL);

	dead_mount = vfs_mountalloc(&dead_vfsops, NULL);
	KASSERT(dead_mount != NULL);
	dead_mount->mnt_iflag = IMNT_MPSAFE;

	mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE);
	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&vnode_hold_list);
	TAILQ_INIT(&vrele_list);

	vcache_init();

	mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE);
	cv_init(&vdrain_cv, "vdrain");
	cv_init(&vrele_cv, "vrele");
	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vdrain_thread,
	    NULL, NULL, "vdrain");
	KASSERT(error == 0);
	error = kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread,
	    NULL, &vrele_lwp, "vrele");
	KASSERT(error == 0);
}

/*
 * Allocate a new, uninitialized vnode.  If 'mp' is non-NULL, this is a
 * marker vnode.
 */
vnode_t *
vnalloc(struct mount *mp)
{
	vnode_t *vp;

	vp = pool_cache_get(vnode_cache, PR_WAITOK);
	KASSERT(vp != NULL);

	memset(vp, 0, sizeof(*vp));
	uvm_obj_init(&vp->v_uobj, &uvm_vnodeops, true, 0);
	cv_init(&vp->v_cv, "vnode");
	/*
	 * Done by memset() above.
	 *	LIST_INIT(&vp->v_nclist);
	 *	LIST_INIT(&vp->v_dnclist);
	 */

	if (mp != NULL) {
		vp->v_mount = mp;
		vp->v_type = VBAD;
		vp->v_iflag = VI_MARKER;
		return vp;
	}

	mutex_enter(&vnode_free_list_lock);
	numvnodes++;
	if (numvnodes > desiredvnodes + desiredvnodes / 10)
		cv_signal(&vdrain_cv);
	mutex_exit(&vnode_free_list_lock);

	rw_init(&vp->v_lock);
	vp->v_usecount = 1;
	vp->v_type = VNON;
	vp->v_size = vp->v_writesize = VSIZENOTSET;

	return vp;
}

/*
 * Free an unused, unreferenced vnode.
 */
void
vnfree(vnode_t *vp)
{

	KASSERT(vp->v_usecount == 0);

	if ((vp->v_iflag & VI_MARKER) == 0) {
		rw_destroy(&vp->v_lock);
		mutex_enter(&vnode_free_list_lock);
		numvnodes--;
		mutex_exit(&vnode_free_list_lock);
	}

	/*
	 * Note: the vnode interlock will either be freed, or a reference
	 * dropped (if VI_LOCKSHARE was in use).
	 */
	uvm_obj_destroy(&vp->v_uobj, true);
	cv_destroy(&vp->v_cv);
	pool_cache_put(vnode_cache, vp);
}

/*
 * cleanvnode: grab a vnode from freelist, clean and free it.
 *
 * => Releases vnode_free_list_lock.
 */
static int
cleanvnode(void)
{
	vnode_t *vp;
	vnodelst_t *listhd;
	struct mount *mp;

	KASSERT(mutex_owned(&vnode_free_list_lock));

	listhd = &vnode_free_list;
try_nextlist:
	TAILQ_FOREACH(vp, listhd, v_freelist) {
		/*
		 * It's safe to test v_usecount and v_iflag
		 * without holding the interlock here, since
		 * these vnodes should never appear on the
		 * lists.
		 */
		KASSERT(vp->v_usecount == 0);
		KASSERT((vp->v_iflag & VI_CLEAN) == 0);
		KASSERT(vp->v_freelisthd == listhd);

		if (!mutex_tryenter(vp->v_interlock))
			continue;
		if ((vp->v_iflag & VI_XLOCK) != 0) {
			mutex_exit(vp->v_interlock);
			continue;
		}
		mp = vp->v_mount;
		if (fstrans_start_nowait(mp, FSTRANS_SHARED) != 0) {
			mutex_exit(vp->v_interlock);
			continue;
		}
		break;
	}

	if (vp == NULL) {
		if (listhd == &vnode_free_list) {
			listhd = &vnode_hold_list;
			goto try_nextlist;
		}
		mutex_exit(&vnode_free_list_lock);
		return EBUSY;
	}

	/* Remove it from the freelist. */
	TAILQ_REMOVE(listhd, vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);

	KASSERT(vp->v_usecount == 0);

	/*
	 * The vnode is still associated with a file system, so we must
	 * clean it out before freeing it.  We need to add a reference
	 * before doing this.
	 */
	vp->v_usecount = 1;
	KASSERT((vp->v_iflag & VI_CHANGING) == 0);
	vp->v_iflag |= VI_CHANGING;
	vclean(vp);
	vrelel(vp, VRELEL_CHANGING_SET);
	fstrans_done(mp);

	return 0;
}

/*
 * getnewvnode: return a fresh vnode.
 *
 * => Returns referenced vnode, moved into the mount queue.
 * => Shares the interlock specified by 'slock', if it is not NULL.
 */
int
getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
    kmutex_t *slock, vnode_t **vpp)
{
	struct uvm_object *uobj __diagused;
	vnode_t *vp;
	int error = 0;

	if (mp != NULL) {
		/*
		 * Mark filesystem busy while we are creating a vnode.
		 * If unmount is in progress, this will fail.
		 */
		error = vfs_busy(mp, NULL);
		if (error)
			return error;
	}

	vp = NULL;

	/* Allocate a new vnode. */
	vp = vnalloc(NULL);

	KASSERT(vp->v_freelisthd == NULL);
	KASSERT(LIST_EMPTY(&vp->v_nclist));
	KASSERT(LIST_EMPTY(&vp->v_dnclist));
	KASSERT(vp->v_data == NULL);

	/* Initialize vnode. */
	vp->v_tag = tag;
	vp->v_op = vops;

	uobj = &vp->v_uobj;
	KASSERT(uobj->pgops == &uvm_vnodeops);
	KASSERT(uobj->uo_npages == 0);
	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);

	/* Share the vnode_t::v_interlock, if requested. */
	if (slock) {
		/* Set the interlock and mark that it is shared. */
		KASSERT(vp->v_mount == NULL);
		mutex_obj_hold(slock);
		uvm_obj_setlock(&vp->v_uobj, slock);
		KASSERT(vp->v_interlock == slock);
		vp->v_iflag |= VI_LOCKSHARE;
	}

	/* Finally, move vnode into the mount queue. */
	vfs_insmntque(vp, mp);

	if (mp != NULL) {
		if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
			vp->v_vflag |= VV_MPSAFE;
		vfs_unbusy(mp, true, NULL);
	}

	*vpp = vp;
	return 0;
}

/*
 * This is really just the reverse of getnewvnode().  Needed for
 * VFS_VGET functions that may need to push back a vnode in case
 * of a locking race.
 */
void
ungetnewvnode(vnode_t *vp)
{

	KASSERT(vp->v_usecount == 1);
	KASSERT(vp->v_data == NULL);
	KASSERT(vp->v_freelisthd == NULL);

	mutex_enter(vp->v_interlock);
	vp->v_iflag |= VI_CLEAN;
	vrelel(vp, 0);
}

/*
 * Helper thread to keep the number of vnodes below desiredvnodes.
 */
static void
vdrain_thread(void *cookie)
{
	int error;

	mutex_enter(&vnode_free_list_lock);

	for (;;) {
		cv_timedwait(&vdrain_cv, &vnode_free_list_lock, hz);
		while (numvnodes > desiredvnodes) {
			error = cleanvnode();
			if (error)
				kpause("vndsbusy", false, hz, NULL);
			mutex_enter(&vnode_free_list_lock);
			if (error)
				break;
		}
	}
}

/*
 * Remove a vnode from its freelist.
 */
void
vremfree(vnode_t *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_usecount == 0);

	/*
	 * Note that the reference count must not change until
	 * the vnode is removed.
	 */
	mutex_enter(&vnode_free_list_lock);
	if (vp->v_holdcnt > 0) {
		KASSERT(vp->v_freelisthd == &vnode_hold_list);
	} else {
		KASSERT(vp->v_freelisthd == &vnode_free_list);
	}
	TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
	vp->v_freelisthd = NULL;
	mutex_exit(&vnode_free_list_lock);
}

/*
 * vget: get a particular vnode from the free list, increment its reference
 * count and lock it.
 *
 * => Should be called with v_interlock held.
 *
 * If VI_CHANGING is set, the vnode may be eliminated in vgone()/vclean().
 * In that case, we cannot grab the vnode, so the process is awakened when
 * the transition is completed, and an error returned to indicate that the
 * vnode is no longer usable.
 */
int
vget(vnode_t *vp, int flags)
{
	int error = 0;

	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((flags & ~(LK_SHARED|LK_EXCLUSIVE|LK_NOWAIT)) == 0);

	/*
	 * Before adding a reference, we must remove the vnode
	 * from its freelist.
	 */
	if (vp->v_usecount == 0) {
		vremfree(vp);
		vp->v_usecount = 1;
	} else {
		atomic_inc_uint(&vp->v_usecount);
	}

	/*
	 * If the vnode is in the process of changing state we wait
	 * for the change to complete and take care not to return
	 * a clean vnode.
	 */
	if ((vp->v_iflag & VI_CHANGING) != 0) {
		if ((flags & LK_NOWAIT) != 0) {
			vrelel(vp, 0);
			return EBUSY;
		}
		vwait(vp, VI_CHANGING);
		if ((vp->v_iflag & VI_CLEAN) != 0) {
			vrelel(vp, 0);
			return ENOENT;
		}
	}

	/*
	 * Ok, we got it in good shape.  Just locking left.
	 */
	KASSERT((vp->v_iflag & VI_CLEAN) == 0);
	mutex_exit(vp->v_interlock);
	if (flags & (LK_EXCLUSIVE | LK_SHARED)) {
		error = vn_lock(vp, flags);
		if (error != 0) {
			vrele(vp);
		}
	}
	return error;
}
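
/*
 * Typical use (sketch only; the caller here is hypothetical): the vnode
 * is found through some index with v_interlock held, and vget() adds the
 * reference and optionally locks it.  This is essentially the pattern
 * used by vcache_get() below.
 *
 *	mutex_enter(vp->v_interlock);
 *	error = vget(vp, 0);
 *	if (error == ENOENT)
 *		retry the lookup, the vnode was reclaimed
 *	else if (error == 0)
 *		vp is referenced and usable
 */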

/*
 * vput: unlock and release the reference.
 */
void
vput(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Try to drop reference on a vnode.  Abort if we are releasing the
 * last reference.  Note: this _must_ succeed if not the last reference.
 */
static inline bool
vtryrele(vnode_t *vp)
{
	u_int use, next;

	for (use = vp->v_usecount;; use = next) {
		if (use == 1) {
			return false;
		}
		KASSERT(use > 1);
		next = atomic_cas_uint(&vp->v_usecount, use, use - 1);
		if (__predict_true(next == use)) {
			return true;
		}
	}
}

/*
 * Vnode release.  If reference count drops to zero, call inactive
 * routine and either return to freelist or free to the pool.
 */
static void
vrelel(vnode_t *vp, int flags)
{
	bool recycle, defer;
	int error;

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT(vp->v_freelisthd == NULL);

	if (__predict_false(vp->v_op == dead_vnodeop_p &&
	    (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) {
		vnpanic(vp, "dead but not clean");
	}

	/*
	 * If not the last reference, just drop the reference count
	 * and unlock.
	 */
	if (vtryrele(vp)) {
		if ((flags & VRELEL_CHANGING_SET) != 0) {
			KASSERT((vp->v_iflag & VI_CHANGING) != 0);
			vp->v_iflag &= ~VI_CHANGING;
			cv_broadcast(&vp->v_cv);
		}
		mutex_exit(vp->v_interlock);
		return;
	}
	if (vp->v_usecount <= 0 || vp->v_writecount != 0) {
		vnpanic(vp, "%s: bad ref count", __func__);
	}

	KASSERT((vp->v_iflag & VI_XLOCK) == 0);

#ifdef DIAGNOSTIC
	if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
	    vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) {
		vprint("vrelel: missing VOP_CLOSE()", vp);
	}
#endif

	/*
	 * If not clean, deactivate the vnode, but preserve
	 * our reference across the call to VOP_INACTIVE().
	 */
	if ((vp->v_iflag & VI_CLEAN) == 0) {
		recycle = false;

		/*
		 * XXX This ugly block can be largely eliminated if
		 * locking is pushed down into the file systems.
		 *
		 * Defer vnode release to vrele_thread if caller
		 * requests it explicitly or is the pagedaemon.
		 */
		if ((curlwp == uvm.pagedaemon_lwp) ||
		    (flags & VRELEL_ASYNC_RELE) != 0) {
			defer = true;
		} else if (curlwp == vrele_lwp) {
			/*
			 * We have to try harder.
			 */
			mutex_exit(vp->v_interlock);
			error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			KASSERT(error == 0);
			mutex_enter(vp->v_interlock);
			defer = false;
		} else {
			/* If we can't acquire the lock, then defer. */
			mutex_exit(vp->v_interlock);
			error = vn_lock(vp,
			    LK_EXCLUSIVE | LK_RETRY | LK_NOWAIT);
			defer = (error != 0);
			mutex_enter(vp->v_interlock);
		}

		KASSERT(mutex_owned(vp->v_interlock));
		KASSERT(! (curlwp == vrele_lwp && defer));

		if (defer) {
			/*
			 * Defer reclaim to the kthread; it's not safe to
			 * clean it here.  We donate it our last reference.
			 */
			if ((flags & VRELEL_CHANGING_SET) != 0) {
				KASSERT((vp->v_iflag & VI_CHANGING) != 0);
				vp->v_iflag &= ~VI_CHANGING;
				cv_broadcast(&vp->v_cv);
			}
			mutex_enter(&vrele_lock);
			TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist);
			if (++vrele_pending > (desiredvnodes >> 8))
				cv_signal(&vrele_cv);
			mutex_exit(&vrele_lock);
			mutex_exit(vp->v_interlock);
			return;
		}

		/*
		 * If the node got another reference while we
		 * released the interlock, don't try to inactivate it yet.
		 */
		if (__predict_false(vtryrele(vp))) {
			VOP_UNLOCK(vp);
			if ((flags & VRELEL_CHANGING_SET) != 0) {
				KASSERT((vp->v_iflag & VI_CHANGING) != 0);
				vp->v_iflag &= ~VI_CHANGING;
				cv_broadcast(&vp->v_cv);
			}
			mutex_exit(vp->v_interlock);
			return;
		}

		if ((flags & VRELEL_CHANGING_SET) == 0) {
			KASSERT((vp->v_iflag & VI_CHANGING) == 0);
			vp->v_iflag |= VI_CHANGING;
		}
		mutex_exit(vp->v_interlock);

		/*
		 * The vnode can gain another reference while being
		 * deactivated.  If VOP_INACTIVE() indicates that
		 * the described file has been deleted, then recycle
		 * the vnode irrespective of additional references.
		 * Another thread may be waiting to re-use the on-disk
		 * inode.
		 *
		 * Note that VOP_INACTIVE() will drop the vnode lock.
		 */
		VOP_INACTIVE(vp, &recycle);
		mutex_enter(vp->v_interlock);
		if (!recycle) {
			if (vtryrele(vp)) {
				KASSERT((vp->v_iflag & VI_CHANGING) != 0);
				vp->v_iflag &= ~VI_CHANGING;
				cv_broadcast(&vp->v_cv);
				mutex_exit(vp->v_interlock);
				return;
			}
		}

		/* Take care of space accounting. */
		if (vp->v_iflag & VI_EXECMAP) {
			atomic_add_int(&uvmexp.execpages,
			    -vp->v_uobj.uo_npages);
			atomic_add_int(&uvmexp.filepages,
			    vp->v_uobj.uo_npages);
		}
		vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP);
		vp->v_vflag &= ~VV_MAPPED;

		/*
		 * Recycle the vnode if the file is now unused (unlinked),
		 * otherwise just free it.
		 */
		if (recycle) {
			vclean(vp);
		}
		KASSERT(vp->v_usecount > 0);
	} else { /* vnode was already clean */
		if ((flags & VRELEL_CHANGING_SET) == 0) {
			KASSERT((vp->v_iflag & VI_CHANGING) == 0);
			vp->v_iflag |= VI_CHANGING;
		}
	}

	if (atomic_dec_uint_nv(&vp->v_usecount) != 0) {
		/* Gained another reference while being reclaimed. */
		KASSERT((vp->v_iflag & VI_CHANGING) != 0);
		vp->v_iflag &= ~VI_CHANGING;
		cv_broadcast(&vp->v_cv);
		mutex_exit(vp->v_interlock);
		return;
	}

	if ((vp->v_iflag & VI_CLEAN) != 0) {
		/*
		 * It's clean so destroy it.  It isn't referenced
		 * anywhere since it has been reclaimed.
		 */
		KASSERT(vp->v_holdcnt == 0);
		KASSERT(vp->v_writecount == 0);
		mutex_exit(vp->v_interlock);
		vfs_insmntque(vp, NULL);
		if (vp->v_type == VBLK || vp->v_type == VCHR) {
			spec_node_destroy(vp);
		}
		vnfree(vp);
	} else {
		/*
		 * Otherwise, put it back onto the freelist.  It
		 * can't be destroyed while still associated with
		 * a file system.
		 */
		mutex_enter(&vnode_free_list_lock);
		if (vp->v_holdcnt > 0) {
			vp->v_freelisthd = &vnode_hold_list;
		} else {
			vp->v_freelisthd = &vnode_free_list;
		}
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
		KASSERT((vp->v_iflag & VI_CHANGING) != 0);
		vp->v_iflag &= ~VI_CHANGING;
		cv_broadcast(&vp->v_cv);
		mutex_exit(vp->v_interlock);
	}
}

void
vrele(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vtryrele(vp)) {
		return;
	}
	mutex_enter(vp->v_interlock);
	vrelel(vp, 0);
}

/*
 * Asynchronous vnode release, vnode is released in different context.
 */
void
vrele_async(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vtryrele(vp)) {
		return;
	}
	mutex_enter(vp->v_interlock);
	vrelel(vp, VRELEL_ASYNC_RELE);
}

static void
vrele_thread(void *cookie)
{
	vnodelst_t skip_list;
	vnode_t *vp;
	struct mount *mp;

	TAILQ_INIT(&skip_list);

	mutex_enter(&vrele_lock);
	for (;;) {
		while (TAILQ_EMPTY(&vrele_list)) {
			vrele_gen++;
			cv_broadcast(&vrele_cv);
			cv_timedwait(&vrele_cv, &vrele_lock, hz);
			TAILQ_CONCAT(&vrele_list, &skip_list, v_freelist);
		}
		vp = TAILQ_FIRST(&vrele_list);
		mp = vp->v_mount;
		TAILQ_REMOVE(&vrele_list, vp, v_freelist);
		if (fstrans_start_nowait(mp, FSTRANS_LAZY) != 0) {
			TAILQ_INSERT_TAIL(&skip_list, vp, v_freelist);
			continue;
		}
		vrele_pending--;
		mutex_exit(&vrele_lock);

		/*
		 * If not the last reference, then ignore the vnode
		 * and look for more work.
		 */
		mutex_enter(vp->v_interlock);
		vrelel(vp, 0);
		fstrans_done(mp);
		mutex_enter(&vrele_lock);
	}
}

void
vrele_flush(void)
{
	int gen;

	mutex_enter(&vrele_lock);
	gen = vrele_gen;
	while (vrele_pending && gen == vrele_gen) {
		cv_broadcast(&vrele_cv);
		cv_wait(&vrele_cv, &vrele_lock);
	}
	mutex_exit(&vrele_lock);
}

/*
 * Vnode reference, where a reference is already held by some other
 * object (for example, a file structure).
 */
void
vref(vnode_t *vp)
{

	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT(vp->v_usecount != 0);

	atomic_inc_uint(&vp->v_usecount);
}

/*
 * Page or buffer structure gets a reference.
 * Called with v_interlock held.
 */
void
vholdl(vnode_t *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) {
		mutex_enter(&vnode_free_list_lock);
		KASSERT(vp->v_freelisthd == &vnode_free_list);
		TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
		vp->v_freelisthd = &vnode_hold_list;
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
	}
}

/*
 * Page or buffer structure frees a reference.
 * Called with v_interlock held.
 */
void
holdrelel(vnode_t *vp)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vp->v_holdcnt <= 0) {
		vnpanic(vp, "%s: holdcnt vp %p", __func__, vp);
	}

	vp->v_holdcnt--;
	if (vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		mutex_enter(&vnode_free_list_lock);
		KASSERT(vp->v_freelisthd == &vnode_hold_list);
		TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist);
		vp->v_freelisthd = &vnode_free_list;
		TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist);
		mutex_exit(&vnode_free_list_lock);
	}
}

/*
 * Disassociate the underlying file system from a vnode.
 *
 * Must be called with the interlock held, and will return with it held.
 */
static void
vclean(vnode_t *vp)
{
	lwp_t *l = curlwp;
	bool recycle, active, doclose;
	int error;

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT((vp->v_iflag & VI_MARKER) == 0);
	KASSERT(vp->v_usecount != 0);

	/* If already clean, nothing to do. */
	if ((vp->v_iflag & VI_CLEAN) != 0) {
		return;
	}

	active = (vp->v_usecount > 1);
	doclose = ! (active && vp->v_type == VBLK &&
	    spec_node_getmountedfs(vp) != NULL);
	mutex_exit(vp->v_interlock);

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Prevent the vnode from being recycled or brought into use
	 * while we clean it out.
	 */
	mutex_enter(vp->v_interlock);
	KASSERT((vp->v_iflag & (VI_XLOCK | VI_CLEAN)) == 0);
	vp->v_iflag |= VI_XLOCK;
	if (vp->v_iflag & VI_EXECMAP) {
		atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages);
		atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages);
	}
	vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP);
	mutex_exit(vp->v_interlock);

	/*
	 * Clean out any cached data associated with the vnode.
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.  Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (doclose) {
		error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
		if (error != 0) {
			if (wapbl_vphaswapbl(vp))
				WAPBL_DISCARD(wapbl_vptomp(vp));
			error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
		}
		KASSERT(error == 0);
		KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
		if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) {
			spec_node_revoke(vp);
		}
	}
	if (active) {
		VOP_INACTIVE(vp, &recycle);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VI_XLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp);
	}

	/* Disassociate the underlying file system from the vnode. */
	if (VOP_RECLAIM(vp)) {
		vnpanic(vp, "%s: cannot reclaim", __func__);
	}

	KASSERT(vp->v_data == NULL);
	KASSERT(vp->v_uobj.uo_npages == 0);

	if (vp->v_type == VREG && vp->v_ractx != NULL) {
		uvm_ra_freectx(vp->v_ractx);
		vp->v_ractx = NULL;
	}

	/* Purge name cache. */
	cache_purge(vp);

	/* Move to dead mount. */
	vp->v_vflag &= ~VV_ROOT;
	atomic_inc_uint(&dead_mount->mnt_refcnt);
	vfs_insmntque(vp, dead_mount);

	/* Done with purge, notify sleepers of the grim news. */
	mutex_enter(vp->v_interlock);
	if (doclose) {
		vp->v_op = dead_vnodeop_p;
		vp->v_vflag |= VV_LOCKSWORK;
		vp->v_iflag |= VI_CLEAN;
	} else {
		vp->v_op = spec_vnodeop_p;
		vp->v_vflag &= ~VV_LOCKSWORK;
	}
	vp->v_tag = VT_NON;
	KNOTE(&vp->v_klist, NOTE_REVOKE);
	vp->v_iflag &= ~VI_XLOCK;
	cv_broadcast(&vp->v_cv);

	KASSERT((vp->v_iflag & VI_ONWORKLST) == 0);
}

/*
 * Recycle an unused vnode if the caller holds the last reference.
 */
bool
vrecycle(vnode_t *vp)
{

	mutex_enter(vp->v_interlock);

	KASSERT((vp->v_iflag & VI_MARKER) == 0);

	if (vp->v_usecount != 1) {
		mutex_exit(vp->v_interlock);
		return false;
	}
	if ((vp->v_iflag & VI_CHANGING) != 0)
		vwait(vp, VI_CHANGING);
	if (vp->v_usecount != 1) {
		mutex_exit(vp->v_interlock);
		return false;
	} else if ((vp->v_iflag & VI_CLEAN) != 0) {
		mutex_exit(vp->v_interlock);
		return true;
	}
	vp->v_iflag |= VI_CHANGING;
	vclean(vp);
	vrelel(vp, VRELEL_CHANGING_SET);
	return true;
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vrevoke(vnode_t *vp)
{
	vnode_t *vq;
	enum vtype type;
	dev_t dev;

	KASSERT(vp->v_usecount > 0);

	mutex_enter(vp->v_interlock);
	if ((vp->v_iflag & VI_CLEAN) != 0) {
		mutex_exit(vp->v_interlock);
		return;
	} else if (vp->v_type != VBLK && vp->v_type != VCHR) {
		atomic_inc_uint(&vp->v_usecount);
		mutex_exit(vp->v_interlock);
		vgone(vp);
		return;
	} else {
		dev = vp->v_rdev;
		type = vp->v_type;
		mutex_exit(vp->v_interlock);
	}

	while (spec_node_lookup_by_dev(type, dev, &vq) == 0) {
		vgone(vq);
	}
}

/*
 * Eliminate all activity associated with a vnode in preparation for
 * reuse.  Drops a reference from the vnode.
 */
void
vgone(vnode_t *vp)
{

	mutex_enter(vp->v_interlock);
	if ((vp->v_iflag & VI_CHANGING) != 0)
		vwait(vp, VI_CHANGING);
	vp->v_iflag |= VI_CHANGING;
	vclean(vp);
	vrelel(vp, VRELEL_CHANGING_SET);
}

static inline uint32_t
vcache_hash(const struct vcache_key *key)
{
	uint32_t hash = HASH32_BUF_INIT;

	hash = hash32_buf(&key->vk_mount, sizeof(struct mount *), hash);
	hash = hash32_buf(key->vk_key, key->vk_key_len, hash);
	return hash;
}

static void
vcache_init(void)
{

	vcache.pool = pool_cache_init(sizeof(struct vcache_node), 0, 0, 0,
	    "vcachepl", NULL, IPL_NONE, NULL, NULL, NULL);
	KASSERT(vcache.pool != NULL);
	mutex_init(&vcache.lock, MUTEX_DEFAULT, IPL_NONE);
	vcache.hashtab = hashinit(desiredvnodes, HASH_SLIST, true,
	    &vcache.hashmask);
}

static void
vcache_reinit(void)
{
	int i;
	uint32_t hash;
	u_long oldmask, newmask;
	struct hashhead *oldtab, *newtab;
	struct vcache_node *node;

	newtab = hashinit(desiredvnodes, HASH_SLIST, true, &newmask);
	mutex_enter(&vcache.lock);
	oldtab = vcache.hashtab;
	oldmask = vcache.hashmask;
	vcache.hashtab = newtab;
	vcache.hashmask = newmask;
	for (i = 0; i <= oldmask; i++) {
		while ((node = SLIST_FIRST(&oldtab[i])) != NULL) {
			SLIST_REMOVE(&oldtab[i], node, vcache_node, vn_hash);
			hash = vcache_hash(&node->vn_key);
			SLIST_INSERT_HEAD(&newtab[hash & vcache.hashmask],
			    node, vn_hash);
		}
	}
	mutex_exit(&vcache.lock);
	hashdone(oldtab, HASH_SLIST, oldmask);
}

static inline struct vcache_node *
vcache_hash_lookup(const struct vcache_key *key, uint32_t hash)
{
	struct hashhead *hashp;
	struct vcache_node *node;

	KASSERT(mutex_owned(&vcache.lock));

	hashp = &vcache.hashtab[hash & vcache.hashmask];
	SLIST_FOREACH(node, hashp, vn_hash) {
		if (key->vk_mount != node->vn_key.vk_mount)
			continue;
		if (key->vk_key_len != node->vn_key.vk_key_len)
			continue;
		if (memcmp(key->vk_key, node->vn_key.vk_key, key->vk_key_len))
			continue;
		return node;
	}
	return NULL;
}

/*
 * Get a vnode / fs node pair by key and return it referenced through vpp.
 */
int
vcache_get(struct mount *mp, const void *key, size_t key_len,
    struct vnode **vpp)
{
	int error;
	uint32_t hash;
	const void *new_key;
	struct vnode *vp;
	struct vcache_key vcache_key;
	struct vcache_node *node, *new_node;

	new_key = NULL;
	*vpp = NULL;

	vcache_key.vk_mount = mp;
	vcache_key.vk_key = key;
	vcache_key.vk_key_len = key_len;
	hash = vcache_hash(&vcache_key);

again:
	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&vcache_key, hash);

	/* If found, take a reference or retry. */
	if (__predict_true(node != NULL && node->vn_vnode != NULL)) {
		vp = node->vn_vnode;
		mutex_enter(vp->v_interlock);
		mutex_exit(&vcache.lock);
		error = vget(vp, 0);
		if (error == ENOENT)
			goto again;
		if (error == 0)
			*vpp = vp;
		KASSERT((error != 0) == (*vpp == NULL));
		return error;
	}

	/* If another thread loads this node, wait and retry. */
	if (node != NULL) {
		KASSERT(node->vn_vnode == NULL);
		mutex_exit(&vcache.lock);
		kpause("vcache", false, mstohz(20), NULL);
		goto again;
	}
	mutex_exit(&vcache.lock);

	/* Allocate and initialize a new vcache / vnode pair. */
	error = vfs_busy(mp, NULL);
	if (error)
		return error;
	new_node = pool_cache_get(vcache.pool, PR_WAITOK);
	new_node->vn_vnode = NULL;
	new_node->vn_key = vcache_key;
	vp = vnalloc(NULL);
	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&vcache_key, hash);
	if (node == NULL) {
		SLIST_INSERT_HEAD(&vcache.hashtab[hash & vcache.hashmask],
		    new_node, vn_hash);
		node = new_node;
	}
	mutex_exit(&vcache.lock);

	/* If another thread beat us inserting this node, retry. */
	if (node != new_node) {
		pool_cache_put(vcache.pool, new_node);
		KASSERT(vp->v_usecount == 1);
		vp->v_usecount = 0;
		vnfree(vp);
		vfs_unbusy(mp, false, NULL);
		goto again;
	}

	/* Load the fs node.  Exclusive as new_node->vn_vnode is NULL. */
	error = VFS_LOADVNODE(mp, vp, key, key_len, &new_key);
	if (error) {
		mutex_enter(&vcache.lock);
		SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask],
		    new_node, vcache_node, vn_hash);
		mutex_exit(&vcache.lock);
		pool_cache_put(vcache.pool, new_node);
		KASSERT(vp->v_usecount == 1);
		vp->v_usecount = 0;
		vnfree(vp);
		vfs_unbusy(mp, false, NULL);
		KASSERT(*vpp == NULL);
		return error;
	}
	KASSERT(new_key != NULL);
	KASSERT(memcmp(key, new_key, key_len) == 0);
	KASSERT(vp->v_op != NULL);
	vfs_insmntque(vp, mp);
	if ((mp->mnt_iflag & IMNT_MPSAFE) != 0)
		vp->v_vflag |= VV_MPSAFE;
	vfs_unbusy(mp, true, NULL);

	/* Finished loading, finalize node. */
	mutex_enter(&vcache.lock);
	new_node->vn_key.vk_key = new_key;
	new_node->vn_vnode = vp;
	mutex_exit(&vcache.lock);
	*vpp = vp;
	return 0;
}
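
/*
 * Example (sketch only, hypothetical file system): a VFS_VGET-style
 * lookup keyed by inode number could be implemented on top of
 * vcache_get() as
 *
 *	ino_t ino = ...;
 *	error = vcache_get(mp, &ino, sizeof(ino), &vp);
 *
 * VFS_LOADVNODE() is called exactly once per cache node to initialize
 * the fs node; it must return through new_key a key that remains valid
 * for the lifetime of the vnode, typically a pointer into the fs node
 * itself, since the cache keeps only that pointer.
 */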

/*
 * Prepare key change: lock old and new cache node.
 * Return an error if the new node already exists.
 */
int
vcache_rekey_enter(struct mount *mp, struct vnode *vp,
    const void *old_key, size_t old_key_len,
    const void *new_key, size_t new_key_len)
{
	uint32_t old_hash, new_hash;
	struct vcache_key old_vcache_key, new_vcache_key;
	struct vcache_node *node, *new_node;

	old_vcache_key.vk_mount = mp;
	old_vcache_key.vk_key = old_key;
	old_vcache_key.vk_key_len = old_key_len;
	old_hash = vcache_hash(&old_vcache_key);

	new_vcache_key.vk_mount = mp;
	new_vcache_key.vk_key = new_key;
	new_vcache_key.vk_key_len = new_key_len;
	new_hash = vcache_hash(&new_vcache_key);

	new_node = pool_cache_get(vcache.pool, PR_WAITOK);
	new_node->vn_vnode = NULL;
	new_node->vn_key = new_vcache_key;

	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&new_vcache_key, new_hash);
	if (node != NULL) {
		mutex_exit(&vcache.lock);
		pool_cache_put(vcache.pool, new_node);
		return EEXIST;
	}
	SLIST_INSERT_HEAD(&vcache.hashtab[new_hash & vcache.hashmask],
	    new_node, vn_hash);
	node = vcache_hash_lookup(&old_vcache_key, old_hash);
	KASSERT(node != NULL);
	KASSERT(node->vn_vnode == vp);
	node->vn_vnode = NULL;
	node->vn_key = old_vcache_key;
	mutex_exit(&vcache.lock);
	return 0;
}

/*
 * Key change complete: remove old node and unlock new node.
 */
void
vcache_rekey_exit(struct mount *mp, struct vnode *vp,
    const void *old_key, size_t old_key_len,
    const void *new_key, size_t new_key_len)
{
	uint32_t old_hash, new_hash;
	struct vcache_key old_vcache_key, new_vcache_key;
	struct vcache_node *node;

	old_vcache_key.vk_mount = mp;
	old_vcache_key.vk_key = old_key;
	old_vcache_key.vk_key_len = old_key_len;
	old_hash = vcache_hash(&old_vcache_key);

	new_vcache_key.vk_mount = mp;
	new_vcache_key.vk_key = new_key;
	new_vcache_key.vk_key_len = new_key_len;
	new_hash = vcache_hash(&new_vcache_key);

	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&new_vcache_key, new_hash);
	KASSERT(node != NULL && node->vn_vnode == NULL);
	KASSERT(node->vn_key.vk_key_len == new_key_len);
	node->vn_vnode = vp;
	node->vn_key = new_vcache_key;
	node = vcache_hash_lookup(&old_vcache_key, old_hash);
	KASSERT(node != NULL);
	KASSERT(node->vn_vnode == NULL);
	SLIST_REMOVE(&vcache.hashtab[old_hash & vcache.hashmask],
	    node, vcache_node, vn_hash);
	mutex_exit(&vcache.lock);
	pool_cache_put(vcache.pool, node);
}

/*
 * Remove a vnode / fs node pair from the cache.
 */
void
vcache_remove(struct mount *mp, const void *key, size_t key_len)
{
	uint32_t hash;
	struct vcache_key vcache_key;
	struct vcache_node *node;

	vcache_key.vk_mount = mp;
	vcache_key.vk_key = key;
	vcache_key.vk_key_len = key_len;
	hash = vcache_hash(&vcache_key);

	mutex_enter(&vcache.lock);
	node = vcache_hash_lookup(&vcache_key, hash);
	KASSERT(node != NULL);
	SLIST_REMOVE(&vcache.hashtab[hash & vcache.hashmask],
	    node, vcache_node, vn_hash);
	mutex_exit(&vcache.lock);
	pool_cache_put(vcache.pool, node);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(struct buf *bp)
{
	vnode_t *vp;

	if ((vp = bp->b_vp) == NULL)
		return;

	KASSERT(bp->b_objlock == vp->v_interlock);
	KASSERT(mutex_owned(bp->b_objlock));

	if (--vp->v_numoutput < 0)
		vnpanic(vp, "%s: neg numoutput, vp %p", __func__, vp);
	if (vp->v_numoutput == 0)
		cv_broadcast(&vp->v_cv);
}

/*
 * Test a vnode for being or becoming dead.  Returns one of:
 * EBUSY:  vnode is becoming dead, with "flags == VDEAD_NOWAIT" only.
 * ENOENT: vnode is dead.
 * 0:      otherwise.
 *
 * Whenever this function returns a non-zero value all future
 * calls will also return a non-zero value.
 */
int
vdead_check(struct vnode *vp, int flags)
{

	KASSERT(mutex_owned(vp->v_interlock));
	if (ISSET(vp->v_iflag, VI_XLOCK)) {
		if (ISSET(flags, VDEAD_NOWAIT))
			return EBUSY;
		vwait(vp, VI_XLOCK);
		KASSERT(ISSET(vp->v_iflag, VI_CLEAN));
	}
	if (ISSET(vp->v_iflag, VI_CLEAN))
		return ENOENT;
	return 0;
}
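
/*
 * Usage sketch (hypothetical caller, not part of this file): a vnode
 * operation that must not run on a dying vnode would typically do
 *
 *	mutex_enter(vp->v_interlock);
 *	error = vdead_check(vp, VDEAD_NOWAIT);
 *	mutex_exit(vp->v_interlock);
 *	if (error != 0)
 *		return error;	(EBUSY: becoming dead, ENOENT: dead)
 *
 * Without VDEAD_NOWAIT the call blocks in vwait() until the vnode has
 * been cleaned and then reports ENOENT.
 */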

/*
 * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or
 * recycled.
 */
static void
vwait(vnode_t *vp, int flags)
{

	KASSERT(mutex_owned(vp->v_interlock));
	KASSERT(vp->v_usecount != 0);

	while ((vp->v_iflag & flags) != 0)
		cv_wait(&vp->v_cv, vp->v_interlock);
}

int
vfs_drainvnodes(long target)
{
	int error;

	mutex_enter(&vnode_free_list_lock);

	while (numvnodes > target) {
		error = cleanvnode();
		if (error != 0)
			return error;
		mutex_enter(&vnode_free_list_lock);
	}

	mutex_exit(&vnode_free_list_lock);

	vcache_reinit();

	return 0;
}

void
vnpanic(vnode_t *vp, const char *fmt, ...)
{
	va_list ap;

#ifdef DIAGNOSTIC
	vprint(NULL, vp);
#endif
	va_start(ap, fmt);
	vpanic(fmt, ap);
	va_end(ap);
}