1 /* $NetBSD: vfs_subr.c,v 1.407 2010/06/24 13:03:12 hannken Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1989, 1993 35 * The Regents of the University of California. All rights reserved. 36 * (c) UNIX System Laboratories, Inc. 
37 * All or some portions of this file are derived from material licensed 38 * to the University of California by American Telephone and Telegraph 39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 40 * the permission of UNIX System Laboratories, Inc. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * Note on v_usecount and locking:
 *
 * At nearly all points where it is known that v_usecount could be zero,
 * the vnode interlock will be held.
 *
 * To change v_usecount away from zero, the interlock must be held.  To
 * change from a non-zero value to zero, again the interlock must be
 * held.
 *
 * There's a flag bit, VC_XLOCK, embedded in v_usecount.
 * To raise v_usecount, if the VC_XLOCK bit is set in it, the interlock
 * must be held.
 * To modify the VC_XLOCK bit, the interlock must be held.
 * We always keep the usecount (v_usecount & VC_MASK) non-zero while the
 * VC_XLOCK bit is set.
 *
 * Unless the VC_XLOCK bit is set, changing the usecount from a non-zero
 * value to a non-zero value can safely be done using atomic operations,
 * without the interlock held.
 * Even if the VC_XLOCK bit is set, decreasing the usecount to a non-zero
 * value can be done using atomic operations, without the interlock held.
91 */ 92 93 #include <sys/cdefs.h> 94 __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.407 2010/06/24 13:03:12 hannken Exp $"); 95 96 #include "opt_ddb.h" 97 #include "opt_compat_netbsd.h" 98 #include "opt_compat_43.h" 99 100 #include <sys/param.h> 101 #include <sys/systm.h> 102 #include <sys/conf.h> 103 #include <sys/dirent.h> 104 #include <sys/proc.h> 105 #include <sys/kernel.h> 106 #include <sys/mount.h> 107 #include <sys/fcntl.h> 108 #include <sys/vnode.h> 109 #include <sys/stat.h> 110 #include <sys/namei.h> 111 #include <sys/ucred.h> 112 #include <sys/buf.h> 113 #include <sys/errno.h> 114 #include <sys/kmem.h> 115 #include <sys/syscallargs.h> 116 #include <sys/device.h> 117 #include <sys/filedesc.h> 118 #include <sys/kauth.h> 119 #include <sys/atomic.h> 120 #include <sys/kthread.h> 121 #include <sys/wapbl.h> 122 #include <sys/module.h> 123 124 #include <miscfs/genfs/genfs.h> 125 #include <miscfs/specfs/specdev.h> 126 #include <miscfs/syncfs/syncfs.h> 127 128 #include <uvm/uvm.h> 129 #include <uvm/uvm_readahead.h> 130 #include <uvm/uvm_ddb.h> 131 132 #include <sys/sysctl.h> 133 134 const enum vtype iftovt_tab[16] = { 135 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 136 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 137 }; 138 const int vttoif_tab[9] = { 139 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 140 S_IFSOCK, S_IFIFO, S_IFMT, 141 }; 142 143 /* 144 * Insq/Remq for the vnode usage lists. 
145 */ 146 #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) 147 #define bufremvn(bp) { \ 148 LIST_REMOVE(bp, b_vnbufs); \ 149 (bp)->b_vnbufs.le_next = NOLIST; \ 150 } 151 152 int doforce = 1; /* 1 => permit forcible unmounting */ 153 int prtactive = 0; /* 1 => print out reclaim of active vnodes */ 154 155 static vnodelst_t vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list); 156 static vnodelst_t vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list); 157 static vnodelst_t vrele_list = TAILQ_HEAD_INITIALIZER(vrele_list); 158 159 struct mntlist mountlist = /* mounted filesystem list */ 160 CIRCLEQ_HEAD_INITIALIZER(mountlist); 161 162 u_int numvnodes; 163 static specificdata_domain_t mount_specificdata_domain; 164 165 static int vrele_pending; 166 static int vrele_gen; 167 static kmutex_t vrele_lock; 168 static kcondvar_t vrele_cv; 169 static lwp_t *vrele_lwp; 170 171 static uint64_t mountgen = 0; 172 static kmutex_t mountgen_lock; 173 174 kmutex_t mountlist_lock; 175 kmutex_t mntid_lock; 176 kmutex_t mntvnode_lock; 177 kmutex_t vnode_free_list_lock; 178 kmutex_t vfs_list_lock; 179 180 static pool_cache_t vnode_cache; 181 182 /* 183 * These define the root filesystem and device. 184 */ 185 struct vnode *rootvnode; 186 struct device *root_device; /* root device */ 187 188 /* 189 * Local declarations. 
190 */ 191 192 static void vrele_thread(void *); 193 static void insmntque(vnode_t *, struct mount *); 194 static int getdevvp(dev_t, vnode_t **, enum vtype); 195 static vnode_t *getcleanvnode(void); 196 void vpanic(vnode_t *, const char *); 197 static void vfs_shutdown1(struct lwp *); 198 199 #ifdef DEBUG 200 void printlockedvnodes(void); 201 #endif 202 203 #ifdef DIAGNOSTIC 204 void 205 vpanic(vnode_t *vp, const char *msg) 206 { 207 208 vprint(NULL, vp); 209 panic("%s\n", msg); 210 } 211 #else 212 #define vpanic(vp, msg) /* nothing */ 213 #endif 214 215 void 216 vn_init1(void) 217 { 218 219 vnode_cache = pool_cache_init(sizeof(struct vnode), 0, 0, 0, "vnodepl", 220 NULL, IPL_NONE, NULL, NULL, NULL); 221 KASSERT(vnode_cache != NULL); 222 223 /* Create deferred release thread. */ 224 mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE); 225 cv_init(&vrele_cv, "vrele"); 226 if (kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread, 227 NULL, &vrele_lwp, "vrele")) 228 panic("fork vrele"); 229 } 230 231 /* 232 * Initialize the vnode management data structures. 233 */ 234 void 235 vntblinit(void) 236 { 237 238 mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE); 239 mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE); 240 mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE); 241 mutex_init(&mntvnode_lock, MUTEX_DEFAULT, IPL_NONE); 242 mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE); 243 mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE); 244 245 mount_specificdata_domain = specificdata_domain_create(); 246 247 /* Initialize the filesystem syncer. */ 248 vn_initialize_syncerd(); 249 vn_init1(); 250 } 251 252 int 253 vfs_drainvnodes(long target, struct lwp *l) 254 { 255 256 while (numvnodes > target) { 257 vnode_t *vp; 258 259 mutex_enter(&vnode_free_list_lock); 260 vp = getcleanvnode(); 261 if (vp == NULL) 262 return EBUSY; /* give up */ 263 ungetnewvnode(vp); 264 } 265 266 return 0; 267 } 268 269 /* 270 * Lookup a mount point by filesystem identifier. 
271 * 272 * XXX Needs to add a reference to the mount point. 273 */ 274 struct mount * 275 vfs_getvfs(fsid_t *fsid) 276 { 277 struct mount *mp; 278 279 mutex_enter(&mountlist_lock); 280 CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) { 281 if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] && 282 mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) { 283 mutex_exit(&mountlist_lock); 284 return (mp); 285 } 286 } 287 mutex_exit(&mountlist_lock); 288 return ((struct mount *)0); 289 } 290 291 /* 292 * Drop a reference to a mount structure, freeing if the last reference. 293 */ 294 void 295 vfs_destroy(struct mount *mp) 296 { 297 298 if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) { 299 return; 300 } 301 302 /* 303 * Nothing else has visibility of the mount: we can now 304 * free the data structures. 305 */ 306 KASSERT(mp->mnt_refcnt == 0); 307 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 308 rw_destroy(&mp->mnt_unmounting); 309 mutex_destroy(&mp->mnt_updating); 310 mutex_destroy(&mp->mnt_renamelock); 311 if (mp->mnt_op != NULL) { 312 vfs_delref(mp->mnt_op); 313 } 314 kmem_free(mp, sizeof(*mp)); 315 } 316 317 /* 318 * grab a vnode from freelist and clean it. 319 */ 320 vnode_t * 321 getcleanvnode(void) 322 { 323 vnode_t *vp; 324 vnodelst_t *listhd; 325 326 KASSERT(mutex_owned(&vnode_free_list_lock)); 327 328 retry: 329 listhd = &vnode_free_list; 330 try_nextlist: 331 TAILQ_FOREACH(vp, listhd, v_freelist) { 332 /* 333 * It's safe to test v_usecount and v_iflag 334 * without holding the interlock here, since 335 * these vnodes should never appear on the 336 * lists. 
337 */ 338 if (vp->v_usecount != 0) { 339 vpanic(vp, "free vnode isn't"); 340 } 341 if ((vp->v_iflag & VI_CLEAN) != 0) { 342 vpanic(vp, "clean vnode on freelist"); 343 } 344 if (vp->v_freelisthd != listhd) { 345 printf("vnode sez %p, listhd %p\n", vp->v_freelisthd, listhd); 346 vpanic(vp, "list head mismatch"); 347 } 348 if (!mutex_tryenter(&vp->v_interlock)) 349 continue; 350 if ((vp->v_iflag & VI_XLOCK) == 0) 351 break; 352 mutex_exit(&vp->v_interlock); 353 } 354 355 if (vp == NULL) { 356 if (listhd == &vnode_free_list) { 357 listhd = &vnode_hold_list; 358 goto try_nextlist; 359 } 360 mutex_exit(&vnode_free_list_lock); 361 return NULL; 362 } 363 364 /* Remove it from the freelist. */ 365 TAILQ_REMOVE(listhd, vp, v_freelist); 366 vp->v_freelisthd = NULL; 367 mutex_exit(&vnode_free_list_lock); 368 369 if (vp->v_usecount != 0) { 370 /* 371 * was referenced again before we got the interlock 372 * Don't return to freelist - the holder of the last 373 * reference will destroy it. 374 */ 375 mutex_exit(&vp->v_interlock); 376 mutex_enter(&vnode_free_list_lock); 377 goto retry; 378 } 379 380 /* 381 * The vnode is still associated with a file system, so we must 382 * clean it out before reusing it. We need to add a reference 383 * before doing this. If the vnode gains another reference while 384 * being cleaned out then we lose - retry. 385 */ 386 atomic_add_int(&vp->v_usecount, 1 + VC_XLOCK); 387 vclean(vp, DOCLOSE); 388 KASSERT(vp->v_usecount >= 1 + VC_XLOCK); 389 atomic_add_int(&vp->v_usecount, -VC_XLOCK); 390 if (vp->v_usecount == 1) { 391 /* We're about to dirty it. */ 392 vp->v_iflag &= ~VI_CLEAN; 393 mutex_exit(&vp->v_interlock); 394 if (vp->v_type == VBLK || vp->v_type == VCHR) { 395 spec_node_destroy(vp); 396 } 397 vp->v_type = VNON; 398 } else { 399 /* 400 * Don't return to freelist - the holder of the last 401 * reference will destroy it. 
402 */ 403 vrelel(vp, 0); /* releases vp->v_interlock */ 404 mutex_enter(&vnode_free_list_lock); 405 goto retry; 406 } 407 408 if (vp->v_data != NULL || vp->v_uobj.uo_npages != 0 || 409 !TAILQ_EMPTY(&vp->v_uobj.memq)) { 410 vpanic(vp, "cleaned vnode isn't"); 411 } 412 if (vp->v_numoutput != 0) { 413 vpanic(vp, "clean vnode has pending I/O's"); 414 } 415 if ((vp->v_iflag & VI_ONWORKLST) != 0) { 416 vpanic(vp, "clean vnode on syncer list"); 417 } 418 419 return vp; 420 } 421 422 /* 423 * Mark a mount point as busy, and gain a new reference to it. Used to 424 * prevent the file system from being unmounted during critical sections. 425 * 426 * => The caller must hold a pre-existing reference to the mount. 427 * => Will fail if the file system is being unmounted, or is unmounted. 428 */ 429 int 430 vfs_busy(struct mount *mp, struct mount **nextp) 431 { 432 433 KASSERT(mp->mnt_refcnt > 0); 434 435 if (__predict_false(!rw_tryenter(&mp->mnt_unmounting, RW_READER))) { 436 if (nextp != NULL) { 437 KASSERT(mutex_owned(&mountlist_lock)); 438 *nextp = CIRCLEQ_NEXT(mp, mnt_list); 439 } 440 return EBUSY; 441 } 442 if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) { 443 rw_exit(&mp->mnt_unmounting); 444 if (nextp != NULL) { 445 KASSERT(mutex_owned(&mountlist_lock)); 446 *nextp = CIRCLEQ_NEXT(mp, mnt_list); 447 } 448 return ENOENT; 449 } 450 if (nextp != NULL) { 451 mutex_exit(&mountlist_lock); 452 } 453 atomic_inc_uint(&mp->mnt_refcnt); 454 return 0; 455 } 456 457 /* 458 * Unbusy a busy filesystem. 459 * 460 * => If keepref is true, preserve reference added by vfs_busy(). 461 * => If nextp != NULL, acquire mountlist_lock. 
462 */ 463 void 464 vfs_unbusy(struct mount *mp, bool keepref, struct mount **nextp) 465 { 466 467 KASSERT(mp->mnt_refcnt > 0); 468 469 if (nextp != NULL) { 470 mutex_enter(&mountlist_lock); 471 } 472 rw_exit(&mp->mnt_unmounting); 473 if (!keepref) { 474 vfs_destroy(mp); 475 } 476 if (nextp != NULL) { 477 KASSERT(mutex_owned(&mountlist_lock)); 478 *nextp = CIRCLEQ_NEXT(mp, mnt_list); 479 } 480 } 481 482 struct mount * 483 vfs_mountalloc(struct vfsops *vfsops, struct vnode *vp) 484 { 485 int error; 486 struct mount *mp; 487 488 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP); 489 if (mp == NULL) 490 return NULL; 491 492 mp->mnt_op = vfsops; 493 mp->mnt_refcnt = 1; 494 TAILQ_INIT(&mp->mnt_vnodelist); 495 rw_init(&mp->mnt_unmounting); 496 mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE); 497 mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE); 498 error = vfs_busy(mp, NULL); 499 KASSERT(error == 0); 500 mp->mnt_vnodecovered = vp; 501 mount_initspecific(mp); 502 503 mutex_enter(&mountgen_lock); 504 mp->mnt_gen = mountgen++; 505 mutex_exit(&mountgen_lock); 506 507 return mp; 508 } 509 510 /* 511 * Lookup a filesystem type, and if found allocate and initialize 512 * a mount structure for it. 513 * 514 * Devname is usually updated by mount(8) after booting. 
515 */ 516 int 517 vfs_rootmountalloc(const char *fstypename, const char *devname, 518 struct mount **mpp) 519 { 520 struct vfsops *vfsp = NULL; 521 struct mount *mp; 522 523 mutex_enter(&vfs_list_lock); 524 LIST_FOREACH(vfsp, &vfs_list, vfs_list) 525 if (!strncmp(vfsp->vfs_name, fstypename, 526 sizeof(mp->mnt_stat.f_fstypename))) 527 break; 528 if (vfsp == NULL) { 529 mutex_exit(&vfs_list_lock); 530 return (ENODEV); 531 } 532 vfsp->vfs_refcount++; 533 mutex_exit(&vfs_list_lock); 534 535 if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL) 536 return ENOMEM; 537 mp->mnt_flag = MNT_RDONLY; 538 (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, 539 sizeof(mp->mnt_stat.f_fstypename)); 540 mp->mnt_stat.f_mntonname[0] = '/'; 541 mp->mnt_stat.f_mntonname[1] = '\0'; 542 mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] = 543 '\0'; 544 (void)copystr(devname, mp->mnt_stat.f_mntfromname, 545 sizeof(mp->mnt_stat.f_mntfromname) - 1, 0); 546 *mpp = mp; 547 return (0); 548 } 549 550 /* 551 * Routines having to do with the management of the vnode table. 552 */ 553 extern int (**dead_vnodeop_p)(void *); 554 555 /* 556 * Return the next vnode from the free list. 557 */ 558 int 559 getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *), 560 vnode_t **vpp) 561 { 562 struct uvm_object *uobj; 563 static int toggle; 564 vnode_t *vp; 565 int error = 0, tryalloc; 566 567 try_again: 568 if (mp != NULL) { 569 /* 570 * Mark filesystem busy while we're creating a 571 * vnode. If unmount is in progress, this will 572 * fail. 573 */ 574 error = vfs_busy(mp, NULL); 575 if (error) 576 return error; 577 } 578 579 /* 580 * We must choose whether to allocate a new vnode or recycle an 581 * existing one. The criterion for allocating a new one is that 582 * the total number of vnodes is less than the number desired or 583 * there are no vnodes on either free list. 
Generally we only 584 * want to recycle vnodes that have no buffers associated with 585 * them, so we look first on the vnode_free_list. If it is empty, 586 * we next consider vnodes with referencing buffers on the 587 * vnode_hold_list. The toggle ensures that half the time we 588 * will use a buffer from the vnode_hold_list, and half the time 589 * we will allocate a new one unless the list has grown to twice 590 * the desired size. We are reticent to recycle vnodes from the 591 * vnode_hold_list because we will lose the identity of all its 592 * referencing buffers. 593 */ 594 595 vp = NULL; 596 597 mutex_enter(&vnode_free_list_lock); 598 599 toggle ^= 1; 600 if (numvnodes > 2 * desiredvnodes) 601 toggle = 0; 602 603 tryalloc = numvnodes < desiredvnodes || 604 (TAILQ_FIRST(&vnode_free_list) == NULL && 605 (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle)); 606 607 if (tryalloc) { 608 numvnodes++; 609 mutex_exit(&vnode_free_list_lock); 610 if ((vp = vnalloc(NULL)) == NULL) { 611 mutex_enter(&vnode_free_list_lock); 612 numvnodes--; 613 } else 614 vp->v_usecount = 1; 615 } 616 617 if (vp == NULL) { 618 vp = getcleanvnode(); 619 if (vp == NULL) { 620 if (mp != NULL) { 621 vfs_unbusy(mp, false, NULL); 622 } 623 if (tryalloc) { 624 printf("WARNING: unable to allocate new " 625 "vnode, retrying...\n"); 626 kpause("newvn", false, hz, NULL); 627 goto try_again; 628 } 629 tablefull("vnode", "increase kern.maxvnodes or NVNODE"); 630 *vpp = 0; 631 return (ENFILE); 632 } 633 vp->v_iflag = 0; 634 vp->v_vflag = 0; 635 vp->v_uflag = 0; 636 vp->v_socket = NULL; 637 } 638 639 KASSERT(vp->v_usecount == 1); 640 KASSERT(vp->v_freelisthd == NULL); 641 KASSERT(LIST_EMPTY(&vp->v_nclist)); 642 KASSERT(LIST_EMPTY(&vp->v_dnclist)); 643 644 vp->v_type = VNON; 645 vp->v_tag = tag; 646 vp->v_op = vops; 647 insmntque(vp, mp); 648 *vpp = vp; 649 vp->v_data = 0; 650 651 /* 652 * initialize uvm_object within vnode. 
653 */ 654 655 uobj = &vp->v_uobj; 656 KASSERT(uobj->pgops == &uvm_vnodeops); 657 KASSERT(uobj->uo_npages == 0); 658 KASSERT(TAILQ_FIRST(&uobj->memq) == NULL); 659 vp->v_size = vp->v_writesize = VSIZENOTSET; 660 661 if (mp != NULL) { 662 if ((mp->mnt_iflag & IMNT_MPSAFE) != 0) 663 vp->v_vflag |= VV_MPSAFE; 664 vfs_unbusy(mp, true, NULL); 665 } 666 667 return (0); 668 } 669 670 /* 671 * This is really just the reverse of getnewvnode(). Needed for 672 * VFS_VGET functions who may need to push back a vnode in case 673 * of a locking race. 674 */ 675 void 676 ungetnewvnode(vnode_t *vp) 677 { 678 679 KASSERT(vp->v_usecount == 1); 680 KASSERT(vp->v_data == NULL); 681 KASSERT(vp->v_freelisthd == NULL); 682 683 mutex_enter(&vp->v_interlock); 684 vp->v_iflag |= VI_CLEAN; 685 vrelel(vp, 0); 686 } 687 688 /* 689 * Allocate a new, uninitialized vnode. If 'mp' is non-NULL, this is a 690 * marker vnode and we are prepared to wait for the allocation. 691 */ 692 vnode_t * 693 vnalloc(struct mount *mp) 694 { 695 vnode_t *vp; 696 697 vp = pool_cache_get(vnode_cache, (mp != NULL ? PR_WAITOK : PR_NOWAIT)); 698 if (vp == NULL) { 699 return NULL; 700 } 701 702 memset(vp, 0, sizeof(*vp)); 703 UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 0); 704 cv_init(&vp->v_cv, "vnode"); 705 /* 706 * done by memset() above. 707 * LIST_INIT(&vp->v_nclist); 708 * LIST_INIT(&vp->v_dnclist); 709 */ 710 711 if (mp != NULL) { 712 vp->v_mount = mp; 713 vp->v_type = VBAD; 714 vp->v_iflag = VI_MARKER; 715 } else { 716 rw_init(&vp->v_lock.vl_lock); 717 } 718 719 return vp; 720 } 721 722 /* 723 * Free an unused, unreferenced vnode. 
724 */ 725 void 726 vnfree(vnode_t *vp) 727 { 728 729 KASSERT(vp->v_usecount == 0); 730 731 if ((vp->v_iflag & VI_MARKER) == 0) { 732 rw_destroy(&vp->v_lock.vl_lock); 733 mutex_enter(&vnode_free_list_lock); 734 numvnodes--; 735 mutex_exit(&vnode_free_list_lock); 736 } 737 738 UVM_OBJ_DESTROY(&vp->v_uobj); 739 cv_destroy(&vp->v_cv); 740 pool_cache_put(vnode_cache, vp); 741 } 742 743 /* 744 * Remove a vnode from its freelist. 745 */ 746 static inline void 747 vremfree(vnode_t *vp) 748 { 749 750 KASSERT(mutex_owned(&vp->v_interlock)); 751 KASSERT(vp->v_usecount == 0); 752 753 /* 754 * Note that the reference count must not change until 755 * the vnode is removed. 756 */ 757 mutex_enter(&vnode_free_list_lock); 758 if (vp->v_holdcnt > 0) { 759 KASSERT(vp->v_freelisthd == &vnode_hold_list); 760 } else { 761 KASSERT(vp->v_freelisthd == &vnode_free_list); 762 } 763 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); 764 vp->v_freelisthd = NULL; 765 mutex_exit(&vnode_free_list_lock); 766 } 767 768 /* 769 * Move a vnode from one mount queue to another. 770 */ 771 static void 772 insmntque(vnode_t *vp, struct mount *mp) 773 { 774 struct mount *omp; 775 776 #ifdef DIAGNOSTIC 777 if ((mp != NULL) && 778 (mp->mnt_iflag & IMNT_UNMOUNT) && 779 vp->v_tag != VT_VFS) { 780 panic("insmntque into dying filesystem"); 781 } 782 #endif 783 784 mutex_enter(&mntvnode_lock); 785 /* 786 * Delete from old mount point vnode list, if on one. 787 */ 788 if ((omp = vp->v_mount) != NULL) 789 TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes); 790 /* 791 * Insert into list of vnodes for the new mount point, if 792 * available. The caller must take a reference on the mount 793 * structure and donate to the vnode. 794 */ 795 if ((vp->v_mount = mp) != NULL) 796 TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes); 797 mutex_exit(&mntvnode_lock); 798 799 if (omp != NULL) { 800 /* Release reference to old mount. 
*/ 801 vfs_destroy(omp); 802 } 803 } 804 805 /* 806 * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or 807 * recycled. 808 */ 809 void 810 vwait(vnode_t *vp, int flags) 811 { 812 813 KASSERT(mutex_owned(&vp->v_interlock)); 814 KASSERT(vp->v_usecount != 0); 815 816 while ((vp->v_iflag & flags) != 0) 817 cv_wait(&vp->v_cv, &vp->v_interlock); 818 } 819 820 /* 821 * Insert a marker vnode into a mount's vnode list, after the 822 * specified vnode. mntvnode_lock must be held. 823 */ 824 void 825 vmark(vnode_t *mvp, vnode_t *vp) 826 { 827 struct mount *mp; 828 829 mp = mvp->v_mount; 830 831 KASSERT(mutex_owned(&mntvnode_lock)); 832 KASSERT((mvp->v_iflag & VI_MARKER) != 0); 833 KASSERT(vp->v_mount == mp); 834 835 TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vp, mvp, v_mntvnodes); 836 } 837 838 /* 839 * Remove a marker vnode from a mount's vnode list, and return 840 * a pointer to the next vnode in the list. mntvnode_lock must 841 * be held. 842 */ 843 vnode_t * 844 vunmark(vnode_t *mvp) 845 { 846 vnode_t *vp; 847 struct mount *mp; 848 849 mp = mvp->v_mount; 850 851 KASSERT(mutex_owned(&mntvnode_lock)); 852 KASSERT((mvp->v_iflag & VI_MARKER) != 0); 853 854 vp = TAILQ_NEXT(mvp, v_mntvnodes); 855 TAILQ_REMOVE(&mp->mnt_vnodelist, mvp, v_mntvnodes); 856 857 KASSERT(vp == NULL || vp->v_mount == mp); 858 859 return vp; 860 } 861 862 /* 863 * Update outstanding I/O count and do wakeup if requested. 864 */ 865 void 866 vwakeup(struct buf *bp) 867 { 868 struct vnode *vp; 869 870 if ((vp = bp->b_vp) == NULL) 871 return; 872 873 KASSERT(bp->b_objlock == &vp->v_interlock); 874 KASSERT(mutex_owned(bp->b_objlock)); 875 876 if (--vp->v_numoutput < 0) 877 panic("vwakeup: neg numoutput, vp %p", vp); 878 if (vp->v_numoutput == 0) 879 cv_broadcast(&vp->v_cv); 880 } 881 882 /* 883 * Flush out and invalidate all buffers associated with a vnode. 884 * Called with the underlying vnode locked, which should prevent new dirty 885 * buffers from being queued. 
886 */ 887 int 888 vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l, 889 bool catch, int slptimeo) 890 { 891 struct buf *bp, *nbp; 892 int error; 893 int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO | 894 (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0); 895 896 /* XXXUBC this doesn't look at flags or slp* */ 897 mutex_enter(&vp->v_interlock); 898 error = VOP_PUTPAGES(vp, 0, 0, flushflags); 899 if (error) { 900 return error; 901 } 902 903 if (flags & V_SAVE) { 904 error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0); 905 if (error) 906 return (error); 907 KASSERT(LIST_EMPTY(&vp->v_dirtyblkhd)); 908 } 909 910 mutex_enter(&bufcache_lock); 911 restart: 912 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 913 nbp = LIST_NEXT(bp, b_vnbufs); 914 error = bbusy(bp, catch, slptimeo, NULL); 915 if (error != 0) { 916 if (error == EPASSTHROUGH) 917 goto restart; 918 mutex_exit(&bufcache_lock); 919 return (error); 920 } 921 brelsel(bp, BC_INVAL | BC_VFLUSH); 922 } 923 924 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 925 nbp = LIST_NEXT(bp, b_vnbufs); 926 error = bbusy(bp, catch, slptimeo, NULL); 927 if (error != 0) { 928 if (error == EPASSTHROUGH) 929 goto restart; 930 mutex_exit(&bufcache_lock); 931 return (error); 932 } 933 /* 934 * XXX Since there are no node locks for NFS, I believe 935 * there is a slight chance that a delayed write will 936 * occur while sleeping just above, so check for it. 
937 */ 938 if ((bp->b_oflags & BO_DELWRI) && (flags & V_SAVE)) { 939 #ifdef DEBUG 940 printf("buffer still DELWRI\n"); 941 #endif 942 bp->b_cflags |= BC_BUSY | BC_VFLUSH; 943 mutex_exit(&bufcache_lock); 944 VOP_BWRITE(bp); 945 mutex_enter(&bufcache_lock); 946 goto restart; 947 } 948 brelsel(bp, BC_INVAL | BC_VFLUSH); 949 } 950 951 #ifdef DIAGNOSTIC 952 if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd)) 953 panic("vinvalbuf: flush failed, vp %p", vp); 954 #endif 955 956 mutex_exit(&bufcache_lock); 957 958 return (0); 959 } 960 961 /* 962 * Destroy any in core blocks past the truncation length. 963 * Called with the underlying vnode locked, which should prevent new dirty 964 * buffers from being queued. 965 */ 966 int 967 vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo) 968 { 969 struct buf *bp, *nbp; 970 int error; 971 voff_t off; 972 973 off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift); 974 mutex_enter(&vp->v_interlock); 975 error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO); 976 if (error) { 977 return error; 978 } 979 980 mutex_enter(&bufcache_lock); 981 restart: 982 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 983 nbp = LIST_NEXT(bp, b_vnbufs); 984 if (bp->b_lblkno < lbn) 985 continue; 986 error = bbusy(bp, catch, slptimeo, NULL); 987 if (error != 0) { 988 if (error == EPASSTHROUGH) 989 goto restart; 990 mutex_exit(&bufcache_lock); 991 return (error); 992 } 993 brelsel(bp, BC_INVAL | BC_VFLUSH); 994 } 995 996 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 997 nbp = LIST_NEXT(bp, b_vnbufs); 998 if (bp->b_lblkno < lbn) 999 continue; 1000 error = bbusy(bp, catch, slptimeo, NULL); 1001 if (error != 0) { 1002 if (error == EPASSTHROUGH) 1003 goto restart; 1004 mutex_exit(&bufcache_lock); 1005 return (error); 1006 } 1007 brelsel(bp, BC_INVAL | BC_VFLUSH); 1008 } 1009 mutex_exit(&bufcache_lock); 1010 1011 return (0); 1012 } 1013 1014 /* 1015 * Flush all dirty buffers from a vnode. 
1016 * Called with the underlying vnode locked, which should prevent new dirty 1017 * buffers from being queued. 1018 */ 1019 void 1020 vflushbuf(struct vnode *vp, int sync) 1021 { 1022 struct buf *bp, *nbp; 1023 int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0); 1024 bool dirty; 1025 1026 mutex_enter(&vp->v_interlock); 1027 (void) VOP_PUTPAGES(vp, 0, 0, flags); 1028 1029 loop: 1030 mutex_enter(&bufcache_lock); 1031 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 1032 nbp = LIST_NEXT(bp, b_vnbufs); 1033 if ((bp->b_cflags & BC_BUSY)) 1034 continue; 1035 if ((bp->b_oflags & BO_DELWRI) == 0) 1036 panic("vflushbuf: not dirty, bp %p", bp); 1037 bp->b_cflags |= BC_BUSY | BC_VFLUSH; 1038 mutex_exit(&bufcache_lock); 1039 /* 1040 * Wait for I/O associated with indirect blocks to complete, 1041 * since there is no way to quickly wait for them below. 1042 */ 1043 if (bp->b_vp == vp || sync == 0) 1044 (void) bawrite(bp); 1045 else 1046 (void) bwrite(bp); 1047 goto loop; 1048 } 1049 mutex_exit(&bufcache_lock); 1050 1051 if (sync == 0) 1052 return; 1053 1054 mutex_enter(&vp->v_interlock); 1055 while (vp->v_numoutput != 0) 1056 cv_wait(&vp->v_cv, &vp->v_interlock); 1057 dirty = !LIST_EMPTY(&vp->v_dirtyblkhd); 1058 mutex_exit(&vp->v_interlock); 1059 1060 if (dirty) { 1061 vprint("vflushbuf: dirty", vp); 1062 goto loop; 1063 } 1064 } 1065 1066 /* 1067 * Create a vnode for a block device. 1068 * Used for root filesystem and swap areas. 1069 * Also used for memory file system special devices. 1070 */ 1071 int 1072 bdevvp(dev_t dev, vnode_t **vpp) 1073 { 1074 1075 return (getdevvp(dev, vpp, VBLK)); 1076 } 1077 1078 /* 1079 * Create a vnode for a character device. 1080 * Used for kernfs and some console handling. 1081 */ 1082 int 1083 cdevvp(dev_t dev, vnode_t **vpp) 1084 { 1085 1086 return (getdevvp(dev, vpp, VCHR)); 1087 } 1088 1089 /* 1090 * Associate a buffer with a vnode. There must already be a hold on 1091 * the vnode. 
1092 */ 1093 void 1094 bgetvp(struct vnode *vp, struct buf *bp) 1095 { 1096 1097 KASSERT(bp->b_vp == NULL); 1098 KASSERT(bp->b_objlock == &buffer_lock); 1099 KASSERT(mutex_owned(&vp->v_interlock)); 1100 KASSERT(mutex_owned(&bufcache_lock)); 1101 KASSERT((bp->b_cflags & BC_BUSY) != 0); 1102 KASSERT(!cv_has_waiters(&bp->b_done)); 1103 1104 vholdl(vp); 1105 bp->b_vp = vp; 1106 if (vp->v_type == VBLK || vp->v_type == VCHR) 1107 bp->b_dev = vp->v_rdev; 1108 else 1109 bp->b_dev = NODEV; 1110 1111 /* 1112 * Insert onto list for new vnode. 1113 */ 1114 bufinsvn(bp, &vp->v_cleanblkhd); 1115 bp->b_objlock = &vp->v_interlock; 1116 } 1117 1118 /* 1119 * Disassociate a buffer from a vnode. 1120 */ 1121 void 1122 brelvp(struct buf *bp) 1123 { 1124 struct vnode *vp = bp->b_vp; 1125 1126 KASSERT(vp != NULL); 1127 KASSERT(bp->b_objlock == &vp->v_interlock); 1128 KASSERT(mutex_owned(&vp->v_interlock)); 1129 KASSERT(mutex_owned(&bufcache_lock)); 1130 KASSERT((bp->b_cflags & BC_BUSY) != 0); 1131 KASSERT(!cv_has_waiters(&bp->b_done)); 1132 1133 /* 1134 * Delete from old vnode list, if on one. 1135 */ 1136 if (LIST_NEXT(bp, b_vnbufs) != NOLIST) 1137 bufremvn(bp); 1138 1139 if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_iflag & VI_ONWORKLST) && 1140 LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { 1141 vp->v_iflag &= ~VI_WRMAPDIRTY; 1142 vn_syncer_remove_from_worklist(vp); 1143 } 1144 1145 bp->b_objlock = &buffer_lock; 1146 bp->b_vp = NULL; 1147 holdrelel(vp); 1148 } 1149 1150 /* 1151 * Reassign a buffer from one vnode list to another. 1152 * The list reassignment must be within the same vnode. 1153 * Used to assign file specific control information 1154 * (indirect blocks) to the list to which they belong. 
1155 */ 1156 void 1157 reassignbuf(struct buf *bp, struct vnode *vp) 1158 { 1159 struct buflists *listheadp; 1160 int delayx; 1161 1162 KASSERT(mutex_owned(&bufcache_lock)); 1163 KASSERT(bp->b_objlock == &vp->v_interlock); 1164 KASSERT(mutex_owned(&vp->v_interlock)); 1165 KASSERT((bp->b_cflags & BC_BUSY) != 0); 1166 1167 /* 1168 * Delete from old vnode list, if on one. 1169 */ 1170 if (LIST_NEXT(bp, b_vnbufs) != NOLIST) 1171 bufremvn(bp); 1172 1173 /* 1174 * If dirty, put on list of dirty buffers; 1175 * otherwise insert onto list of clean buffers. 1176 */ 1177 if ((bp->b_oflags & BO_DELWRI) == 0) { 1178 listheadp = &vp->v_cleanblkhd; 1179 if (TAILQ_EMPTY(&vp->v_uobj.memq) && 1180 (vp->v_iflag & VI_ONWORKLST) && 1181 LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { 1182 vp->v_iflag &= ~VI_WRMAPDIRTY; 1183 vn_syncer_remove_from_worklist(vp); 1184 } 1185 } else { 1186 listheadp = &vp->v_dirtyblkhd; 1187 if ((vp->v_iflag & VI_ONWORKLST) == 0) { 1188 switch (vp->v_type) { 1189 case VDIR: 1190 delayx = dirdelay; 1191 break; 1192 case VBLK: 1193 if (vp->v_specmountpoint != NULL) { 1194 delayx = metadelay; 1195 break; 1196 } 1197 /* fall through */ 1198 default: 1199 delayx = filedelay; 1200 break; 1201 } 1202 if (!vp->v_mount || 1203 (vp->v_mount->mnt_flag & MNT_ASYNC) == 0) 1204 vn_syncer_add_to_worklist(vp, delayx); 1205 } 1206 } 1207 bufinsvn(bp, listheadp); 1208 } 1209 1210 /* 1211 * Create a vnode for a device. 1212 * Used by bdevvp (block device) for root file system etc., 1213 * and by cdevvp (character device) for console and kernfs. 
1214 */ 1215 static int 1216 getdevvp(dev_t dev, vnode_t **vpp, enum vtype type) 1217 { 1218 vnode_t *vp; 1219 vnode_t *nvp; 1220 int error; 1221 1222 if (dev == NODEV) { 1223 *vpp = NULL; 1224 return (0); 1225 } 1226 error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp); 1227 if (error) { 1228 *vpp = NULL; 1229 return (error); 1230 } 1231 vp = nvp; 1232 vp->v_type = type; 1233 vp->v_vflag |= VV_MPSAFE; 1234 uvm_vnp_setsize(vp, 0); 1235 spec_node_init(vp, dev); 1236 *vpp = vp; 1237 return (0); 1238 } 1239 1240 /* 1241 * Try to gain a reference to a vnode, without acquiring its interlock. 1242 * The caller must hold a lock that will prevent the vnode from being 1243 * recycled or freed. 1244 */ 1245 bool 1246 vtryget(vnode_t *vp) 1247 { 1248 u_int use, next; 1249 1250 /* 1251 * If the vnode is being freed, don't make life any harder 1252 * for vclean() by adding another reference without waiting. 1253 * This is not strictly necessary, but we'll do it anyway. 1254 */ 1255 if (__predict_false((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0)) { 1256 return false; 1257 } 1258 for (use = vp->v_usecount;; use = next) { 1259 if (use == 0 || __predict_false((use & VC_XLOCK) != 0)) { 1260 /* Need interlock held if first reference. */ 1261 return false; 1262 } 1263 next = atomic_cas_uint(&vp->v_usecount, use, use + 1); 1264 if (__predict_true(next == use)) { 1265 return true; 1266 } 1267 } 1268 } 1269 1270 /* 1271 * Grab a particular vnode from the free list, increment its 1272 * reference count and lock it. If the vnode lock bit is set the 1273 * vnode is being eliminated in vgone. In that case, we can not 1274 * grab the vnode, so the process is awakened when the transition is 1275 * completed, and an error returned to indicate that the vnode is no 1276 * longer usable (possibly having been changed to a new file system type). 
1277 */ 1278 int 1279 vget(vnode_t *vp, int flags) 1280 { 1281 int error = 0; 1282 1283 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1284 1285 if ((flags & LK_INTERLOCK) == 0) 1286 mutex_enter(&vp->v_interlock); 1287 1288 /* 1289 * Before adding a reference, we must remove the vnode 1290 * from its freelist. 1291 */ 1292 if (vp->v_usecount == 0) { 1293 vremfree(vp); 1294 vp->v_usecount = 1; 1295 } else { 1296 atomic_inc_uint(&vp->v_usecount); 1297 } 1298 1299 /* 1300 * If the vnode is in the process of being cleaned out for 1301 * another use, we wait for the cleaning to finish and then 1302 * return failure. Cleaning is determined by checking if 1303 * the VI_XLOCK or VI_FREEING flags are set. 1304 */ 1305 if ((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0) { 1306 if ((flags & LK_NOWAIT) != 0) { 1307 vrelel(vp, 0); 1308 return EBUSY; 1309 } 1310 vwait(vp, VI_XLOCK | VI_FREEING); 1311 vrelel(vp, 0); 1312 return ENOENT; 1313 } 1314 1315 if ((vp->v_iflag & VI_INACTNOW) != 0) { 1316 /* 1317 * if it's being desactived, wait for it to complete. 1318 * Make sure to not return a clean vnode. 1319 */ 1320 if ((flags & LK_NOWAIT) != 0) { 1321 vrelel(vp, 0); 1322 return EBUSY; 1323 } 1324 vwait(vp, VI_INACTNOW); 1325 if ((vp->v_iflag & VI_CLEAN) != 0) { 1326 vrelel(vp, 0); 1327 return ENOENT; 1328 } 1329 } 1330 1331 /* 1332 * Ok, we got it in good shape. Just locking left. 1333 */ 1334 KASSERT((vp->v_iflag & VI_CLEAN) == 0); 1335 if (flags & LK_TYPE_MASK) { 1336 error = vn_lock(vp, flags | LK_INTERLOCK); 1337 if (error != 0) { 1338 vrele(vp); 1339 } 1340 } else { 1341 mutex_exit(&vp->v_interlock); 1342 } 1343 return error; 1344 } 1345 1346 /* 1347 * vput(), just unlock and vrele() 1348 */ 1349 void 1350 vput(vnode_t *vp) 1351 { 1352 1353 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1354 1355 VOP_UNLOCK(vp); 1356 vrele(vp); 1357 } 1358 1359 /* 1360 * Try to drop reference on a vnode. Abort if we are releasing the 1361 * last reference. 
Note: this _must_ succeed if not the last reference. 1362 */ 1363 static inline bool 1364 vtryrele(vnode_t *vp) 1365 { 1366 u_int use, next; 1367 1368 for (use = vp->v_usecount;; use = next) { 1369 if (use == 1) { 1370 return false; 1371 } 1372 KASSERT((use & VC_MASK) > 1); 1373 next = atomic_cas_uint(&vp->v_usecount, use, use - 1); 1374 if (__predict_true(next == use)) { 1375 return true; 1376 } 1377 } 1378 } 1379 1380 /* 1381 * Vnode release. If reference count drops to zero, call inactive 1382 * routine and either return to freelist or free to the pool. 1383 */ 1384 void 1385 vrelel(vnode_t *vp, int flags) 1386 { 1387 bool recycle, defer; 1388 int error; 1389 1390 KASSERT(mutex_owned(&vp->v_interlock)); 1391 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1392 KASSERT(vp->v_freelisthd == NULL); 1393 1394 if (__predict_false(vp->v_op == dead_vnodeop_p && 1395 (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) { 1396 vpanic(vp, "dead but not clean"); 1397 } 1398 1399 /* 1400 * If not the last reference, just drop the reference count 1401 * and unlock. 1402 */ 1403 if (vtryrele(vp)) { 1404 vp->v_iflag |= VI_INACTREDO; 1405 mutex_exit(&vp->v_interlock); 1406 return; 1407 } 1408 if (vp->v_usecount <= 0 || vp->v_writecount != 0) { 1409 vpanic(vp, "vrelel: bad ref count"); 1410 } 1411 1412 KASSERT((vp->v_iflag & VI_XLOCK) == 0); 1413 1414 /* 1415 * If not clean, deactivate the vnode, but preserve 1416 * our reference across the call to VOP_INACTIVE(). 1417 */ 1418 retry: 1419 if ((vp->v_iflag & VI_CLEAN) == 0) { 1420 recycle = false; 1421 vp->v_iflag |= VI_INACTNOW; 1422 1423 /* 1424 * XXX This ugly block can be largely eliminated if 1425 * locking is pushed down into the file systems. 1426 * 1427 * Defer vnode release to vrele_thread if caller 1428 * requests it explicitly. 1429 */ 1430 if ((curlwp == uvm.pagedaemon_lwp) || 1431 (flags & VRELEL_ASYNC_RELE) != 0) { 1432 /* The pagedaemon can't wait around; defer. 
*/ 1433 defer = true; 1434 } else if (curlwp == vrele_lwp) { 1435 /* 1436 * We have to try harder. But we can't sleep 1437 * with VI_INACTNOW as vget() may be waiting on it. 1438 */ 1439 vp->v_iflag &= ~(VI_INACTREDO|VI_INACTNOW); 1440 cv_broadcast(&vp->v_cv); 1441 error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | 1442 LK_RETRY); 1443 if (error != 0) { 1444 /* XXX */ 1445 vpanic(vp, "vrele: unable to lock %p"); 1446 } 1447 mutex_enter(&vp->v_interlock); 1448 /* 1449 * if we did get another reference while 1450 * sleeping, don't try to inactivate it yet. 1451 */ 1452 if (__predict_false(vtryrele(vp))) { 1453 VOP_UNLOCK(vp); 1454 mutex_exit(&vp->v_interlock); 1455 return; 1456 } 1457 vp->v_iflag |= VI_INACTNOW; 1458 mutex_exit(&vp->v_interlock); 1459 defer = false; 1460 } else if ((vp->v_iflag & VI_LAYER) != 0) { 1461 /* 1462 * Acquiring the stack's lock in vclean() even 1463 * for an honest vput/vrele is dangerous because 1464 * our caller may hold other vnode locks; defer. 1465 */ 1466 defer = true; 1467 } else { 1468 /* If we can't acquire the lock, then defer. */ 1469 vp->v_iflag &= ~VI_INACTREDO; 1470 error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | 1471 LK_NOWAIT); 1472 if (error != 0) { 1473 defer = true; 1474 mutex_enter(&vp->v_interlock); 1475 } else { 1476 defer = false; 1477 } 1478 } 1479 1480 if (defer) { 1481 /* 1482 * Defer reclaim to the kthread; it's not safe to 1483 * clean it here. We donate it our last reference. 
1484 */ 1485 KASSERT(mutex_owned(&vp->v_interlock)); 1486 KASSERT((vp->v_iflag & VI_INACTPEND) == 0); 1487 vp->v_iflag &= ~VI_INACTNOW; 1488 vp->v_iflag |= VI_INACTPEND; 1489 mutex_enter(&vrele_lock); 1490 TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist); 1491 if (++vrele_pending > (desiredvnodes >> 8)) 1492 cv_signal(&vrele_cv); 1493 mutex_exit(&vrele_lock); 1494 cv_broadcast(&vp->v_cv); 1495 mutex_exit(&vp->v_interlock); 1496 return; 1497 } 1498 1499 #ifdef DIAGNOSTIC 1500 if ((vp->v_type == VBLK || vp->v_type == VCHR) && 1501 vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) { 1502 vprint("vrelel: missing VOP_CLOSE()", vp); 1503 } 1504 #endif 1505 1506 /* 1507 * The vnode can gain another reference while being 1508 * deactivated. If VOP_INACTIVE() indicates that 1509 * the described file has been deleted, then recycle 1510 * the vnode irrespective of additional references. 1511 * Another thread may be waiting to re-use the on-disk 1512 * inode. 1513 * 1514 * Note that VOP_INACTIVE() will drop the vnode lock. 1515 */ 1516 VOP_INACTIVE(vp, &recycle); 1517 mutex_enter(&vp->v_interlock); 1518 vp->v_iflag &= ~VI_INACTNOW; 1519 cv_broadcast(&vp->v_cv); 1520 if (!recycle) { 1521 if (vtryrele(vp)) { 1522 mutex_exit(&vp->v_interlock); 1523 return; 1524 } 1525 1526 /* 1527 * If we grew another reference while 1528 * VOP_INACTIVE() was underway, retry. 1529 */ 1530 if ((vp->v_iflag & VI_INACTREDO) != 0) { 1531 goto retry; 1532 } 1533 } 1534 1535 /* Take care of space accounting. */ 1536 if (vp->v_iflag & VI_EXECMAP) { 1537 atomic_add_int(&uvmexp.execpages, 1538 -vp->v_uobj.uo_npages); 1539 atomic_add_int(&uvmexp.filepages, 1540 vp->v_uobj.uo_npages); 1541 } 1542 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP); 1543 vp->v_vflag &= ~VV_MAPPED; 1544 1545 /* 1546 * Recycle the vnode if the file is now unused (unlinked), 1547 * otherwise just free it. 
1548 */ 1549 if (recycle) { 1550 vclean(vp, DOCLOSE); 1551 } 1552 KASSERT(vp->v_usecount > 0); 1553 } 1554 1555 if (atomic_dec_uint_nv(&vp->v_usecount) != 0) { 1556 /* Gained another reference while being reclaimed. */ 1557 mutex_exit(&vp->v_interlock); 1558 return; 1559 } 1560 1561 if ((vp->v_iflag & VI_CLEAN) != 0) { 1562 /* 1563 * It's clean so destroy it. It isn't referenced 1564 * anywhere since it has been reclaimed. 1565 */ 1566 KASSERT(vp->v_holdcnt == 0); 1567 KASSERT(vp->v_writecount == 0); 1568 mutex_exit(&vp->v_interlock); 1569 insmntque(vp, NULL); 1570 if (vp->v_type == VBLK || vp->v_type == VCHR) { 1571 spec_node_destroy(vp); 1572 } 1573 vnfree(vp); 1574 } else { 1575 /* 1576 * Otherwise, put it back onto the freelist. It 1577 * can't be destroyed while still associated with 1578 * a file system. 1579 */ 1580 mutex_enter(&vnode_free_list_lock); 1581 if (vp->v_holdcnt > 0) { 1582 vp->v_freelisthd = &vnode_hold_list; 1583 } else { 1584 vp->v_freelisthd = &vnode_free_list; 1585 } 1586 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); 1587 mutex_exit(&vnode_free_list_lock); 1588 mutex_exit(&vp->v_interlock); 1589 } 1590 } 1591 1592 void 1593 vrele(vnode_t *vp) 1594 { 1595 1596 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1597 1598 if ((vp->v_iflag & VI_INACTNOW) == 0 && vtryrele(vp)) { 1599 return; 1600 } 1601 mutex_enter(&vp->v_interlock); 1602 vrelel(vp, 0); 1603 } 1604 1605 /* 1606 * Asynchronous vnode release, vnode is released in different context. 
1607 */ 1608 void 1609 vrele_async(vnode_t *vp) 1610 { 1611 1612 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1613 1614 if ((vp->v_iflag & VI_INACTNOW) == 0 && vtryrele(vp)) { 1615 return; 1616 } 1617 1618 mutex_enter(&vp->v_interlock); 1619 vrelel(vp, VRELEL_ASYNC_RELE); 1620 } 1621 1622 static void 1623 vrele_thread(void *cookie) 1624 { 1625 vnode_t *vp; 1626 1627 for (;;) { 1628 mutex_enter(&vrele_lock); 1629 while (TAILQ_EMPTY(&vrele_list)) { 1630 vrele_gen++; 1631 cv_broadcast(&vrele_cv); 1632 cv_timedwait(&vrele_cv, &vrele_lock, hz); 1633 } 1634 vp = TAILQ_FIRST(&vrele_list); 1635 TAILQ_REMOVE(&vrele_list, vp, v_freelist); 1636 vrele_pending--; 1637 mutex_exit(&vrele_lock); 1638 1639 /* 1640 * If not the last reference, then ignore the vnode 1641 * and look for more work. 1642 */ 1643 mutex_enter(&vp->v_interlock); 1644 KASSERT((vp->v_iflag & VI_INACTPEND) != 0); 1645 vp->v_iflag &= ~VI_INACTPEND; 1646 vrelel(vp, 0); 1647 } 1648 } 1649 1650 /* 1651 * Page or buffer structure gets a reference. 1652 * Called with v_interlock held. 1653 */ 1654 void 1655 vholdl(vnode_t *vp) 1656 { 1657 1658 KASSERT(mutex_owned(&vp->v_interlock)); 1659 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1660 1661 if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) { 1662 mutex_enter(&vnode_free_list_lock); 1663 KASSERT(vp->v_freelisthd == &vnode_free_list); 1664 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); 1665 vp->v_freelisthd = &vnode_hold_list; 1666 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); 1667 mutex_exit(&vnode_free_list_lock); 1668 } 1669 } 1670 1671 /* 1672 * Page or buffer structure frees a reference. 1673 * Called with v_interlock held. 
1674 */ 1675 void 1676 holdrelel(vnode_t *vp) 1677 { 1678 1679 KASSERT(mutex_owned(&vp->v_interlock)); 1680 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1681 1682 if (vp->v_holdcnt <= 0) { 1683 vpanic(vp, "holdrelel: holdcnt vp %p"); 1684 } 1685 1686 vp->v_holdcnt--; 1687 if (vp->v_holdcnt == 0 && vp->v_usecount == 0) { 1688 mutex_enter(&vnode_free_list_lock); 1689 KASSERT(vp->v_freelisthd == &vnode_hold_list); 1690 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); 1691 vp->v_freelisthd = &vnode_free_list; 1692 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); 1693 mutex_exit(&vnode_free_list_lock); 1694 } 1695 } 1696 1697 /* 1698 * Vnode reference, where a reference is already held by some other 1699 * object (for example, a file structure). 1700 */ 1701 void 1702 vref(vnode_t *vp) 1703 { 1704 1705 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1706 KASSERT(vp->v_usecount != 0); 1707 1708 atomic_inc_uint(&vp->v_usecount); 1709 } 1710 1711 /* 1712 * Remove any vnodes in the vnode table belonging to mount point mp. 1713 * 1714 * If FORCECLOSE is not specified, there should not be any active ones, 1715 * return error if any are found (nb: this is a user error, not a 1716 * system error). If FORCECLOSE is specified, detach any active vnodes 1717 * that are found. 1718 * 1719 * If WRITECLOSE is set, only flush out regular file vnodes open for 1720 * writing. 1721 * 1722 * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped. 
1723 */ 1724 #ifdef DEBUG 1725 int busyprt = 0; /* print out busy vnodes */ 1726 struct ctldebug debug1 = { "busyprt", &busyprt }; 1727 #endif 1728 1729 static vnode_t * 1730 vflushnext(vnode_t *mvp, int *when) 1731 { 1732 1733 if (hardclock_ticks > *when) { 1734 mutex_exit(&mntvnode_lock); 1735 yield(); 1736 mutex_enter(&mntvnode_lock); 1737 *when = hardclock_ticks + hz / 10; 1738 } 1739 1740 return vunmark(mvp); 1741 } 1742 1743 int 1744 vflush(struct mount *mp, vnode_t *skipvp, int flags) 1745 { 1746 vnode_t *vp, *mvp; 1747 int busy = 0, when = 0, gen; 1748 1749 /* 1750 * First, flush out any vnode references from vrele_list. 1751 */ 1752 mutex_enter(&vrele_lock); 1753 gen = vrele_gen; 1754 while (vrele_pending && gen == vrele_gen) { 1755 cv_broadcast(&vrele_cv); 1756 cv_wait(&vrele_cv, &vrele_lock); 1757 } 1758 mutex_exit(&vrele_lock); 1759 1760 /* Allocate a marker vnode. */ 1761 if ((mvp = vnalloc(mp)) == NULL) 1762 return (ENOMEM); 1763 1764 /* 1765 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() 1766 * and vclean() are called 1767 */ 1768 mutex_enter(&mntvnode_lock); 1769 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL; 1770 vp = vflushnext(mvp, &when)) { 1771 vmark(mvp, vp); 1772 if (vp->v_mount != mp || vismarker(vp)) 1773 continue; 1774 /* 1775 * Skip over a selected vnode. 1776 */ 1777 if (vp == skipvp) 1778 continue; 1779 mutex_enter(&vp->v_interlock); 1780 /* 1781 * Ignore clean but still referenced vnodes. 1782 */ 1783 if ((vp->v_iflag & VI_CLEAN) != 0) { 1784 mutex_exit(&vp->v_interlock); 1785 continue; 1786 } 1787 /* 1788 * Skip over a vnodes marked VSYSTEM. 1789 */ 1790 if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) { 1791 mutex_exit(&vp->v_interlock); 1792 continue; 1793 } 1794 /* 1795 * If WRITECLOSE is set, only flush out regular file 1796 * vnodes open for writing. 
1797 */ 1798 if ((flags & WRITECLOSE) && 1799 (vp->v_writecount == 0 || vp->v_type != VREG)) { 1800 mutex_exit(&vp->v_interlock); 1801 continue; 1802 } 1803 /* 1804 * With v_usecount == 0, all we need to do is clear 1805 * out the vnode data structures and we are done. 1806 */ 1807 if (vp->v_usecount == 0) { 1808 mutex_exit(&mntvnode_lock); 1809 vremfree(vp); 1810 vp->v_usecount = 1; 1811 vclean(vp, DOCLOSE); 1812 vrelel(vp, 0); 1813 mutex_enter(&mntvnode_lock); 1814 continue; 1815 } 1816 /* 1817 * If FORCECLOSE is set, forcibly close the vnode. 1818 * For block or character devices, revert to an 1819 * anonymous device. For all other files, just 1820 * kill them. 1821 */ 1822 if (flags & FORCECLOSE) { 1823 mutex_exit(&mntvnode_lock); 1824 atomic_inc_uint(&vp->v_usecount); 1825 if (vp->v_type != VBLK && vp->v_type != VCHR) { 1826 vclean(vp, DOCLOSE); 1827 vrelel(vp, 0); 1828 } else { 1829 vclean(vp, 0); 1830 vp->v_op = spec_vnodeop_p; /* XXXSMP */ 1831 mutex_exit(&vp->v_interlock); 1832 /* 1833 * The vnode isn't clean, but still resides 1834 * on the mount list. Remove it. XXX This 1835 * is a bit dodgy. 1836 */ 1837 insmntque(vp, NULL); 1838 vrele(vp); 1839 } 1840 mutex_enter(&mntvnode_lock); 1841 continue; 1842 } 1843 #ifdef DEBUG 1844 if (busyprt) 1845 vprint("vflush: busy vnode", vp); 1846 #endif 1847 mutex_exit(&vp->v_interlock); 1848 busy++; 1849 } 1850 mutex_exit(&mntvnode_lock); 1851 vnfree(mvp); 1852 if (busy) 1853 return (EBUSY); 1854 return (0); 1855 } 1856 1857 /* 1858 * Disassociate the underlying file system from a vnode. 1859 * 1860 * Must be called with the interlock held, and will return with it held. 1861 */ 1862 void 1863 vclean(vnode_t *vp, int flags) 1864 { 1865 lwp_t *l = curlwp; 1866 bool recycle, active; 1867 int error; 1868 1869 KASSERT(mutex_owned(&vp->v_interlock)); 1870 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1871 KASSERT(vp->v_usecount != 0); 1872 1873 /* If cleaning is already in progress wait until done and return. 
*/ 1874 if (vp->v_iflag & VI_XLOCK) { 1875 vwait(vp, VI_XLOCK); 1876 return; 1877 } 1878 1879 /* If already clean, nothing to do. */ 1880 if ((vp->v_iflag & VI_CLEAN) != 0) { 1881 return; 1882 } 1883 1884 /* 1885 * Prevent the vnode from being recycled or brought into use 1886 * while we clean it out. 1887 */ 1888 vp->v_iflag |= VI_XLOCK; 1889 if (vp->v_iflag & VI_EXECMAP) { 1890 atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages); 1891 atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages); 1892 } 1893 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP); 1894 active = (vp->v_usecount > 1); 1895 1896 /* XXXAD should not lock vnode under layer */ 1897 mutex_exit(&vp->v_interlock); 1898 VOP_LOCK(vp, LK_EXCLUSIVE); 1899 1900 /* 1901 * Clean out any cached data associated with the vnode. 1902 * If purging an active vnode, it must be closed and 1903 * deactivated before being reclaimed. Note that the 1904 * VOP_INACTIVE will unlock the vnode. 1905 */ 1906 if (flags & DOCLOSE) { 1907 error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0); 1908 if (error != 0) { 1909 /* XXX, fix vn_start_write's grab of mp and use that. */ 1910 1911 if (wapbl_vphaswapbl(vp)) 1912 WAPBL_DISCARD(wapbl_vptomp(vp)); 1913 error = vinvalbuf(vp, 0, NOCRED, l, 0, 0); 1914 } 1915 KASSERT(error == 0); 1916 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); 1917 if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) { 1918 spec_node_revoke(vp); 1919 } 1920 } 1921 if (active) { 1922 VOP_INACTIVE(vp, &recycle); 1923 } else { 1924 /* 1925 * Any other processes trying to obtain this lock must first 1926 * wait for VI_XLOCK to clear, then call the new lock operation. 1927 */ 1928 VOP_UNLOCK(vp); 1929 } 1930 1931 /* Disassociate the underlying file system from the vnode. 
*/ 1932 if (VOP_RECLAIM(vp)) { 1933 vpanic(vp, "vclean: cannot reclaim"); 1934 } 1935 1936 KASSERT(vp->v_uobj.uo_npages == 0); 1937 if (vp->v_type == VREG && vp->v_ractx != NULL) { 1938 uvm_ra_freectx(vp->v_ractx); 1939 vp->v_ractx = NULL; 1940 } 1941 cache_purge(vp); 1942 1943 /* Done with purge, notify sleepers of the grim news. */ 1944 mutex_enter(&vp->v_interlock); 1945 vp->v_op = dead_vnodeop_p; 1946 vp->v_tag = VT_NON; 1947 KNOTE(&vp->v_klist, NOTE_REVOKE); 1948 vp->v_iflag &= ~(VI_XLOCK | VI_FREEING); 1949 vp->v_vflag &= ~VV_LOCKSWORK; 1950 if ((flags & DOCLOSE) != 0) { 1951 vp->v_iflag |= VI_CLEAN; 1952 } 1953 cv_broadcast(&vp->v_cv); 1954 1955 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); 1956 } 1957 1958 /* 1959 * Recycle an unused vnode to the front of the free list. 1960 * Release the passed interlock if the vnode will be recycled. 1961 */ 1962 int 1963 vrecycle(vnode_t *vp, kmutex_t *inter_lkp, struct lwp *l) 1964 { 1965 1966 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1967 1968 mutex_enter(&vp->v_interlock); 1969 if (vp->v_usecount != 0) { 1970 mutex_exit(&vp->v_interlock); 1971 return (0); 1972 } 1973 if (inter_lkp) 1974 mutex_exit(inter_lkp); 1975 vremfree(vp); 1976 vp->v_usecount = 1; 1977 vclean(vp, DOCLOSE); 1978 vrelel(vp, 0); 1979 return (1); 1980 } 1981 1982 /* 1983 * Eliminate all activity associated with a vnode in preparation for 1984 * reuse. Drops a reference from the vnode. 1985 */ 1986 void 1987 vgone(vnode_t *vp) 1988 { 1989 1990 mutex_enter(&vp->v_interlock); 1991 vclean(vp, DOCLOSE); 1992 vrelel(vp, 0); 1993 } 1994 1995 /* 1996 * Lookup a vnode by device number. 
1997 */ 1998 int 1999 vfinddev(dev_t dev, enum vtype type, vnode_t **vpp) 2000 { 2001 vnode_t *vp; 2002 int rc = 0; 2003 2004 mutex_enter(&device_lock); 2005 for (vp = specfs_hash[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 2006 if (dev != vp->v_rdev || type != vp->v_type) 2007 continue; 2008 *vpp = vp; 2009 rc = 1; 2010 break; 2011 } 2012 mutex_exit(&device_lock); 2013 return (rc); 2014 } 2015 2016 /* 2017 * Revoke all the vnodes corresponding to the specified minor number 2018 * range (endpoints inclusive) of the specified major. 2019 */ 2020 void 2021 vdevgone(int maj, int minl, int minh, enum vtype type) 2022 { 2023 vnode_t *vp, **vpp; 2024 dev_t dev; 2025 int mn; 2026 2027 vp = NULL; /* XXX gcc */ 2028 2029 mutex_enter(&device_lock); 2030 for (mn = minl; mn <= minh; mn++) { 2031 dev = makedev(maj, mn); 2032 vpp = &specfs_hash[SPECHASH(dev)]; 2033 for (vp = *vpp; vp != NULL;) { 2034 mutex_enter(&vp->v_interlock); 2035 if ((vp->v_iflag & VI_CLEAN) != 0 || 2036 dev != vp->v_rdev || type != vp->v_type) { 2037 mutex_exit(&vp->v_interlock); 2038 vp = vp->v_specnext; 2039 continue; 2040 } 2041 mutex_exit(&device_lock); 2042 if (vget(vp, LK_INTERLOCK) == 0) { 2043 VOP_REVOKE(vp, REVOKEALL); 2044 vrele(vp); 2045 } 2046 mutex_enter(&device_lock); 2047 vp = *vpp; 2048 } 2049 } 2050 mutex_exit(&device_lock); 2051 } 2052 2053 /* 2054 * Eliminate all activity associated with the requested vnode 2055 * and with all vnodes aliased to the requested vnode. 
2056 */ 2057 void 2058 vrevoke(vnode_t *vp) 2059 { 2060 vnode_t *vq, **vpp; 2061 enum vtype type; 2062 dev_t dev; 2063 2064 KASSERT(vp->v_usecount > 0); 2065 2066 mutex_enter(&vp->v_interlock); 2067 if ((vp->v_iflag & VI_CLEAN) != 0) { 2068 mutex_exit(&vp->v_interlock); 2069 return; 2070 } else if (vp->v_type != VBLK && vp->v_type != VCHR) { 2071 atomic_inc_uint(&vp->v_usecount); 2072 vclean(vp, DOCLOSE); 2073 vrelel(vp, 0); 2074 return; 2075 } else { 2076 dev = vp->v_rdev; 2077 type = vp->v_type; 2078 mutex_exit(&vp->v_interlock); 2079 } 2080 2081 vpp = &specfs_hash[SPECHASH(dev)]; 2082 mutex_enter(&device_lock); 2083 for (vq = *vpp; vq != NULL;) { 2084 /* If clean or being cleaned, then ignore it. */ 2085 mutex_enter(&vq->v_interlock); 2086 if ((vq->v_iflag & (VI_CLEAN | VI_XLOCK)) != 0 || 2087 vq->v_rdev != dev || vq->v_type != type) { 2088 mutex_exit(&vq->v_interlock); 2089 vq = vq->v_specnext; 2090 continue; 2091 } 2092 mutex_exit(&device_lock); 2093 if (vq->v_usecount == 0) { 2094 vremfree(vq); 2095 vq->v_usecount = 1; 2096 } else { 2097 atomic_inc_uint(&vq->v_usecount); 2098 } 2099 vclean(vq, DOCLOSE); 2100 vrelel(vq, 0); 2101 mutex_enter(&device_lock); 2102 vq = *vpp; 2103 } 2104 mutex_exit(&device_lock); 2105 } 2106 2107 /* 2108 * sysctl helper routine to return list of supported fstypes 2109 */ 2110 int 2111 sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS) 2112 { 2113 char bf[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 2114 char *where = oldp; 2115 struct vfsops *v; 2116 size_t needed, left, slen; 2117 int error, first; 2118 2119 if (newp != NULL) 2120 return (EPERM); 2121 if (namelen != 0) 2122 return (EINVAL); 2123 2124 first = 1; 2125 error = 0; 2126 needed = 0; 2127 left = *oldlenp; 2128 2129 sysctl_unlock(); 2130 mutex_enter(&vfs_list_lock); 2131 LIST_FOREACH(v, &vfs_list, vfs_list) { 2132 if (where == NULL) 2133 needed += strlen(v->vfs_name) + 1; 2134 else { 2135 memset(bf, 0, sizeof(bf)); 2136 if (first) { 2137 strncpy(bf, v->vfs_name, sizeof(bf)); 
2138 first = 0; 2139 } else { 2140 bf[0] = ' '; 2141 strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1); 2142 } 2143 bf[sizeof(bf)-1] = '\0'; 2144 slen = strlen(bf); 2145 if (left < slen + 1) 2146 break; 2147 v->vfs_refcount++; 2148 mutex_exit(&vfs_list_lock); 2149 /* +1 to copy out the trailing NUL byte */ 2150 error = copyout(bf, where, slen + 1); 2151 mutex_enter(&vfs_list_lock); 2152 v->vfs_refcount--; 2153 if (error) 2154 break; 2155 where += slen; 2156 needed += slen; 2157 left -= slen; 2158 } 2159 } 2160 mutex_exit(&vfs_list_lock); 2161 sysctl_relock(); 2162 *oldlenp = needed; 2163 return (error); 2164 } 2165 2166 2167 int kinfo_vdebug = 1; 2168 int kinfo_vgetfailed; 2169 #define KINFO_VNODESLOP 10 2170 /* 2171 * Dump vnode list (via sysctl). 2172 * Copyout address of vnode followed by vnode. 2173 */ 2174 /* ARGSUSED */ 2175 int 2176 sysctl_kern_vnode(SYSCTLFN_ARGS) 2177 { 2178 char *where = oldp; 2179 size_t *sizep = oldlenp; 2180 struct mount *mp, *nmp; 2181 vnode_t *vp, *mvp, vbuf; 2182 char *bp = where; 2183 char *ewhere; 2184 int error; 2185 2186 if (namelen != 0) 2187 return (EOPNOTSUPP); 2188 if (newp != NULL) 2189 return (EPERM); 2190 2191 #define VPTRSZ sizeof(vnode_t *) 2192 #define VNODESZ sizeof(vnode_t) 2193 if (where == NULL) { 2194 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 2195 return (0); 2196 } 2197 ewhere = where + *sizep; 2198 2199 sysctl_unlock(); 2200 mutex_enter(&mountlist_lock); 2201 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 2202 mp = nmp) { 2203 if (vfs_busy(mp, &nmp)) { 2204 continue; 2205 } 2206 /* Allocate a marker vnode. */ 2207 mvp = vnalloc(mp); 2208 /* Should never fail for mp != NULL */ 2209 KASSERT(mvp != NULL); 2210 mutex_enter(&mntvnode_lock); 2211 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; 2212 vp = vunmark(mvp)) { 2213 vmark(mvp, vp); 2214 /* 2215 * Check that the vp is still associated with 2216 * this filesystem. RACE: could have been 2217 * recycled onto the same filesystem. 
2218 */ 2219 if (vp->v_mount != mp || vismarker(vp)) 2220 continue; 2221 if (bp + VPTRSZ + VNODESZ > ewhere) { 2222 (void)vunmark(mvp); 2223 mutex_exit(&mntvnode_lock); 2224 vnfree(mvp); 2225 vfs_unbusy(mp, false, NULL); 2226 sysctl_relock(); 2227 *sizep = bp - where; 2228 return (ENOMEM); 2229 } 2230 memcpy(&vbuf, vp, VNODESZ); 2231 mutex_exit(&mntvnode_lock); 2232 if ((error = copyout(&vp, bp, VPTRSZ)) || 2233 (error = copyout(&vbuf, bp + VPTRSZ, VNODESZ))) { 2234 mutex_enter(&mntvnode_lock); 2235 (void)vunmark(mvp); 2236 mutex_exit(&mntvnode_lock); 2237 vnfree(mvp); 2238 vfs_unbusy(mp, false, NULL); 2239 sysctl_relock(); 2240 return (error); 2241 } 2242 bp += VPTRSZ + VNODESZ; 2243 mutex_enter(&mntvnode_lock); 2244 } 2245 mutex_exit(&mntvnode_lock); 2246 vnfree(mvp); 2247 vfs_unbusy(mp, false, &nmp); 2248 } 2249 mutex_exit(&mountlist_lock); 2250 sysctl_relock(); 2251 2252 *sizep = bp - where; 2253 return (0); 2254 } 2255 2256 /* 2257 * Remove clean vnodes from a mountpoint's vnode list. 2258 */ 2259 void 2260 vfs_scrubvnlist(struct mount *mp) 2261 { 2262 vnode_t *vp, *nvp; 2263 2264 retry: 2265 mutex_enter(&mntvnode_lock); 2266 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { 2267 nvp = TAILQ_NEXT(vp, v_mntvnodes); 2268 mutex_enter(&vp->v_interlock); 2269 if ((vp->v_iflag & VI_CLEAN) != 0) { 2270 TAILQ_REMOVE(&mp->mnt_vnodelist, vp, v_mntvnodes); 2271 vp->v_mount = NULL; 2272 mutex_exit(&mntvnode_lock); 2273 mutex_exit(&vp->v_interlock); 2274 vfs_destroy(mp); 2275 goto retry; 2276 } 2277 mutex_exit(&vp->v_interlock); 2278 } 2279 mutex_exit(&mntvnode_lock); 2280 } 2281 2282 /* 2283 * Check to see if a filesystem is mounted on a block device. 
2284 */ 2285 int 2286 vfs_mountedon(vnode_t *vp) 2287 { 2288 vnode_t *vq; 2289 int error = 0; 2290 2291 if (vp->v_type != VBLK) 2292 return ENOTBLK; 2293 if (vp->v_specmountpoint != NULL) 2294 return (EBUSY); 2295 mutex_enter(&device_lock); 2296 for (vq = specfs_hash[SPECHASH(vp->v_rdev)]; vq != NULL; 2297 vq = vq->v_specnext) { 2298 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) 2299 continue; 2300 if (vq->v_specmountpoint != NULL) { 2301 error = EBUSY; 2302 break; 2303 } 2304 } 2305 mutex_exit(&device_lock); 2306 return (error); 2307 } 2308 2309 /* 2310 * Unmount all file systems. 2311 * We traverse the list in reverse order under the assumption that doing so 2312 * will avoid needing to worry about dependencies. 2313 */ 2314 bool 2315 vfs_unmountall(struct lwp *l) 2316 { 2317 2318 printf("unmounting file systems..."); 2319 return vfs_unmountall1(l, true, true); 2320 } 2321 2322 static void 2323 vfs_unmount_print(struct mount *mp, const char *pfx) 2324 { 2325 2326 aprint_verbose("%sunmounted %s on %s type %s\n", pfx, 2327 mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, 2328 mp->mnt_stat.f_fstypename); 2329 } 2330 2331 bool 2332 vfs_unmount_forceone(struct lwp *l) 2333 { 2334 struct mount *mp, *nmp; 2335 int error; 2336 2337 nmp = NULL; 2338 2339 CIRCLEQ_FOREACH_REVERSE(mp, &mountlist, mnt_list) { 2340 if (nmp == NULL || mp->mnt_gen > nmp->mnt_gen) { 2341 nmp = mp; 2342 } 2343 } 2344 if (nmp == NULL) { 2345 return false; 2346 } 2347 2348 #ifdef DEBUG 2349 printf("\nforcefully unmounting %s (%s)...", 2350 nmp->mnt_stat.f_mntonname, nmp->mnt_stat.f_mntfromname); 2351 #endif 2352 atomic_inc_uint(&nmp->mnt_refcnt); 2353 if ((error = dounmount(nmp, MNT_FORCE, l)) == 0) { 2354 vfs_unmount_print(nmp, "forcefully "); 2355 return true; 2356 } else { 2357 vfs_destroy(nmp); 2358 } 2359 2360 #ifdef DEBUG 2361 printf("forceful unmount of %s failed with error %d\n", 2362 nmp->mnt_stat.f_mntonname, error); 2363 #endif 2364 2365 return false; 2366 } 2367 2368 
bool 2369 vfs_unmountall1(struct lwp *l, bool force, bool verbose) 2370 { 2371 struct mount *mp, *nmp; 2372 bool any_error = false, progress = false; 2373 int error; 2374 2375 for (mp = CIRCLEQ_LAST(&mountlist); 2376 mp != (void *)&mountlist; 2377 mp = nmp) { 2378 nmp = CIRCLEQ_PREV(mp, mnt_list); 2379 #ifdef DEBUG 2380 printf("\nunmounting %p %s (%s)...", 2381 (void *)mp, mp->mnt_stat.f_mntonname, 2382 mp->mnt_stat.f_mntfromname); 2383 #endif 2384 atomic_inc_uint(&mp->mnt_refcnt); 2385 if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0) { 2386 vfs_unmount_print(mp, ""); 2387 progress = true; 2388 } else { 2389 vfs_destroy(mp); 2390 if (verbose) { 2391 printf("unmount of %s failed with error %d\n", 2392 mp->mnt_stat.f_mntonname, error); 2393 } 2394 any_error = true; 2395 } 2396 } 2397 if (verbose) { 2398 printf(" done\n"); 2399 } 2400 if (any_error && verbose) { 2401 printf("WARNING: some file systems would not unmount\n"); 2402 } 2403 return progress; 2404 } 2405 2406 /* 2407 * Sync and unmount file systems before shutting down. 2408 */ 2409 void 2410 vfs_shutdown(void) 2411 { 2412 struct lwp *l; 2413 2414 /* XXX we're certainly not running in lwp0's context! */ 2415 l = (curlwp == NULL) ? &lwp0 : curlwp; 2416 2417 vfs_shutdown1(l); 2418 } 2419 2420 void 2421 vfs_sync_all(struct lwp *l) 2422 { 2423 printf("syncing disks... "); 2424 2425 /* remove user processes from run queue */ 2426 suspendsched(); 2427 (void) spl0(); 2428 2429 /* avoid coming back this way again if we panic. */ 2430 doing_shutdown = 1; 2431 2432 sys_sync(l, NULL, NULL); 2433 2434 /* Wait for sync to finish. 
*/ 2435 if (buf_syncwait() != 0) { 2436 #if defined(DDB) && defined(DEBUG_HALT_BUSY) 2437 Debugger(); 2438 #endif 2439 printf("giving up\n"); 2440 return; 2441 } else 2442 printf("done\n"); 2443 } 2444 2445 static void 2446 vfs_shutdown1(struct lwp *l) 2447 { 2448 2449 vfs_sync_all(l); 2450 2451 /* 2452 * If we've panic'd, don't make the situation potentially 2453 * worse by unmounting the file systems. 2454 */ 2455 if (panicstr != NULL) 2456 return; 2457 2458 /* Release inodes held by texts before update. */ 2459 #ifdef notdef 2460 vnshutdown(); 2461 #endif 2462 /* Unmount file systems. */ 2463 vfs_unmountall(l); 2464 } 2465 2466 /* 2467 * Print a list of supported file system types (used by vfs_mountroot) 2468 */ 2469 static void 2470 vfs_print_fstypes(void) 2471 { 2472 struct vfsops *v; 2473 int cnt = 0; 2474 2475 mutex_enter(&vfs_list_lock); 2476 LIST_FOREACH(v, &vfs_list, vfs_list) 2477 ++cnt; 2478 mutex_exit(&vfs_list_lock); 2479 2480 if (cnt == 0) { 2481 printf("WARNING: No file system modules have been loaded.\n"); 2482 return; 2483 } 2484 2485 printf("Supported file systems:"); 2486 mutex_enter(&vfs_list_lock); 2487 LIST_FOREACH(v, &vfs_list, vfs_list) { 2488 printf(" %s", v->vfs_name); 2489 } 2490 mutex_exit(&vfs_list_lock); 2491 printf("\n"); 2492 } 2493 2494 /* 2495 * Mount the root file system. If the operator didn't specify a 2496 * file system to use, try all possible file systems until one 2497 * succeeds. 
2498 */ 2499 int 2500 vfs_mountroot(void) 2501 { 2502 struct vfsops *v; 2503 int error = ENODEV; 2504 2505 if (root_device == NULL) 2506 panic("vfs_mountroot: root device unknown"); 2507 2508 switch (device_class(root_device)) { 2509 case DV_IFNET: 2510 if (rootdev != NODEV) 2511 panic("vfs_mountroot: rootdev set for DV_IFNET " 2512 "(0x%llx -> %llu,%llu)", 2513 (unsigned long long)rootdev, 2514 (unsigned long long)major(rootdev), 2515 (unsigned long long)minor(rootdev)); 2516 break; 2517 2518 case DV_DISK: 2519 if (rootdev == NODEV) 2520 panic("vfs_mountroot: rootdev not set for DV_DISK"); 2521 if (bdevvp(rootdev, &rootvp)) 2522 panic("vfs_mountroot: can't get vnode for rootdev"); 2523 error = VOP_OPEN(rootvp, FREAD, FSCRED); 2524 if (error) { 2525 printf("vfs_mountroot: can't open root device\n"); 2526 return (error); 2527 } 2528 break; 2529 2530 case DV_VIRTUAL: 2531 break; 2532 2533 default: 2534 printf("%s: inappropriate for root file system\n", 2535 device_xname(root_device)); 2536 return (ENODEV); 2537 } 2538 2539 /* 2540 * If user specified a root fs type, use it. Make sure the 2541 * specified type exists and has a mount_root() 2542 */ 2543 if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) { 2544 v = vfs_getopsbyname(rootfstype); 2545 error = EFTYPE; 2546 if (v != NULL) { 2547 if (v->vfs_mountroot != NULL) { 2548 error = (v->vfs_mountroot)(); 2549 } 2550 v->vfs_refcount--; 2551 } 2552 goto done; 2553 } 2554 2555 /* 2556 * Try each file system currently configured into the kernel. 
2557 */ 2558 mutex_enter(&vfs_list_lock); 2559 LIST_FOREACH(v, &vfs_list, vfs_list) { 2560 if (v->vfs_mountroot == NULL) 2561 continue; 2562 #ifdef DEBUG 2563 aprint_normal("mountroot: trying %s...\n", v->vfs_name); 2564 #endif 2565 v->vfs_refcount++; 2566 mutex_exit(&vfs_list_lock); 2567 error = (*v->vfs_mountroot)(); 2568 mutex_enter(&vfs_list_lock); 2569 v->vfs_refcount--; 2570 if (!error) { 2571 aprint_normal("root file system type: %s\n", 2572 v->vfs_name); 2573 break; 2574 } 2575 } 2576 mutex_exit(&vfs_list_lock); 2577 2578 if (v == NULL) { 2579 vfs_print_fstypes(); 2580 printf("no file system for %s", device_xname(root_device)); 2581 if (device_class(root_device) == DV_DISK) 2582 printf(" (dev 0x%llx)", (unsigned long long)rootdev); 2583 printf("\n"); 2584 error = EFTYPE; 2585 } 2586 2587 done: 2588 if (error && device_class(root_device) == DV_DISK) { 2589 VOP_CLOSE(rootvp, FREAD, FSCRED); 2590 vrele(rootvp); 2591 } 2592 if (error == 0) { 2593 extern struct cwdinfo cwdi0; 2594 2595 CIRCLEQ_FIRST(&mountlist)->mnt_flag |= MNT_ROOTFS; 2596 CIRCLEQ_FIRST(&mountlist)->mnt_op->vfs_refcount++; 2597 2598 /* 2599 * Get the vnode for '/'. Set cwdi0.cwdi_cdir to 2600 * reference it. 2601 */ 2602 error = VFS_ROOT(CIRCLEQ_FIRST(&mountlist), &rootvnode); 2603 if (error) 2604 panic("cannot find root vnode, error=%d", error); 2605 cwdi0.cwdi_cdir = rootvnode; 2606 vref(cwdi0.cwdi_cdir); 2607 VOP_UNLOCK(rootvnode); 2608 cwdi0.cwdi_rdir = NULL; 2609 2610 /* 2611 * Now that root is mounted, we can fixup initproc's CWD 2612 * info. All other processes are kthreads, which merely 2613 * share proc0's CWD info. 
2614 */ 2615 initproc->p_cwdi->cwdi_cdir = rootvnode; 2616 vref(initproc->p_cwdi->cwdi_cdir); 2617 initproc->p_cwdi->cwdi_rdir = NULL; 2618 /* 2619 * Enable loading of modules from the filesystem 2620 */ 2621 module_load_vfs_init(); 2622 2623 } 2624 return (error); 2625 } 2626 2627 /* 2628 * Get a new unique fsid 2629 */ 2630 void 2631 vfs_getnewfsid(struct mount *mp) 2632 { 2633 static u_short xxxfs_mntid; 2634 fsid_t tfsid; 2635 int mtype; 2636 2637 mutex_enter(&mntid_lock); 2638 mtype = makefstype(mp->mnt_op->vfs_name); 2639 mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0); 2640 mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype; 2641 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 2642 if (xxxfs_mntid == 0) 2643 ++xxxfs_mntid; 2644 tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid); 2645 tfsid.__fsid_val[1] = mtype; 2646 if (!CIRCLEQ_EMPTY(&mountlist)) { 2647 while (vfs_getvfs(&tfsid)) { 2648 tfsid.__fsid_val[0]++; 2649 xxxfs_mntid++; 2650 } 2651 } 2652 mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0]; 2653 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 2654 mutex_exit(&mntid_lock); 2655 } 2656 2657 /* 2658 * Make a 'unique' number from a mount type name. 2659 */ 2660 long 2661 makefstype(const char *type) 2662 { 2663 long rv; 2664 2665 for (rv = 0; *type; type++) { 2666 rv <<= 2; 2667 rv ^= *type; 2668 } 2669 return rv; 2670 } 2671 2672 /* 2673 * Set vnode attributes to VNOVAL 2674 */ 2675 void 2676 vattr_null(struct vattr *vap) 2677 { 2678 2679 memset(vap, 0, sizeof(*vap)); 2680 2681 vap->va_type = VNON; 2682 2683 /* 2684 * Assign individually so that it is safe even if size and 2685 * sign of each member are varied. 
2686 */ 2687 vap->va_mode = VNOVAL; 2688 vap->va_nlink = VNOVAL; 2689 vap->va_uid = VNOVAL; 2690 vap->va_gid = VNOVAL; 2691 vap->va_fsid = VNOVAL; 2692 vap->va_fileid = VNOVAL; 2693 vap->va_size = VNOVAL; 2694 vap->va_blocksize = VNOVAL; 2695 vap->va_atime.tv_sec = 2696 vap->va_mtime.tv_sec = 2697 vap->va_ctime.tv_sec = 2698 vap->va_birthtime.tv_sec = VNOVAL; 2699 vap->va_atime.tv_nsec = 2700 vap->va_mtime.tv_nsec = 2701 vap->va_ctime.tv_nsec = 2702 vap->va_birthtime.tv_nsec = VNOVAL; 2703 vap->va_gen = VNOVAL; 2704 vap->va_flags = VNOVAL; 2705 vap->va_rdev = VNOVAL; 2706 vap->va_bytes = VNOVAL; 2707 } 2708 2709 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) 2710 #define ARRAY_PRINT(idx, arr) \ 2711 ((unsigned int)(idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN") 2712 2713 const char * const vnode_tags[] = { VNODE_TAGS }; 2714 const char * const vnode_types[] = { VNODE_TYPES }; 2715 const char vnode_flagbits[] = VNODE_FLAGBITS; 2716 2717 /* 2718 * Print out a description of a vnode. 2719 */ 2720 void 2721 vprint(const char *label, struct vnode *vp) 2722 { 2723 struct vnlock *vl; 2724 char bf[96]; 2725 int flag; 2726 2727 vl = &vp->v_lock; 2728 flag = vp->v_iflag | vp->v_vflag | vp->v_uflag; 2729 snprintb(bf, sizeof(bf), vnode_flagbits, flag); 2730 2731 if (label != NULL) 2732 printf("%s: ", label); 2733 printf("vnode @ %p, flags (%s)\n\ttag %s(%d), type %s(%d), " 2734 "usecount %d, writecount %d, holdcount %d\n" 2735 "\tfreelisthd %p, mount %p, data %p lock %p\n", 2736 vp, bf, ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag, 2737 ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type, 2738 vp->v_usecount, vp->v_writecount, vp->v_holdcnt, 2739 vp->v_freelisthd, vp->v_mount, vp->v_data, vl); 2740 if (vp->v_data != NULL) { 2741 printf("\t"); 2742 VOP_PRINT(vp); 2743 } 2744 } 2745 2746 #ifdef DEBUG 2747 /* 2748 * List all of the locked vnodes in the system. 2749 * Called when debugging the kernel. 
2750 */ 2751 void 2752 printlockedvnodes(void) 2753 { 2754 struct mount *mp, *nmp; 2755 struct vnode *vp; 2756 2757 printf("Locked vnodes\n"); 2758 mutex_enter(&mountlist_lock); 2759 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 2760 mp = nmp) { 2761 if (vfs_busy(mp, &nmp)) { 2762 continue; 2763 } 2764 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 2765 if (VOP_ISLOCKED(vp)) 2766 vprint(NULL, vp); 2767 } 2768 mutex_enter(&mountlist_lock); 2769 vfs_unbusy(mp, false, &nmp); 2770 } 2771 mutex_exit(&mountlist_lock); 2772 } 2773 #endif 2774 2775 /* Deprecated. Kept for KPI compatibility. */ 2776 int 2777 vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid, 2778 mode_t acc_mode, kauth_cred_t cred) 2779 { 2780 2781 #ifdef DIAGNOSTIC 2782 printf("vaccess: deprecated interface used.\n"); 2783 #endif /* DIAGNOSTIC */ 2784 2785 return genfs_can_access(type, file_mode, uid, gid, acc_mode, cred); 2786 } 2787 2788 /* 2789 * Given a file system name, look up the vfsops for that 2790 * file system, or return NULL if file system isn't present 2791 * in the kernel. 
2792 */ 2793 struct vfsops * 2794 vfs_getopsbyname(const char *name) 2795 { 2796 struct vfsops *v; 2797 2798 mutex_enter(&vfs_list_lock); 2799 LIST_FOREACH(v, &vfs_list, vfs_list) { 2800 if (strcmp(v->vfs_name, name) == 0) 2801 break; 2802 } 2803 if (v != NULL) 2804 v->vfs_refcount++; 2805 mutex_exit(&vfs_list_lock); 2806 2807 return (v); 2808 } 2809 2810 void 2811 copy_statvfs_info(struct statvfs *sbp, const struct mount *mp) 2812 { 2813 const struct statvfs *mbp; 2814 2815 if (sbp == (mbp = &mp->mnt_stat)) 2816 return; 2817 2818 (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx)); 2819 sbp->f_fsid = mbp->f_fsid; 2820 sbp->f_owner = mbp->f_owner; 2821 sbp->f_flag = mbp->f_flag; 2822 sbp->f_syncwrites = mbp->f_syncwrites; 2823 sbp->f_asyncwrites = mbp->f_asyncwrites; 2824 sbp->f_syncreads = mbp->f_syncreads; 2825 sbp->f_asyncreads = mbp->f_asyncreads; 2826 (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare)); 2827 (void)memcpy(sbp->f_fstypename, mbp->f_fstypename, 2828 sizeof(sbp->f_fstypename)); 2829 (void)memcpy(sbp->f_mntonname, mbp->f_mntonname, 2830 sizeof(sbp->f_mntonname)); 2831 (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname, 2832 sizeof(sbp->f_mntfromname)); 2833 sbp->f_namemax = mbp->f_namemax; 2834 } 2835 2836 int 2837 set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom, 2838 const char *vfsname, struct mount *mp, struct lwp *l) 2839 { 2840 int error; 2841 size_t size; 2842 struct statvfs *sfs = &mp->mnt_stat; 2843 int (*fun)(const void *, void *, size_t, size_t *); 2844 2845 (void)strlcpy(mp->mnt_stat.f_fstypename, vfsname, 2846 sizeof(mp->mnt_stat.f_fstypename)); 2847 2848 if (onp) { 2849 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 2850 fun = (ukon == UIO_SYSSPACE) ? 
copystr : copyinstr; 2851 if (cwdi->cwdi_rdir != NULL) { 2852 size_t len; 2853 char *bp; 2854 char *path = PNBUF_GET(); 2855 2856 bp = path + MAXPATHLEN; 2857 *--bp = '\0'; 2858 rw_enter(&cwdi->cwdi_lock, RW_READER); 2859 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, 2860 path, MAXPATHLEN / 2, 0, l); 2861 rw_exit(&cwdi->cwdi_lock); 2862 if (error) { 2863 PNBUF_PUT(path); 2864 return error; 2865 } 2866 2867 len = strlen(bp); 2868 if (len > sizeof(sfs->f_mntonname) - 1) 2869 len = sizeof(sfs->f_mntonname) - 1; 2870 (void)strncpy(sfs->f_mntonname, bp, len); 2871 PNBUF_PUT(path); 2872 2873 if (len < sizeof(sfs->f_mntonname) - 1) { 2874 error = (*fun)(onp, &sfs->f_mntonname[len], 2875 sizeof(sfs->f_mntonname) - len - 1, &size); 2876 if (error) 2877 return error; 2878 size += len; 2879 } else { 2880 size = len; 2881 } 2882 } else { 2883 error = (*fun)(onp, &sfs->f_mntonname, 2884 sizeof(sfs->f_mntonname) - 1, &size); 2885 if (error) 2886 return error; 2887 } 2888 (void)memset(sfs->f_mntonname + size, 0, 2889 sizeof(sfs->f_mntonname) - size); 2890 } 2891 2892 if (fromp) { 2893 fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr; 2894 error = (*fun)(fromp, sfs->f_mntfromname, 2895 sizeof(sfs->f_mntfromname) - 1, &size); 2896 if (error) 2897 return error; 2898 (void)memset(sfs->f_mntfromname + size, 0, 2899 sizeof(sfs->f_mntfromname) - size); 2900 } 2901 return 0; 2902 } 2903 2904 void 2905 vfs_timestamp(struct timespec *ts) 2906 { 2907 2908 nanotime(ts); 2909 } 2910 2911 time_t rootfstime; /* recorded root fs time, if known */ 2912 void 2913 setrootfstime(time_t t) 2914 { 2915 rootfstime = t; 2916 } 2917 2918 /* 2919 * Sham lock manager for vnodes. This is a temporary measure. 
2920 */ 2921 int 2922 vlockmgr(struct vnlock *vl, int flags) 2923 { 2924 2925 KASSERT((flags & ~(LK_NOWAIT | LK_TYPE_MASK)) == 0); 2926 2927 switch (flags & (LK_NOWAIT | LK_TYPE_MASK)) { 2928 case LK_SHARED: 2929 rw_enter(&vl->vl_lock, RW_READER); 2930 return 0; 2931 2932 case LK_SHARED | LK_NOWAIT: 2933 return rw_tryenter(&vl->vl_lock, RW_READER) ? 0 : EBUSY; 2934 2935 case LK_EXCLUSIVE: 2936 rw_enter(&vl->vl_lock, RW_WRITER); 2937 return 0; 2938 2939 case LK_EXCLUSIVE | LK_NOWAIT: 2940 return rw_tryenter(&vl->vl_lock, RW_WRITER) ? 0 : EBUSY; 2941 2942 case LK_RELEASE: 2943 rw_exit(&vl->vl_lock); 2944 return 0; 2945 2946 default: 2947 panic("vlockmgr: flags %x", flags); 2948 } 2949 } 2950 2951 int 2952 vlockstatus(struct vnlock *vl) 2953 { 2954 2955 if (rw_write_held(&vl->vl_lock)) { 2956 return LK_EXCLUSIVE; 2957 } 2958 if (rw_read_held(&vl->vl_lock)) { 2959 return LK_SHARED; 2960 } 2961 return 0; 2962 } 2963 2964 static const uint8_t vttodt_tab[9] = { 2965 DT_UNKNOWN, /* VNON */ 2966 DT_REG, /* VREG */ 2967 DT_DIR, /* VDIR */ 2968 DT_BLK, /* VBLK */ 2969 DT_CHR, /* VCHR */ 2970 DT_LNK, /* VLNK */ 2971 DT_SOCK, /* VSUCK */ 2972 DT_FIFO, /* VFIFO */ 2973 DT_UNKNOWN /* VBAD */ 2974 }; 2975 2976 uint8_t 2977 vtype2dt(enum vtype vt) 2978 { 2979 2980 CTASSERT(VBAD == __arraycount(vttodt_tab) - 1); 2981 return vttodt_tab[vt]; 2982 } 2983 2984 /* 2985 * mount_specific_key_create -- 2986 * Create a key for subsystem mount-specific data. 2987 */ 2988 int 2989 mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor) 2990 { 2991 2992 return (specificdata_key_create(mount_specificdata_domain, keyp, dtor)); 2993 } 2994 2995 /* 2996 * mount_specific_key_delete -- 2997 * Delete a key for subsystem mount-specific data. 
2998 */ 2999 void 3000 mount_specific_key_delete(specificdata_key_t key) 3001 { 3002 3003 specificdata_key_delete(mount_specificdata_domain, key); 3004 } 3005 3006 /* 3007 * mount_initspecific -- 3008 * Initialize a mount's specificdata container. 3009 */ 3010 void 3011 mount_initspecific(struct mount *mp) 3012 { 3013 int error; 3014 3015 error = specificdata_init(mount_specificdata_domain, 3016 &mp->mnt_specdataref); 3017 KASSERT(error == 0); 3018 } 3019 3020 /* 3021 * mount_finispecific -- 3022 * Finalize a mount's specificdata container. 3023 */ 3024 void 3025 mount_finispecific(struct mount *mp) 3026 { 3027 3028 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 3029 } 3030 3031 /* 3032 * mount_getspecific -- 3033 * Return mount-specific data corresponding to the specified key. 3034 */ 3035 void * 3036 mount_getspecific(struct mount *mp, specificdata_key_t key) 3037 { 3038 3039 return (specificdata_getspecific(mount_specificdata_domain, 3040 &mp->mnt_specdataref, key)); 3041 } 3042 3043 /* 3044 * mount_setspecific -- 3045 * Set mount-specific data corresponding to the specified key. 
3046 */ 3047 void 3048 mount_setspecific(struct mount *mp, specificdata_key_t key, void *data) 3049 { 3050 3051 specificdata_setspecific(mount_specificdata_domain, 3052 &mp->mnt_specdataref, key, data); 3053 } 3054 3055 int 3056 VFS_MOUNT(struct mount *mp, const char *a, void *b, size_t *c) 3057 { 3058 int error; 3059 3060 KERNEL_LOCK(1, NULL); 3061 error = (*(mp->mnt_op->vfs_mount))(mp, a, b, c); 3062 KERNEL_UNLOCK_ONE(NULL); 3063 3064 return error; 3065 } 3066 3067 int 3068 VFS_START(struct mount *mp, int a) 3069 { 3070 int error; 3071 3072 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3073 KERNEL_LOCK(1, NULL); 3074 } 3075 error = (*(mp->mnt_op->vfs_start))(mp, a); 3076 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3077 KERNEL_UNLOCK_ONE(NULL); 3078 } 3079 3080 return error; 3081 } 3082 3083 int 3084 VFS_UNMOUNT(struct mount *mp, int a) 3085 { 3086 int error; 3087 3088 KERNEL_LOCK(1, NULL); 3089 error = (*(mp->mnt_op->vfs_unmount))(mp, a); 3090 KERNEL_UNLOCK_ONE(NULL); 3091 3092 return error; 3093 } 3094 3095 int 3096 VFS_ROOT(struct mount *mp, struct vnode **a) 3097 { 3098 int error; 3099 3100 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3101 KERNEL_LOCK(1, NULL); 3102 } 3103 error = (*(mp->mnt_op->vfs_root))(mp, a); 3104 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3105 KERNEL_UNLOCK_ONE(NULL); 3106 } 3107 3108 return error; 3109 } 3110 3111 int 3112 VFS_QUOTACTL(struct mount *mp, int a, uid_t b, void *c) 3113 { 3114 int error; 3115 3116 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3117 KERNEL_LOCK(1, NULL); 3118 } 3119 error = (*(mp->mnt_op->vfs_quotactl))(mp, a, b, c); 3120 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3121 KERNEL_UNLOCK_ONE(NULL); 3122 } 3123 3124 return error; 3125 } 3126 3127 int 3128 VFS_STATVFS(struct mount *mp, struct statvfs *a) 3129 { 3130 int error; 3131 3132 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3133 KERNEL_LOCK(1, NULL); 3134 } 3135 error = (*(mp->mnt_op->vfs_statvfs))(mp, a); 3136 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3137 
KERNEL_UNLOCK_ONE(NULL); 3138 } 3139 3140 return error; 3141 } 3142 3143 int 3144 VFS_SYNC(struct mount *mp, int a, struct kauth_cred *b) 3145 { 3146 int error; 3147 3148 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3149 KERNEL_LOCK(1, NULL); 3150 } 3151 error = (*(mp->mnt_op->vfs_sync))(mp, a, b); 3152 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3153 KERNEL_UNLOCK_ONE(NULL); 3154 } 3155 3156 return error; 3157 } 3158 3159 int 3160 VFS_FHTOVP(struct mount *mp, struct fid *a, struct vnode **b) 3161 { 3162 int error; 3163 3164 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3165 KERNEL_LOCK(1, NULL); 3166 } 3167 error = (*(mp->mnt_op->vfs_fhtovp))(mp, a, b); 3168 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3169 KERNEL_UNLOCK_ONE(NULL); 3170 } 3171 3172 return error; 3173 } 3174 3175 int 3176 VFS_VPTOFH(struct vnode *vp, struct fid *a, size_t *b) 3177 { 3178 int error; 3179 3180 if ((vp->v_vflag & VV_MPSAFE) == 0) { 3181 KERNEL_LOCK(1, NULL); 3182 } 3183 error = (*(vp->v_mount->mnt_op->vfs_vptofh))(vp, a, b); 3184 if ((vp->v_vflag & VV_MPSAFE) == 0) { 3185 KERNEL_UNLOCK_ONE(NULL); 3186 } 3187 3188 return error; 3189 } 3190 3191 int 3192 VFS_SNAPSHOT(struct mount *mp, struct vnode *a, struct timespec *b) 3193 { 3194 int error; 3195 3196 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3197 KERNEL_LOCK(1, NULL); 3198 } 3199 error = (*(mp->mnt_op->vfs_snapshot))(mp, a, b); 3200 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3201 KERNEL_UNLOCK_ONE(NULL); 3202 } 3203 3204 return error; 3205 } 3206 3207 int 3208 VFS_EXTATTRCTL(struct mount *mp, int a, struct vnode *b, int c, const char *d) 3209 { 3210 int error; 3211 3212 KERNEL_LOCK(1, NULL); /* XXXSMP check ffs */ 3213 error = (*(mp->mnt_op->vfs_extattrctl))(mp, a, b, c, d); 3214 KERNEL_UNLOCK_ONE(NULL); /* XXX */ 3215 3216 return error; 3217 } 3218 3219 int 3220 VFS_SUSPENDCTL(struct mount *mp, int a) 3221 { 3222 int error; 3223 3224 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3225 KERNEL_LOCK(1, NULL); 3226 } 3227 error = 
(*(mp->mnt_op->vfs_suspendctl))(mp, a); 3228 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3229 KERNEL_UNLOCK_ONE(NULL); 3230 } 3231 3232 return error; 3233 } 3234 3235 #if defined(DDB) || defined(DEBUGPRINT) 3236 static const char buf_flagbits[] = BUF_FLAGBITS; 3237 3238 void 3239 vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...)) 3240 { 3241 char bf[1024]; 3242 3243 (*pr)(" vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" rawblkno 0x%" 3244 PRIx64 " dev 0x%x\n", 3245 bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev); 3246 3247 snprintb(bf, sizeof(bf), 3248 buf_flagbits, bp->b_flags | bp->b_oflags | bp->b_cflags); 3249 (*pr)(" error %d flags 0x%s\n", bp->b_error, bf); 3250 3251 (*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n", 3252 bp->b_bufsize, bp->b_bcount, bp->b_resid); 3253 (*pr)(" data %p saveaddr %p\n", 3254 bp->b_data, bp->b_saveaddr); 3255 (*pr)(" iodone %p objlock %p\n", bp->b_iodone, bp->b_objlock); 3256 } 3257 3258 3259 void 3260 vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...)) 3261 { 3262 char bf[256]; 3263 3264 uvm_object_printit(&vp->v_uobj, full, pr); 3265 snprintb(bf, sizeof(bf), 3266 vnode_flagbits, vp->v_iflag | vp->v_vflag | vp->v_uflag); 3267 (*pr)("\nVNODE flags %s\n", bf); 3268 (*pr)("mp %p numoutput %d size 0x%llx writesize 0x%llx\n", 3269 vp->v_mount, vp->v_numoutput, vp->v_size, vp->v_writesize); 3270 3271 (*pr)("data %p writecount %ld holdcnt %ld\n", 3272 vp->v_data, vp->v_writecount, vp->v_holdcnt); 3273 3274 (*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n", 3275 ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag, 3276 ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type, 3277 vp->v_mount, vp->v_mountedhere); 3278 3279 (*pr)("v_lock %p\n", &vp->v_lock); 3280 3281 if (full) { 3282 struct buf *bp; 3283 3284 (*pr)("clean bufs:\n"); 3285 LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) { 3286 (*pr)(" bp %p\n", bp); 3287 vfs_buf_print(bp, full, pr); 3288 } 3289 3290 (*pr)("dirty bufs:\n"); 3291 
LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) { 3292 (*pr)(" bp %p\n", bp); 3293 vfs_buf_print(bp, full, pr); 3294 } 3295 } 3296 } 3297 3298 void 3299 vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...)) 3300 { 3301 char sbuf[256]; 3302 3303 (*pr)("vnodecovered = %p syncer = %p data = %p\n", 3304 mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data); 3305 3306 (*pr)("fs_bshift %d dev_bshift = %d\n", 3307 mp->mnt_fs_bshift,mp->mnt_dev_bshift); 3308 3309 snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_flag); 3310 (*pr)("flag = %s\n", sbuf); 3311 3312 snprintb(sbuf, sizeof(sbuf), __IMNT_FLAG_BITS, mp->mnt_iflag); 3313 (*pr)("iflag = %s\n", sbuf); 3314 3315 (*pr)("refcnt = %d unmounting @ %p updating @ %p\n", mp->mnt_refcnt, 3316 &mp->mnt_unmounting, &mp->mnt_updating); 3317 3318 (*pr)("statvfs cache:\n"); 3319 (*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize); 3320 (*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize); 3321 (*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize); 3322 3323 (*pr)("\tblocks = %"PRIu64"\n",mp->mnt_stat.f_blocks); 3324 (*pr)("\tbfree = %"PRIu64"\n",mp->mnt_stat.f_bfree); 3325 (*pr)("\tbavail = %"PRIu64"\n",mp->mnt_stat.f_bavail); 3326 (*pr)("\tbresvd = %"PRIu64"\n",mp->mnt_stat.f_bresvd); 3327 3328 (*pr)("\tfiles = %"PRIu64"\n",mp->mnt_stat.f_files); 3329 (*pr)("\tffree = %"PRIu64"\n",mp->mnt_stat.f_ffree); 3330 (*pr)("\tfavail = %"PRIu64"\n",mp->mnt_stat.f_favail); 3331 (*pr)("\tfresvd = %"PRIu64"\n",mp->mnt_stat.f_fresvd); 3332 3333 (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n", 3334 mp->mnt_stat.f_fsidx.__fsid_val[0], 3335 mp->mnt_stat.f_fsidx.__fsid_val[1]); 3336 3337 (*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner); 3338 (*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax); 3339 3340 snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_stat.f_flag); 3341 3342 (*pr)("\tflag = %s\n",sbuf); 3343 (*pr)("\tsyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_syncwrites); 3344 (*pr)("\tasyncwrites = %" PRIu64 
"\n",mp->mnt_stat.f_asyncwrites); 3345 (*pr)("\tsyncreads = %" PRIu64 "\n",mp->mnt_stat.f_syncreads); 3346 (*pr)("\tasyncreads = %" PRIu64 "\n",mp->mnt_stat.f_asyncreads); 3347 (*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename); 3348 (*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname); 3349 (*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname); 3350 3351 { 3352 int cnt = 0; 3353 struct vnode *vp; 3354 (*pr)("locked vnodes ="); 3355 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 3356 if (VOP_ISLOCKED(vp)) { 3357 if ((++cnt % 6) == 0) { 3358 (*pr)(" %p,\n\t", vp); 3359 } else { 3360 (*pr)(" %p,", vp); 3361 } 3362 } 3363 } 3364 (*pr)("\n"); 3365 } 3366 3367 if (full) { 3368 int cnt = 0; 3369 struct vnode *vp; 3370 (*pr)("all vnodes ="); 3371 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 3372 if (!TAILQ_NEXT(vp, v_mntvnodes)) { 3373 (*pr)(" %p", vp); 3374 } else if ((++cnt % 6) == 0) { 3375 (*pr)(" %p,\n\t", vp); 3376 } else { 3377 (*pr)(" %p,", vp); 3378 } 3379 } 3380 (*pr)("\n", vp); 3381 } 3382 } 3383 #endif /* DDB || DEBUGPRINT */ 3384 3385 /* 3386 * Check if a device pointed to by vp is mounted. 
3387 * 3388 * Returns: 3389 * EINVAL if it's not a disk 3390 * EBUSY if it's a disk and mounted 3391 * 0 if it's a disk and not mounted 3392 */ 3393 int 3394 rawdev_mounted(struct vnode *vp, struct vnode **bvpp) 3395 { 3396 struct vnode *bvp; 3397 dev_t dev; 3398 int d_type; 3399 3400 bvp = NULL; 3401 dev = vp->v_rdev; 3402 d_type = D_OTHER; 3403 3404 if (iskmemvp(vp)) 3405 return EINVAL; 3406 3407 switch (vp->v_type) { 3408 case VCHR: { 3409 const struct cdevsw *cdev; 3410 3411 cdev = cdevsw_lookup(dev); 3412 if (cdev != NULL) { 3413 dev_t blkdev; 3414 3415 blkdev = devsw_chr2blk(dev); 3416 if (blkdev != NODEV) { 3417 vfinddev(blkdev, VBLK, &bvp); 3418 if (bvp != NULL) 3419 d_type = (cdev->d_flag & D_TYPEMASK); 3420 } 3421 } 3422 3423 break; 3424 } 3425 3426 case VBLK: { 3427 const struct bdevsw *bdev; 3428 3429 bdev = bdevsw_lookup(dev); 3430 if (bdev != NULL) 3431 d_type = (bdev->d_flag & D_TYPEMASK); 3432 3433 bvp = vp; 3434 3435 break; 3436 } 3437 3438 default: 3439 break; 3440 } 3441 3442 if (d_type != D_DISK) 3443 return EINVAL; 3444 3445 if (bvpp != NULL) 3446 *bvpp = bvp; 3447 3448 /* 3449 * XXX: This is bogus. We should be failing the request 3450 * XXX: not only if this specific slice is mounted, but 3451 * XXX: if it's on a disk with any other mounted slice. 3452 */ 3453 if (vfs_mountedon(bvp)) 3454 return EBUSY; 3455 3456 return 0; 3457 } 3458