1 /* $NetBSD: vfs_subr.c,v 1.400 2010/04/30 10:03:13 pooka Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1989, 1993 35 * The Regents of the University of California. All rights reserved. 36 * (c) UNIX System Laboratories, Inc. 37 * All or some portions of this file are derived from material licensed 38 * to the University of California by American Telephone and Telegraph 39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 40 * the permission of UNIX System Laboratories, Inc. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * Note on v_usecount and locking:
 *
 * At nearly all points where it is known that v_usecount could be zero,
 * the vnode interlock will be held.
 *
 * To change v_usecount away from zero, the interlock must be held.  To
 * change from a non-zero value to zero, again the interlock must be
 * held.
 *
 * There's a flag bit, VC_XLOCK, embedded in v_usecount.
 * To raise v_usecount, if the VC_XLOCK bit is set in it, the interlock
 * must be held.
 * To modify the VC_XLOCK bit, the interlock must be held.
 * We always keep the usecount (v_usecount & VC_MASK) non-zero while the
 * VC_XLOCK bit is set.
 *
 * Unless the VC_XLOCK bit is set, changing the usecount from a non-zero
 * value to a non-zero value can safely be done using atomic operations,
 * without the interlock held.
 * Even if the VC_XLOCK bit is set, decreasing the usecount to a non-zero
 * value can be done using atomic operations, without the interlock held.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.400 2010/04/30 10:03:13 pooka Exp $");

#include "opt_ddb.h"
#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/dirent.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/filedesc.h>
#include <sys/kauth.h>
#include <sys/atomic.h>
#include <sys/kthread.h>
#include <sys/wapbl.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>
#include <miscfs/syncfs/syncfs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_readahead.h>
#include <uvm/uvm_ddb.h>

#include <sys/sysctl.h>

const enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
const int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode buffer lists.
144 */ 145 #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) 146 #define bufremvn(bp) { \ 147 LIST_REMOVE(bp, b_vnbufs); \ 148 (bp)->b_vnbufs.le_next = NOLIST; \ 149 } 150 151 int doforce = 1; /* 1 => permit forcible unmounting */ 152 int prtactive = 0; /* 1 => print out reclaim of active vnodes */ 153 154 static vnodelst_t vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list); 155 static vnodelst_t vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list); 156 static vnodelst_t vrele_list = TAILQ_HEAD_INITIALIZER(vrele_list); 157 158 struct mntlist mountlist = /* mounted filesystem list */ 159 CIRCLEQ_HEAD_INITIALIZER(mountlist); 160 161 u_int numvnodes; 162 static specificdata_domain_t mount_specificdata_domain; 163 164 static int vrele_pending; 165 static int vrele_gen; 166 static kmutex_t vrele_lock; 167 static kcondvar_t vrele_cv; 168 static lwp_t *vrele_lwp; 169 170 static uint64_t mountgen = 0; 171 static kmutex_t mountgen_lock; 172 173 kmutex_t mountlist_lock; 174 kmutex_t mntid_lock; 175 kmutex_t mntvnode_lock; 176 kmutex_t vnode_free_list_lock; 177 kmutex_t vfs_list_lock; 178 179 static pool_cache_t vnode_cache; 180 181 /* 182 * These define the root filesystem and device. 183 */ 184 struct vnode *rootvnode; 185 struct device *root_device; /* root device */ 186 187 /* 188 * Local declarations. 189 */ 190 191 static void vrele_thread(void *); 192 static void insmntque(vnode_t *, struct mount *); 193 static int getdevvp(dev_t, vnode_t **, enum vtype); 194 static vnode_t *getcleanvnode(void); 195 void vpanic(vnode_t *, const char *); 196 static void vfs_shutdown1(struct lwp *); 197 198 #ifdef DEBUG 199 void printlockedvnodes(void); 200 #endif 201 202 #ifdef DIAGNOSTIC 203 void 204 vpanic(vnode_t *vp, const char *msg) 205 { 206 207 vprint(NULL, vp); 208 panic("%s\n", msg); 209 } 210 #else 211 #define vpanic(vp, msg) /* nothing */ 212 #endif 213 214 void 215 vn_init1(void) 216 { 217 218 vnode_cache = pool_cache_init(sizeof(struct vnode), 0, 0, 0, "vnodepl", 219 NULL, IPL_NONE, NULL, NULL, NULL); 220 KASSERT(vnode_cache != NULL); 221 222 /* Create deferred release thread. */ 223 mutex_init(&vrele_lock, MUTEX_DEFAULT, IPL_NONE); 224 cv_init(&vrele_cv, "vrele"); 225 if (kthread_create(PRI_VM, KTHREAD_MPSAFE, NULL, vrele_thread, 226 NULL, &vrele_lwp, "vrele")) 227 panic("fork vrele"); 228 } 229 230 /* 231 * Initialize the vnode management data structures. 232 */ 233 void 234 vntblinit(void) 235 { 236 237 mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE); 238 mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE); 239 mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE); 240 mutex_init(&mntvnode_lock, MUTEX_DEFAULT, IPL_NONE); 241 mutex_init(&vnode_free_list_lock, MUTEX_DEFAULT, IPL_NONE); 242 mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE); 243 244 mount_specificdata_domain = specificdata_domain_create(); 245 246 /* Initialize the filesystem syncer. */ 247 vn_initialize_syncerd(); 248 vn_init1(); 249 } 250 251 int 252 vfs_drainvnodes(long target, struct lwp *l) 253 { 254 255 while (numvnodes > target) { 256 vnode_t *vp; 257 258 mutex_enter(&vnode_free_list_lock); 259 vp = getcleanvnode(); 260 if (vp == NULL) 261 return EBUSY; /* give up */ 262 ungetnewvnode(vp); 263 } 264 265 return 0; 266 } 267 268 /* 269 * Lookup a mount point by filesystem identifier. 270 * 271 * XXX Needs to add a reference to the mount point. 
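 *
 * Until such a reference is taken here, callers must otherwise
 * guarantee that the mount cannot be unmounted while the returned
 * pointer is in use, e.g. by already holding their own reference.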
272 */ 273 struct mount * 274 vfs_getvfs(fsid_t *fsid) 275 { 276 struct mount *mp; 277 278 mutex_enter(&mountlist_lock); 279 CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) { 280 if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] && 281 mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) { 282 mutex_exit(&mountlist_lock); 283 return (mp); 284 } 285 } 286 mutex_exit(&mountlist_lock); 287 return ((struct mount *)0); 288 } 289 290 /* 291 * Drop a reference to a mount structure, freeing if the last reference. 292 */ 293 void 294 vfs_destroy(struct mount *mp) 295 { 296 297 if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) { 298 return; 299 } 300 301 /* 302 * Nothing else has visibility of the mount: we can now 303 * free the data structures. 304 */ 305 KASSERT(mp->mnt_refcnt == 0); 306 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 307 rw_destroy(&mp->mnt_unmounting); 308 mutex_destroy(&mp->mnt_updating); 309 mutex_destroy(&mp->mnt_renamelock); 310 if (mp->mnt_op != NULL) { 311 vfs_delref(mp->mnt_op); 312 } 313 kmem_free(mp, sizeof(*mp)); 314 } 315 316 /* 317 * grab a vnode from freelist and clean it. 318 */ 319 vnode_t * 320 getcleanvnode(void) 321 { 322 vnode_t *vp; 323 vnodelst_t *listhd; 324 325 KASSERT(mutex_owned(&vnode_free_list_lock)); 326 327 retry: 328 listhd = &vnode_free_list; 329 try_nextlist: 330 TAILQ_FOREACH(vp, listhd, v_freelist) { 331 /* 332 * It's safe to test v_usecount and v_iflag 333 * without holding the interlock here, since 334 * these vnodes should never appear on the 335 * lists. 336 */ 337 if (vp->v_usecount != 0) { 338 vpanic(vp, "free vnode isn't"); 339 } 340 if ((vp->v_iflag & VI_CLEAN) != 0) { 341 vpanic(vp, "clean vnode on freelist"); 342 } 343 if (vp->v_freelisthd != listhd) { 344 printf("vnode sez %p, listhd %p\n", vp->v_freelisthd, listhd); 345 vpanic(vp, "list head mismatch"); 346 } 347 if (!mutex_tryenter(&vp->v_interlock)) 348 continue; 349 /* 350 * Our lwp might hold the underlying vnode 351 * locked, so don't try to reclaim a VI_LAYER 352 * node if it's locked. 353 */ 354 if ((vp->v_iflag & VI_XLOCK) == 0 && 355 ((vp->v_iflag & VI_LAYER) == 0 || VOP_ISLOCKED(vp) == 0)) { 356 break; 357 } 358 mutex_exit(&vp->v_interlock); 359 } 360 361 if (vp == NULL) { 362 if (listhd == &vnode_free_list) { 363 listhd = &vnode_hold_list; 364 goto try_nextlist; 365 } 366 mutex_exit(&vnode_free_list_lock); 367 return NULL; 368 } 369 370 /* Remove it from the freelist. */ 371 TAILQ_REMOVE(listhd, vp, v_freelist); 372 vp->v_freelisthd = NULL; 373 mutex_exit(&vnode_free_list_lock); 374 375 if (vp->v_usecount != 0) { 376 /* 377 * was referenced again before we got the interlock 378 * Don't return to freelist - the holder of the last 379 * reference will destroy it. 380 */ 381 mutex_exit(&vp->v_interlock); 382 mutex_enter(&vnode_free_list_lock); 383 goto retry; 384 } 385 386 /* 387 * The vnode is still associated with a file system, so we must 388 * clean it out before reusing it. We need to add a reference 389 * before doing this. If the vnode gains another reference while 390 * being cleaned out then we lose - retry. 391 */ 392 atomic_add_int(&vp->v_usecount, 1 + VC_XLOCK); 393 vclean(vp, DOCLOSE); 394 KASSERT(vp->v_usecount >= 1 + VC_XLOCK); 395 atomic_add_int(&vp->v_usecount, -VC_XLOCK); 396 if (vp->v_usecount == 1) { 397 /* We're about to dirty it. 
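		 * Clear VI_CLEAN, tear down any special-device node and
		 * reset the type so the vnode can be reused.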
*/ 398 vp->v_iflag &= ~VI_CLEAN; 399 mutex_exit(&vp->v_interlock); 400 if (vp->v_type == VBLK || vp->v_type == VCHR) { 401 spec_node_destroy(vp); 402 } 403 vp->v_type = VNON; 404 } else { 405 /* 406 * Don't return to freelist - the holder of the last 407 * reference will destroy it. 408 */ 409 vrelel(vp, 0); /* releases vp->v_interlock */ 410 mutex_enter(&vnode_free_list_lock); 411 goto retry; 412 } 413 414 if (vp->v_data != NULL || vp->v_uobj.uo_npages != 0 || 415 !TAILQ_EMPTY(&vp->v_uobj.memq)) { 416 vpanic(vp, "cleaned vnode isn't"); 417 } 418 if (vp->v_numoutput != 0) { 419 vpanic(vp, "clean vnode has pending I/O's"); 420 } 421 if ((vp->v_iflag & VI_ONWORKLST) != 0) { 422 vpanic(vp, "clean vnode on syncer list"); 423 } 424 425 return vp; 426 } 427 428 /* 429 * Mark a mount point as busy, and gain a new reference to it. Used to 430 * prevent the file system from being unmounted during critical sections. 431 * 432 * => The caller must hold a pre-existing reference to the mount. 433 * => Will fail if the file system is being unmounted, or is unmounted. 434 */ 435 int 436 vfs_busy(struct mount *mp, struct mount **nextp) 437 { 438 439 KASSERT(mp->mnt_refcnt > 0); 440 441 if (__predict_false(!rw_tryenter(&mp->mnt_unmounting, RW_READER))) { 442 if (nextp != NULL) { 443 KASSERT(mutex_owned(&mountlist_lock)); 444 *nextp = CIRCLEQ_NEXT(mp, mnt_list); 445 } 446 return EBUSY; 447 } 448 if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) { 449 rw_exit(&mp->mnt_unmounting); 450 if (nextp != NULL) { 451 KASSERT(mutex_owned(&mountlist_lock)); 452 *nextp = CIRCLEQ_NEXT(mp, mnt_list); 453 } 454 return ENOENT; 455 } 456 if (nextp != NULL) { 457 mutex_exit(&mountlist_lock); 458 } 459 atomic_inc_uint(&mp->mnt_refcnt); 460 return 0; 461 } 462 463 /* 464 * Unbusy a busy filesystem. 465 * 466 * => If keepref is true, preserve reference added by vfs_busy(). 467 * => If nextp != NULL, acquire mountlist_lock. 468 */ 469 void 470 vfs_unbusy(struct mount *mp, bool keepref, struct mount **nextp) 471 { 472 473 KASSERT(mp->mnt_refcnt > 0); 474 475 if (nextp != NULL) { 476 mutex_enter(&mountlist_lock); 477 } 478 rw_exit(&mp->mnt_unmounting); 479 if (!keepref) { 480 vfs_destroy(mp); 481 } 482 if (nextp != NULL) { 483 KASSERT(mutex_owned(&mountlist_lock)); 484 *nextp = CIRCLEQ_NEXT(mp, mnt_list); 485 } 486 } 487 488 struct mount * 489 vfs_mountalloc(struct vfsops *vfsops, struct vnode *vp) 490 { 491 int error; 492 struct mount *mp; 493 494 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP); 495 if (mp == NULL) 496 return NULL; 497 498 mp->mnt_op = vfsops; 499 mp->mnt_refcnt = 1; 500 TAILQ_INIT(&mp->mnt_vnodelist); 501 rw_init(&mp->mnt_unmounting); 502 mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE); 503 mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE); 504 error = vfs_busy(mp, NULL); 505 KASSERT(error == 0); 506 mp->mnt_vnodecovered = vp; 507 mount_initspecific(mp); 508 509 mutex_enter(&mountgen_lock); 510 mp->mnt_gen = mountgen++; 511 mutex_exit(&mountgen_lock); 512 513 return mp; 514 } 515 516 /* 517 * Lookup a filesystem type, and if found allocate and initialize 518 * a mount structure for it. 519 * 520 * Devname is usually updated by mount(8) after booting. 
521 */ 522 int 523 vfs_rootmountalloc(const char *fstypename, const char *devname, 524 struct mount **mpp) 525 { 526 struct vfsops *vfsp = NULL; 527 struct mount *mp; 528 529 mutex_enter(&vfs_list_lock); 530 LIST_FOREACH(vfsp, &vfs_list, vfs_list) 531 if (!strncmp(vfsp->vfs_name, fstypename, 532 sizeof(mp->mnt_stat.f_fstypename))) 533 break; 534 if (vfsp == NULL) { 535 mutex_exit(&vfs_list_lock); 536 return (ENODEV); 537 } 538 vfsp->vfs_refcount++; 539 mutex_exit(&vfs_list_lock); 540 541 if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL) 542 return ENOMEM; 543 mp->mnt_flag = MNT_RDONLY; 544 (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, 545 sizeof(mp->mnt_stat.f_fstypename)); 546 mp->mnt_stat.f_mntonname[0] = '/'; 547 mp->mnt_stat.f_mntonname[1] = '\0'; 548 mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] = 549 '\0'; 550 (void)copystr(devname, mp->mnt_stat.f_mntfromname, 551 sizeof(mp->mnt_stat.f_mntfromname) - 1, 0); 552 *mpp = mp; 553 return (0); 554 } 555 556 /* 557 * Routines having to do with the management of the vnode table. 558 */ 559 extern int (**dead_vnodeop_p)(void *); 560 561 /* 562 * Return the next vnode from the free list. 563 */ 564 int 565 getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *), 566 vnode_t **vpp) 567 { 568 struct uvm_object *uobj; 569 static int toggle; 570 vnode_t *vp; 571 int error = 0, tryalloc; 572 573 try_again: 574 if (mp != NULL) { 575 /* 576 * Mark filesystem busy while we're creating a 577 * vnode. If unmount is in progress, this will 578 * fail. 579 */ 580 error = vfs_busy(mp, NULL); 581 if (error) 582 return error; 583 } 584 585 /* 586 * We must choose whether to allocate a new vnode or recycle an 587 * existing one. The criterion for allocating a new one is that 588 * the total number of vnodes is less than the number desired or 589 * there are no vnodes on either free list. Generally we only 590 * want to recycle vnodes that have no buffers associated with 591 * them, so we look first on the vnode_free_list. If it is empty, 592 * we next consider vnodes with referencing buffers on the 593 * vnode_hold_list. The toggle ensures that half the time we 594 * will use a buffer from the vnode_hold_list, and half the time 595 * we will allocate a new one unless the list has grown to twice 596 * the desired size. We are reticent to recycle vnodes from the 597 * vnode_hold_list because we will lose the identity of all its 598 * referencing buffers. 
599 */ 600 601 vp = NULL; 602 603 mutex_enter(&vnode_free_list_lock); 604 605 toggle ^= 1; 606 if (numvnodes > 2 * desiredvnodes) 607 toggle = 0; 608 609 tryalloc = numvnodes < desiredvnodes || 610 (TAILQ_FIRST(&vnode_free_list) == NULL && 611 (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle)); 612 613 if (tryalloc) { 614 numvnodes++; 615 mutex_exit(&vnode_free_list_lock); 616 if ((vp = vnalloc(NULL)) == NULL) { 617 mutex_enter(&vnode_free_list_lock); 618 numvnodes--; 619 } else 620 vp->v_usecount = 1; 621 } 622 623 if (vp == NULL) { 624 vp = getcleanvnode(); 625 if (vp == NULL) { 626 if (mp != NULL) { 627 vfs_unbusy(mp, false, NULL); 628 } 629 if (tryalloc) { 630 printf("WARNING: unable to allocate new " 631 "vnode, retrying...\n"); 632 kpause("newvn", false, hz, NULL); 633 goto try_again; 634 } 635 tablefull("vnode", "increase kern.maxvnodes or NVNODE"); 636 *vpp = 0; 637 return (ENFILE); 638 } 639 vp->v_iflag = 0; 640 vp->v_vflag = 0; 641 vp->v_uflag = 0; 642 vp->v_socket = NULL; 643 } 644 645 KASSERT(vp->v_usecount == 1); 646 KASSERT(vp->v_freelisthd == NULL); 647 KASSERT(LIST_EMPTY(&vp->v_nclist)); 648 KASSERT(LIST_EMPTY(&vp->v_dnclist)); 649 650 vp->v_type = VNON; 651 vp->v_vnlock = &vp->v_lock; 652 vp->v_tag = tag; 653 vp->v_op = vops; 654 insmntque(vp, mp); 655 *vpp = vp; 656 vp->v_data = 0; 657 658 /* 659 * initialize uvm_object within vnode. 660 */ 661 662 uobj = &vp->v_uobj; 663 KASSERT(uobj->pgops == &uvm_vnodeops); 664 KASSERT(uobj->uo_npages == 0); 665 KASSERT(TAILQ_FIRST(&uobj->memq) == NULL); 666 vp->v_size = vp->v_writesize = VSIZENOTSET; 667 668 if (mp != NULL) { 669 if ((mp->mnt_iflag & IMNT_MPSAFE) != 0) 670 vp->v_vflag |= VV_MPSAFE; 671 vfs_unbusy(mp, true, NULL); 672 } 673 674 return (0); 675 } 676 677 /* 678 * This is really just the reverse of getnewvnode(). Needed for 679 * VFS_VGET functions who may need to push back a vnode in case 680 * of a locking race. 681 */ 682 void 683 ungetnewvnode(vnode_t *vp) 684 { 685 686 KASSERT(vp->v_usecount == 1); 687 KASSERT(vp->v_data == NULL); 688 KASSERT(vp->v_freelisthd == NULL); 689 690 mutex_enter(&vp->v_interlock); 691 vp->v_iflag |= VI_CLEAN; 692 vrelel(vp, 0); 693 } 694 695 /* 696 * Allocate a new, uninitialized vnode. If 'mp' is non-NULL, this is a 697 * marker vnode and we are prepared to wait for the allocation. 698 */ 699 vnode_t * 700 vnalloc(struct mount *mp) 701 { 702 vnode_t *vp; 703 704 vp = pool_cache_get(vnode_cache, (mp != NULL ? PR_WAITOK : PR_NOWAIT)); 705 if (vp == NULL) { 706 return NULL; 707 } 708 709 memset(vp, 0, sizeof(*vp)); 710 UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 0); 711 cv_init(&vp->v_cv, "vnode"); 712 /* 713 * done by memset() above. 714 * LIST_INIT(&vp->v_nclist); 715 * LIST_INIT(&vp->v_dnclist); 716 */ 717 718 if (mp != NULL) { 719 vp->v_mount = mp; 720 vp->v_type = VBAD; 721 vp->v_iflag = VI_MARKER; 722 } else { 723 rw_init(&vp->v_lock.vl_lock); 724 } 725 726 return vp; 727 } 728 729 /* 730 * Free an unused, unreferenced vnode. 731 */ 732 void 733 vnfree(vnode_t *vp) 734 { 735 736 KASSERT(vp->v_usecount == 0); 737 738 if ((vp->v_iflag & VI_MARKER) == 0) { 739 rw_destroy(&vp->v_lock.vl_lock); 740 mutex_enter(&vnode_free_list_lock); 741 numvnodes--; 742 mutex_exit(&vnode_free_list_lock); 743 } 744 745 UVM_OBJ_DESTROY(&vp->v_uobj); 746 cv_destroy(&vp->v_cv); 747 pool_cache_put(vnode_cache, vp); 748 } 749 750 /* 751 * Remove a vnode from its freelist. 
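 *
 * => Called with v_interlock held.
 * => vp->v_usecount must be zero.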
752 */ 753 static inline void 754 vremfree(vnode_t *vp) 755 { 756 757 KASSERT(mutex_owned(&vp->v_interlock)); 758 KASSERT(vp->v_usecount == 0); 759 760 /* 761 * Note that the reference count must not change until 762 * the vnode is removed. 763 */ 764 mutex_enter(&vnode_free_list_lock); 765 if (vp->v_holdcnt > 0) { 766 KASSERT(vp->v_freelisthd == &vnode_hold_list); 767 } else { 768 KASSERT(vp->v_freelisthd == &vnode_free_list); 769 } 770 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); 771 vp->v_freelisthd = NULL; 772 mutex_exit(&vnode_free_list_lock); 773 } 774 775 /* 776 * Move a vnode from one mount queue to another. 777 */ 778 static void 779 insmntque(vnode_t *vp, struct mount *mp) 780 { 781 struct mount *omp; 782 783 #ifdef DIAGNOSTIC 784 if ((mp != NULL) && 785 (mp->mnt_iflag & IMNT_UNMOUNT) && 786 vp->v_tag != VT_VFS) { 787 panic("insmntque into dying filesystem"); 788 } 789 #endif 790 791 mutex_enter(&mntvnode_lock); 792 /* 793 * Delete from old mount point vnode list, if on one. 794 */ 795 if ((omp = vp->v_mount) != NULL) 796 TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes); 797 /* 798 * Insert into list of vnodes for the new mount point, if 799 * available. The caller must take a reference on the mount 800 * structure and donate to the vnode. 801 */ 802 if ((vp->v_mount = mp) != NULL) 803 TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes); 804 mutex_exit(&mntvnode_lock); 805 806 if (omp != NULL) { 807 /* Release reference to old mount. */ 808 vfs_destroy(omp); 809 } 810 } 811 812 /* 813 * Wait for a vnode (typically with VI_XLOCK set) to be cleaned or 814 * recycled. 815 */ 816 void 817 vwait(vnode_t *vp, int flags) 818 { 819 820 KASSERT(mutex_owned(&vp->v_interlock)); 821 KASSERT(vp->v_usecount != 0); 822 823 while ((vp->v_iflag & flags) != 0) 824 cv_wait(&vp->v_cv, &vp->v_interlock); 825 } 826 827 /* 828 * Insert a marker vnode into a mount's vnode list, after the 829 * specified vnode. mntvnode_lock must be held. 830 */ 831 void 832 vmark(vnode_t *mvp, vnode_t *vp) 833 { 834 struct mount *mp; 835 836 mp = mvp->v_mount; 837 838 KASSERT(mutex_owned(&mntvnode_lock)); 839 KASSERT((mvp->v_iflag & VI_MARKER) != 0); 840 KASSERT(vp->v_mount == mp); 841 842 TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vp, mvp, v_mntvnodes); 843 } 844 845 /* 846 * Remove a marker vnode from a mount's vnode list, and return 847 * a pointer to the next vnode in the list. mntvnode_lock must 848 * be held. 849 */ 850 vnode_t * 851 vunmark(vnode_t *mvp) 852 { 853 vnode_t *vp; 854 struct mount *mp; 855 856 mp = mvp->v_mount; 857 858 KASSERT(mutex_owned(&mntvnode_lock)); 859 KASSERT((mvp->v_iflag & VI_MARKER) != 0); 860 861 vp = TAILQ_NEXT(mvp, v_mntvnodes); 862 TAILQ_REMOVE(&mp->mnt_vnodelist, mvp, v_mntvnodes); 863 864 KASSERT(vp == NULL || vp->v_mount == mp); 865 866 return vp; 867 } 868 869 /* 870 * Update outstanding I/O count and do wakeup if requested. 871 */ 872 void 873 vwakeup(struct buf *bp) 874 { 875 struct vnode *vp; 876 877 if ((vp = bp->b_vp) == NULL) 878 return; 879 880 KASSERT(bp->b_objlock == &vp->v_interlock); 881 KASSERT(mutex_owned(bp->b_objlock)); 882 883 if (--vp->v_numoutput < 0) 884 panic("vwakeup: neg numoutput, vp %p", vp); 885 if (vp->v_numoutput == 0) 886 cv_broadcast(&vp->v_cv); 887 } 888 889 /* 890 * Flush out and invalidate all buffers associated with a vnode. 891 * Called with the underlying vnode locked, which should prevent new dirty 892 * buffers from being queued. 
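 *
 * Pages are flushed via VOP_PUTPAGES() first; if V_SAVE is set, dirty
 * data is written back with VOP_FSYNC() before the dirty and clean
 * buffer lists are emptied.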
893 */ 894 int 895 vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l, 896 bool catch, int slptimeo) 897 { 898 struct buf *bp, *nbp; 899 int error; 900 int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO | 901 (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0); 902 903 /* XXXUBC this doesn't look at flags or slp* */ 904 mutex_enter(&vp->v_interlock); 905 error = VOP_PUTPAGES(vp, 0, 0, flushflags); 906 if (error) { 907 return error; 908 } 909 910 if (flags & V_SAVE) { 911 error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0); 912 if (error) 913 return (error); 914 KASSERT(LIST_EMPTY(&vp->v_dirtyblkhd)); 915 } 916 917 mutex_enter(&bufcache_lock); 918 restart: 919 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 920 nbp = LIST_NEXT(bp, b_vnbufs); 921 error = bbusy(bp, catch, slptimeo, NULL); 922 if (error != 0) { 923 if (error == EPASSTHROUGH) 924 goto restart; 925 mutex_exit(&bufcache_lock); 926 return (error); 927 } 928 brelsel(bp, BC_INVAL | BC_VFLUSH); 929 } 930 931 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 932 nbp = LIST_NEXT(bp, b_vnbufs); 933 error = bbusy(bp, catch, slptimeo, NULL); 934 if (error != 0) { 935 if (error == EPASSTHROUGH) 936 goto restart; 937 mutex_exit(&bufcache_lock); 938 return (error); 939 } 940 /* 941 * XXX Since there are no node locks for NFS, I believe 942 * there is a slight chance that a delayed write will 943 * occur while sleeping just above, so check for it. 944 */ 945 if ((bp->b_oflags & BO_DELWRI) && (flags & V_SAVE)) { 946 #ifdef DEBUG 947 printf("buffer still DELWRI\n"); 948 #endif 949 bp->b_cflags |= BC_BUSY | BC_VFLUSH; 950 mutex_exit(&bufcache_lock); 951 VOP_BWRITE(bp); 952 mutex_enter(&bufcache_lock); 953 goto restart; 954 } 955 brelsel(bp, BC_INVAL | BC_VFLUSH); 956 } 957 958 #ifdef DIAGNOSTIC 959 if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd)) 960 panic("vinvalbuf: flush failed, vp %p", vp); 961 #endif 962 963 mutex_exit(&bufcache_lock); 964 965 return (0); 966 } 967 968 /* 969 * Destroy any in core blocks past the truncation length. 970 * Called with the underlying vnode locked, which should prevent new dirty 971 * buffers from being queued. 972 */ 973 int 974 vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch, int slptimeo) 975 { 976 struct buf *bp, *nbp; 977 int error; 978 voff_t off; 979 980 off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift); 981 mutex_enter(&vp->v_interlock); 982 error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO); 983 if (error) { 984 return error; 985 } 986 987 mutex_enter(&bufcache_lock); 988 restart: 989 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 990 nbp = LIST_NEXT(bp, b_vnbufs); 991 if (bp->b_lblkno < lbn) 992 continue; 993 error = bbusy(bp, catch, slptimeo, NULL); 994 if (error != 0) { 995 if (error == EPASSTHROUGH) 996 goto restart; 997 mutex_exit(&bufcache_lock); 998 return (error); 999 } 1000 brelsel(bp, BC_INVAL | BC_VFLUSH); 1001 } 1002 1003 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 1004 nbp = LIST_NEXT(bp, b_vnbufs); 1005 if (bp->b_lblkno < lbn) 1006 continue; 1007 error = bbusy(bp, catch, slptimeo, NULL); 1008 if (error != 0) { 1009 if (error == EPASSTHROUGH) 1010 goto restart; 1011 mutex_exit(&bufcache_lock); 1012 return (error); 1013 } 1014 brelsel(bp, BC_INVAL | BC_VFLUSH); 1015 } 1016 mutex_exit(&bufcache_lock); 1017 1018 return (0); 1019 } 1020 1021 /* 1022 * Flush all dirty buffers from a vnode. 
1023 * Called with the underlying vnode locked, which should prevent new dirty 1024 * buffers from being queued. 1025 */ 1026 void 1027 vflushbuf(struct vnode *vp, int sync) 1028 { 1029 struct buf *bp, *nbp; 1030 int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0); 1031 bool dirty; 1032 1033 mutex_enter(&vp->v_interlock); 1034 (void) VOP_PUTPAGES(vp, 0, 0, flags); 1035 1036 loop: 1037 mutex_enter(&bufcache_lock); 1038 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 1039 nbp = LIST_NEXT(bp, b_vnbufs); 1040 if ((bp->b_cflags & BC_BUSY)) 1041 continue; 1042 if ((bp->b_oflags & BO_DELWRI) == 0) 1043 panic("vflushbuf: not dirty, bp %p", bp); 1044 bp->b_cflags |= BC_BUSY | BC_VFLUSH; 1045 mutex_exit(&bufcache_lock); 1046 /* 1047 * Wait for I/O associated with indirect blocks to complete, 1048 * since there is no way to quickly wait for them below. 1049 */ 1050 if (bp->b_vp == vp || sync == 0) 1051 (void) bawrite(bp); 1052 else 1053 (void) bwrite(bp); 1054 goto loop; 1055 } 1056 mutex_exit(&bufcache_lock); 1057 1058 if (sync == 0) 1059 return; 1060 1061 mutex_enter(&vp->v_interlock); 1062 while (vp->v_numoutput != 0) 1063 cv_wait(&vp->v_cv, &vp->v_interlock); 1064 dirty = !LIST_EMPTY(&vp->v_dirtyblkhd); 1065 mutex_exit(&vp->v_interlock); 1066 1067 if (dirty) { 1068 vprint("vflushbuf: dirty", vp); 1069 goto loop; 1070 } 1071 } 1072 1073 /* 1074 * Create a vnode for a block device. 1075 * Used for root filesystem and swap areas. 1076 * Also used for memory file system special devices. 1077 */ 1078 int 1079 bdevvp(dev_t dev, vnode_t **vpp) 1080 { 1081 1082 return (getdevvp(dev, vpp, VBLK)); 1083 } 1084 1085 /* 1086 * Create a vnode for a character device. 1087 * Used for kernfs and some console handling. 1088 */ 1089 int 1090 cdevvp(dev_t dev, vnode_t **vpp) 1091 { 1092 1093 return (getdevvp(dev, vpp, VCHR)); 1094 } 1095 1096 /* 1097 * Associate a buffer with a vnode. There must already be a hold on 1098 * the vnode. 1099 */ 1100 void 1101 bgetvp(struct vnode *vp, struct buf *bp) 1102 { 1103 1104 KASSERT(bp->b_vp == NULL); 1105 KASSERT(bp->b_objlock == &buffer_lock); 1106 KASSERT(mutex_owned(&vp->v_interlock)); 1107 KASSERT(mutex_owned(&bufcache_lock)); 1108 KASSERT((bp->b_cflags & BC_BUSY) != 0); 1109 KASSERT(!cv_has_waiters(&bp->b_done)); 1110 1111 vholdl(vp); 1112 bp->b_vp = vp; 1113 if (vp->v_type == VBLK || vp->v_type == VCHR) 1114 bp->b_dev = vp->v_rdev; 1115 else 1116 bp->b_dev = NODEV; 1117 1118 /* 1119 * Insert onto list for new vnode. 1120 */ 1121 bufinsvn(bp, &vp->v_cleanblkhd); 1122 bp->b_objlock = &vp->v_interlock; 1123 } 1124 1125 /* 1126 * Disassociate a buffer from a vnode. 1127 */ 1128 void 1129 brelvp(struct buf *bp) 1130 { 1131 struct vnode *vp = bp->b_vp; 1132 1133 KASSERT(vp != NULL); 1134 KASSERT(bp->b_objlock == &vp->v_interlock); 1135 KASSERT(mutex_owned(&vp->v_interlock)); 1136 KASSERT(mutex_owned(&bufcache_lock)); 1137 KASSERT((bp->b_cflags & BC_BUSY) != 0); 1138 KASSERT(!cv_has_waiters(&bp->b_done)); 1139 1140 /* 1141 * Delete from old vnode list, if on one. 1142 */ 1143 if (LIST_NEXT(bp, b_vnbufs) != NOLIST) 1144 bufremvn(bp); 1145 1146 if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_iflag & VI_ONWORKLST) && 1147 LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { 1148 vp->v_iflag &= ~VI_WRMAPDIRTY; 1149 vn_syncer_remove_from_worklist(vp); 1150 } 1151 1152 bp->b_objlock = &buffer_lock; 1153 bp->b_vp = NULL; 1154 holdrelel(vp); 1155 } 1156 1157 /* 1158 * Reassign a buffer from one vnode list to another. 1159 * The list reassignment must be within the same vnode. 
1160 * Used to assign file specific control information 1161 * (indirect blocks) to the list to which they belong. 1162 */ 1163 void 1164 reassignbuf(struct buf *bp, struct vnode *vp) 1165 { 1166 struct buflists *listheadp; 1167 int delayx; 1168 1169 KASSERT(mutex_owned(&bufcache_lock)); 1170 KASSERT(bp->b_objlock == &vp->v_interlock); 1171 KASSERT(mutex_owned(&vp->v_interlock)); 1172 KASSERT((bp->b_cflags & BC_BUSY) != 0); 1173 1174 /* 1175 * Delete from old vnode list, if on one. 1176 */ 1177 if (LIST_NEXT(bp, b_vnbufs) != NOLIST) 1178 bufremvn(bp); 1179 1180 /* 1181 * If dirty, put on list of dirty buffers; 1182 * otherwise insert onto list of clean buffers. 1183 */ 1184 if ((bp->b_oflags & BO_DELWRI) == 0) { 1185 listheadp = &vp->v_cleanblkhd; 1186 if (TAILQ_EMPTY(&vp->v_uobj.memq) && 1187 (vp->v_iflag & VI_ONWORKLST) && 1188 LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { 1189 vp->v_iflag &= ~VI_WRMAPDIRTY; 1190 vn_syncer_remove_from_worklist(vp); 1191 } 1192 } else { 1193 listheadp = &vp->v_dirtyblkhd; 1194 if ((vp->v_iflag & VI_ONWORKLST) == 0) { 1195 switch (vp->v_type) { 1196 case VDIR: 1197 delayx = dirdelay; 1198 break; 1199 case VBLK: 1200 if (vp->v_specmountpoint != NULL) { 1201 delayx = metadelay; 1202 break; 1203 } 1204 /* fall through */ 1205 default: 1206 delayx = filedelay; 1207 break; 1208 } 1209 if (!vp->v_mount || 1210 (vp->v_mount->mnt_flag & MNT_ASYNC) == 0) 1211 vn_syncer_add_to_worklist(vp, delayx); 1212 } 1213 } 1214 bufinsvn(bp, listheadp); 1215 } 1216 1217 /* 1218 * Create a vnode for a device. 1219 * Used by bdevvp (block device) for root file system etc., 1220 * and by cdevvp (character device) for console and kernfs. 1221 */ 1222 static int 1223 getdevvp(dev_t dev, vnode_t **vpp, enum vtype type) 1224 { 1225 vnode_t *vp; 1226 vnode_t *nvp; 1227 int error; 1228 1229 if (dev == NODEV) { 1230 *vpp = NULL; 1231 return (0); 1232 } 1233 error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp); 1234 if (error) { 1235 *vpp = NULL; 1236 return (error); 1237 } 1238 vp = nvp; 1239 vp->v_type = type; 1240 vp->v_vflag |= VV_MPSAFE; 1241 uvm_vnp_setsize(vp, 0); 1242 spec_node_init(vp, dev); 1243 *vpp = vp; 1244 return (0); 1245 } 1246 1247 /* 1248 * Try to gain a reference to a vnode, without acquiring its interlock. 1249 * The caller must hold a lock that will prevent the vnode from being 1250 * recycled or freed. 1251 */ 1252 bool 1253 vtryget(vnode_t *vp) 1254 { 1255 u_int use, next; 1256 1257 /* 1258 * If the vnode is being freed, don't make life any harder 1259 * for vclean() by adding another reference without waiting. 1260 * This is not strictly necessary, but we'll do it anyway. 1261 */ 1262 if (__predict_false((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0)) { 1263 return false; 1264 } 1265 for (use = vp->v_usecount;; use = next) { 1266 if (use == 0 || __predict_false((use & VC_XLOCK) != 0)) { 1267 /* Need interlock held if first reference. */ 1268 return false; 1269 } 1270 next = atomic_cas_uint(&vp->v_usecount, use, use + 1); 1271 if (__predict_true(next == use)) { 1272 return true; 1273 } 1274 } 1275 } 1276 1277 /* 1278 * Grab a particular vnode from the free list, increment its 1279 * reference count and lock it. If the vnode lock bit is set the 1280 * vnode is being eliminated in vgone. In that case, we can not 1281 * grab the vnode, so the process is awakened when the transition is 1282 * completed, and an error returned to indicate that the vnode is no 1283 * longer usable (possibly having been changed to a new file system type). 
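 *
 * If LK_INTERLOCK is set in flags, v_interlock is already held by the
 * caller.  With LK_NOWAIT, EBUSY is returned instead of sleeping for a
 * vnode that is being cleaned out or deactivated.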
1284 */ 1285 int 1286 vget(vnode_t *vp, int flags) 1287 { 1288 int error; 1289 1290 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1291 1292 if ((flags & LK_INTERLOCK) == 0) 1293 mutex_enter(&vp->v_interlock); 1294 1295 /* 1296 * Before adding a reference, we must remove the vnode 1297 * from its freelist. 1298 */ 1299 if (vp->v_usecount == 0) { 1300 vremfree(vp); 1301 vp->v_usecount = 1; 1302 } else { 1303 atomic_inc_uint(&vp->v_usecount); 1304 } 1305 1306 /* 1307 * If the vnode is in the process of being cleaned out for 1308 * another use, we wait for the cleaning to finish and then 1309 * return failure. Cleaning is determined by checking if 1310 * the VI_XLOCK or VI_FREEING flags are set. 1311 */ 1312 if ((vp->v_iflag & (VI_XLOCK | VI_FREEING)) != 0) { 1313 if ((flags & LK_NOWAIT) != 0) { 1314 vrelel(vp, 0); 1315 return EBUSY; 1316 } 1317 vwait(vp, VI_XLOCK | VI_FREEING); 1318 vrelel(vp, 0); 1319 return ENOENT; 1320 } 1321 1322 if ((vp->v_iflag & VI_INACTNOW) != 0) { 1323 /* 1324 * if it's being desactived, wait for it to complete. 1325 * Make sure to not return a clean vnode. 1326 */ 1327 if ((flags & LK_NOWAIT) != 0) { 1328 vrelel(vp, 0); 1329 return EBUSY; 1330 } 1331 vwait(vp, VI_INACTNOW); 1332 if ((vp->v_iflag & VI_CLEAN) != 0) { 1333 vrelel(vp, 0); 1334 return ENOENT; 1335 } 1336 } 1337 if (flags & LK_TYPE_MASK) { 1338 error = vn_lock(vp, flags | LK_INTERLOCK); 1339 if (error != 0) { 1340 vrele(vp); 1341 } 1342 return error; 1343 } 1344 mutex_exit(&vp->v_interlock); 1345 return 0; 1346 } 1347 1348 /* 1349 * vput(), just unlock and vrele() 1350 */ 1351 void 1352 vput(vnode_t *vp) 1353 { 1354 1355 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1356 1357 VOP_UNLOCK(vp, 0); 1358 vrele(vp); 1359 } 1360 1361 /* 1362 * Try to drop reference on a vnode. Abort if we are releasing the 1363 * last reference. Note: this _must_ succeed if not the last reference. 1364 */ 1365 static inline bool 1366 vtryrele(vnode_t *vp) 1367 { 1368 u_int use, next; 1369 1370 for (use = vp->v_usecount;; use = next) { 1371 if (use == 1) { 1372 return false; 1373 } 1374 KASSERT((use & VC_MASK) > 1); 1375 next = atomic_cas_uint(&vp->v_usecount, use, use - 1); 1376 if (__predict_true(next == use)) { 1377 return true; 1378 } 1379 } 1380 } 1381 1382 /* 1383 * Vnode release. If reference count drops to zero, call inactive 1384 * routine and either return to freelist or free to the pool. 1385 */ 1386 void 1387 vrelel(vnode_t *vp, int flags) 1388 { 1389 bool recycle, defer; 1390 int error; 1391 1392 KASSERT(mutex_owned(&vp->v_interlock)); 1393 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1394 KASSERT(vp->v_freelisthd == NULL); 1395 1396 if (__predict_false(vp->v_op == dead_vnodeop_p && 1397 (vp->v_iflag & (VI_CLEAN|VI_XLOCK)) == 0)) { 1398 vpanic(vp, "dead but not clean"); 1399 } 1400 1401 /* 1402 * If not the last reference, just drop the reference count 1403 * and unlock. 1404 */ 1405 if (vtryrele(vp)) { 1406 vp->v_iflag |= VI_INACTREDO; 1407 mutex_exit(&vp->v_interlock); 1408 return; 1409 } 1410 if (vp->v_usecount <= 0 || vp->v_writecount != 0) { 1411 vpanic(vp, "vrelel: bad ref count"); 1412 } 1413 1414 KASSERT((vp->v_iflag & VI_XLOCK) == 0); 1415 1416 /* 1417 * If not clean, deactivate the vnode, but preserve 1418 * our reference across the call to VOP_INACTIVE(). 1419 */ 1420 retry: 1421 if ((vp->v_iflag & VI_CLEAN) == 0) { 1422 recycle = false; 1423 vp->v_iflag |= VI_INACTNOW; 1424 1425 /* 1426 * XXX This ugly block can be largely eliminated if 1427 * locking is pushed down into the file systems. 
1428 * 1429 * Defer vnode release to vrele_thread if caller 1430 * requests it explicitly. 1431 */ 1432 if ((curlwp == uvm.pagedaemon_lwp) || 1433 (flags & VRELEL_ASYNC_RELE) != 0) { 1434 /* The pagedaemon can't wait around; defer. */ 1435 defer = true; 1436 } else if (curlwp == vrele_lwp) { 1437 /* 1438 * We have to try harder. But we can't sleep 1439 * with VI_INACTNOW as vget() may be waiting on it. 1440 */ 1441 vp->v_iflag &= ~(VI_INACTREDO|VI_INACTNOW); 1442 cv_broadcast(&vp->v_cv); 1443 error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | 1444 LK_RETRY); 1445 if (error != 0) { 1446 /* XXX */ 1447 vpanic(vp, "vrele: unable to lock %p"); 1448 } 1449 mutex_enter(&vp->v_interlock); 1450 /* 1451 * if we did get another reference while 1452 * sleeping, don't try to inactivate it yet. 1453 */ 1454 if (__predict_false(vtryrele(vp))) { 1455 VOP_UNLOCK(vp, 0); 1456 mutex_exit(&vp->v_interlock); 1457 return; 1458 } 1459 vp->v_iflag |= VI_INACTNOW; 1460 mutex_exit(&vp->v_interlock); 1461 defer = false; 1462 } else if ((vp->v_iflag & VI_LAYER) != 0) { 1463 /* 1464 * Acquiring the stack's lock in vclean() even 1465 * for an honest vput/vrele is dangerous because 1466 * our caller may hold other vnode locks; defer. 1467 */ 1468 defer = true; 1469 } else { 1470 /* If we can't acquire the lock, then defer. */ 1471 vp->v_iflag &= ~VI_INACTREDO; 1472 error = vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | 1473 LK_NOWAIT); 1474 if (error != 0) { 1475 defer = true; 1476 mutex_enter(&vp->v_interlock); 1477 } else { 1478 defer = false; 1479 } 1480 } 1481 1482 if (defer) { 1483 /* 1484 * Defer reclaim to the kthread; it's not safe to 1485 * clean it here. We donate it our last reference. 1486 */ 1487 KASSERT(mutex_owned(&vp->v_interlock)); 1488 KASSERT((vp->v_iflag & VI_INACTPEND) == 0); 1489 vp->v_iflag &= ~VI_INACTNOW; 1490 vp->v_iflag |= VI_INACTPEND; 1491 mutex_enter(&vrele_lock); 1492 TAILQ_INSERT_TAIL(&vrele_list, vp, v_freelist); 1493 if (++vrele_pending > (desiredvnodes >> 8)) 1494 cv_signal(&vrele_cv); 1495 mutex_exit(&vrele_lock); 1496 cv_broadcast(&vp->v_cv); 1497 mutex_exit(&vp->v_interlock); 1498 return; 1499 } 1500 1501 #ifdef DIAGNOSTIC 1502 if ((vp->v_type == VBLK || vp->v_type == VCHR) && 1503 vp->v_specnode != NULL && vp->v_specnode->sn_opencnt != 0) { 1504 vprint("vrelel: missing VOP_CLOSE()", vp); 1505 } 1506 #endif 1507 1508 /* 1509 * The vnode can gain another reference while being 1510 * deactivated. If VOP_INACTIVE() indicates that 1511 * the described file has been deleted, then recycle 1512 * the vnode irrespective of additional references. 1513 * Another thread may be waiting to re-use the on-disk 1514 * inode. 1515 * 1516 * Note that VOP_INACTIVE() will drop the vnode lock. 1517 */ 1518 VOP_INACTIVE(vp, &recycle); 1519 mutex_enter(&vp->v_interlock); 1520 vp->v_iflag &= ~VI_INACTNOW; 1521 cv_broadcast(&vp->v_cv); 1522 if (!recycle) { 1523 if (vtryrele(vp)) { 1524 mutex_exit(&vp->v_interlock); 1525 return; 1526 } 1527 1528 /* 1529 * If we grew another reference while 1530 * VOP_INACTIVE() was underway, retry. 1531 */ 1532 if ((vp->v_iflag & VI_INACTREDO) != 0) { 1533 goto retry; 1534 } 1535 } 1536 1537 /* Take care of space accounting. 
*/ 1538 if (vp->v_iflag & VI_EXECMAP) { 1539 atomic_add_int(&uvmexp.execpages, 1540 -vp->v_uobj.uo_npages); 1541 atomic_add_int(&uvmexp.filepages, 1542 vp->v_uobj.uo_npages); 1543 } 1544 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP|VI_WRMAP); 1545 vp->v_vflag &= ~VV_MAPPED; 1546 1547 /* 1548 * Recycle the vnode if the file is now unused (unlinked), 1549 * otherwise just free it. 1550 */ 1551 if (recycle) { 1552 vclean(vp, DOCLOSE); 1553 } 1554 KASSERT(vp->v_usecount > 0); 1555 } 1556 1557 if (atomic_dec_uint_nv(&vp->v_usecount) != 0) { 1558 /* Gained another reference while being reclaimed. */ 1559 mutex_exit(&vp->v_interlock); 1560 return; 1561 } 1562 1563 if ((vp->v_iflag & VI_CLEAN) != 0) { 1564 /* 1565 * It's clean so destroy it. It isn't referenced 1566 * anywhere since it has been reclaimed. 1567 */ 1568 KASSERT(vp->v_holdcnt == 0); 1569 KASSERT(vp->v_writecount == 0); 1570 mutex_exit(&vp->v_interlock); 1571 insmntque(vp, NULL); 1572 if (vp->v_type == VBLK || vp->v_type == VCHR) { 1573 spec_node_destroy(vp); 1574 } 1575 vnfree(vp); 1576 } else { 1577 /* 1578 * Otherwise, put it back onto the freelist. It 1579 * can't be destroyed while still associated with 1580 * a file system. 1581 */ 1582 mutex_enter(&vnode_free_list_lock); 1583 if (vp->v_holdcnt > 0) { 1584 vp->v_freelisthd = &vnode_hold_list; 1585 } else { 1586 vp->v_freelisthd = &vnode_free_list; 1587 } 1588 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); 1589 mutex_exit(&vnode_free_list_lock); 1590 mutex_exit(&vp->v_interlock); 1591 } 1592 } 1593 1594 void 1595 vrele(vnode_t *vp) 1596 { 1597 1598 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1599 1600 if ((vp->v_iflag & VI_INACTNOW) == 0 && vtryrele(vp)) { 1601 return; 1602 } 1603 mutex_enter(&vp->v_interlock); 1604 vrelel(vp, 0); 1605 } 1606 1607 /* 1608 * Asynchronous vnode release, vnode is released in different context. 1609 */ 1610 void 1611 vrele_async(vnode_t *vp) 1612 { 1613 1614 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1615 1616 if ((vp->v_iflag & VI_INACTNOW) == 0 && vtryrele(vp)) { 1617 return; 1618 } 1619 1620 mutex_enter(&vp->v_interlock); 1621 vrelel(vp, VRELEL_ASYNC_RELE); 1622 } 1623 1624 static void 1625 vrele_thread(void *cookie) 1626 { 1627 vnode_t *vp; 1628 1629 for (;;) { 1630 mutex_enter(&vrele_lock); 1631 while (TAILQ_EMPTY(&vrele_list)) { 1632 vrele_gen++; 1633 cv_broadcast(&vrele_cv); 1634 cv_timedwait(&vrele_cv, &vrele_lock, hz); 1635 } 1636 vp = TAILQ_FIRST(&vrele_list); 1637 TAILQ_REMOVE(&vrele_list, vp, v_freelist); 1638 vrele_pending--; 1639 mutex_exit(&vrele_lock); 1640 1641 /* 1642 * If not the last reference, then ignore the vnode 1643 * and look for more work. 1644 */ 1645 mutex_enter(&vp->v_interlock); 1646 KASSERT((vp->v_iflag & VI_INACTPEND) != 0); 1647 vp->v_iflag &= ~VI_INACTPEND; 1648 vrelel(vp, 0); 1649 } 1650 } 1651 1652 /* 1653 * Page or buffer structure gets a reference. 1654 * Called with v_interlock held. 1655 */ 1656 void 1657 vholdl(vnode_t *vp) 1658 { 1659 1660 KASSERT(mutex_owned(&vp->v_interlock)); 1661 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1662 1663 if (vp->v_holdcnt++ == 0 && vp->v_usecount == 0) { 1664 mutex_enter(&vnode_free_list_lock); 1665 KASSERT(vp->v_freelisthd == &vnode_free_list); 1666 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); 1667 vp->v_freelisthd = &vnode_hold_list; 1668 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); 1669 mutex_exit(&vnode_free_list_lock); 1670 } 1671 } 1672 1673 /* 1674 * Page or buffer structure frees a reference. 1675 * Called with v_interlock held. 
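 *
 * Dropping the last hold on a vnode with no users moves it from the
 * hold list back to the free list.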
1676 */ 1677 void 1678 holdrelel(vnode_t *vp) 1679 { 1680 1681 KASSERT(mutex_owned(&vp->v_interlock)); 1682 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1683 1684 if (vp->v_holdcnt <= 0) { 1685 vpanic(vp, "holdrelel: holdcnt vp %p"); 1686 } 1687 1688 vp->v_holdcnt--; 1689 if (vp->v_holdcnt == 0 && vp->v_usecount == 0) { 1690 mutex_enter(&vnode_free_list_lock); 1691 KASSERT(vp->v_freelisthd == &vnode_hold_list); 1692 TAILQ_REMOVE(vp->v_freelisthd, vp, v_freelist); 1693 vp->v_freelisthd = &vnode_free_list; 1694 TAILQ_INSERT_TAIL(vp->v_freelisthd, vp, v_freelist); 1695 mutex_exit(&vnode_free_list_lock); 1696 } 1697 } 1698 1699 /* 1700 * Vnode reference, where a reference is already held by some other 1701 * object (for example, a file structure). 1702 */ 1703 void 1704 vref(vnode_t *vp) 1705 { 1706 1707 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1708 KASSERT(vp->v_usecount != 0); 1709 1710 atomic_inc_uint(&vp->v_usecount); 1711 } 1712 1713 /* 1714 * Remove any vnodes in the vnode table belonging to mount point mp. 1715 * 1716 * If FORCECLOSE is not specified, there should not be any active ones, 1717 * return error if any are found (nb: this is a user error, not a 1718 * system error). If FORCECLOSE is specified, detach any active vnodes 1719 * that are found. 1720 * 1721 * If WRITECLOSE is set, only flush out regular file vnodes open for 1722 * writing. 1723 * 1724 * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped. 1725 */ 1726 #ifdef DEBUG 1727 int busyprt = 0; /* print out busy vnodes */ 1728 struct ctldebug debug1 = { "busyprt", &busyprt }; 1729 #endif 1730 1731 static vnode_t * 1732 vflushnext(vnode_t *mvp, int *when) 1733 { 1734 1735 if (hardclock_ticks > *when) { 1736 mutex_exit(&mntvnode_lock); 1737 yield(); 1738 mutex_enter(&mntvnode_lock); 1739 *when = hardclock_ticks + hz / 10; 1740 } 1741 1742 return vunmark(mvp); 1743 } 1744 1745 int 1746 vflush(struct mount *mp, vnode_t *skipvp, int flags) 1747 { 1748 vnode_t *vp, *mvp; 1749 int busy = 0, when = 0, gen; 1750 1751 /* 1752 * First, flush out any vnode references from vrele_list. 1753 */ 1754 mutex_enter(&vrele_lock); 1755 gen = vrele_gen; 1756 while (vrele_pending && gen == vrele_gen) { 1757 cv_broadcast(&vrele_cv); 1758 cv_wait(&vrele_cv, &vrele_lock); 1759 } 1760 mutex_exit(&vrele_lock); 1761 1762 /* Allocate a marker vnode. */ 1763 if ((mvp = vnalloc(mp)) == NULL) 1764 return (ENOMEM); 1765 1766 /* 1767 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() 1768 * and vclean() are called 1769 */ 1770 mutex_enter(&mntvnode_lock); 1771 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL; 1772 vp = vflushnext(mvp, &when)) { 1773 vmark(mvp, vp); 1774 if (vp->v_mount != mp || vismarker(vp)) 1775 continue; 1776 /* 1777 * Skip over a selected vnode. 1778 */ 1779 if (vp == skipvp) 1780 continue; 1781 mutex_enter(&vp->v_interlock); 1782 /* 1783 * Ignore clean but still referenced vnodes. 1784 */ 1785 if ((vp->v_iflag & VI_CLEAN) != 0) { 1786 mutex_exit(&vp->v_interlock); 1787 continue; 1788 } 1789 /* 1790 * Skip over a vnodes marked VSYSTEM. 1791 */ 1792 if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) { 1793 mutex_exit(&vp->v_interlock); 1794 continue; 1795 } 1796 /* 1797 * If WRITECLOSE is set, only flush out regular file 1798 * vnodes open for writing. 
1799 */ 1800 if ((flags & WRITECLOSE) && 1801 (vp->v_writecount == 0 || vp->v_type != VREG)) { 1802 mutex_exit(&vp->v_interlock); 1803 continue; 1804 } 1805 /* 1806 * With v_usecount == 0, all we need to do is clear 1807 * out the vnode data structures and we are done. 1808 */ 1809 if (vp->v_usecount == 0) { 1810 mutex_exit(&mntvnode_lock); 1811 vremfree(vp); 1812 vp->v_usecount = 1; 1813 vclean(vp, DOCLOSE); 1814 vrelel(vp, 0); 1815 mutex_enter(&mntvnode_lock); 1816 continue; 1817 } 1818 /* 1819 * If FORCECLOSE is set, forcibly close the vnode. 1820 * For block or character devices, revert to an 1821 * anonymous device. For all other files, just 1822 * kill them. 1823 */ 1824 if (flags & FORCECLOSE) { 1825 mutex_exit(&mntvnode_lock); 1826 atomic_inc_uint(&vp->v_usecount); 1827 if (vp->v_type != VBLK && vp->v_type != VCHR) { 1828 vclean(vp, DOCLOSE); 1829 vrelel(vp, 0); 1830 } else { 1831 vclean(vp, 0); 1832 vp->v_op = spec_vnodeop_p; /* XXXSMP */ 1833 mutex_exit(&vp->v_interlock); 1834 /* 1835 * The vnode isn't clean, but still resides 1836 * on the mount list. Remove it. XXX This 1837 * is a bit dodgy. 1838 */ 1839 insmntque(vp, NULL); 1840 vrele(vp); 1841 } 1842 mutex_enter(&mntvnode_lock); 1843 continue; 1844 } 1845 #ifdef DEBUG 1846 if (busyprt) 1847 vprint("vflush: busy vnode", vp); 1848 #endif 1849 mutex_exit(&vp->v_interlock); 1850 busy++; 1851 } 1852 mutex_exit(&mntvnode_lock); 1853 vnfree(mvp); 1854 if (busy) 1855 return (EBUSY); 1856 return (0); 1857 } 1858 1859 /* 1860 * Disassociate the underlying file system from a vnode. 1861 * 1862 * Must be called with the interlock held, and will return with it held. 1863 */ 1864 void 1865 vclean(vnode_t *vp, int flags) 1866 { 1867 lwp_t *l = curlwp; 1868 bool recycle, active; 1869 int error; 1870 1871 KASSERT(mutex_owned(&vp->v_interlock)); 1872 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1873 KASSERT(vp->v_usecount != 0); 1874 1875 /* If cleaning is already in progress wait until done and return. */ 1876 if (vp->v_iflag & VI_XLOCK) { 1877 vwait(vp, VI_XLOCK); 1878 return; 1879 } 1880 1881 /* If already clean, nothing to do. */ 1882 if ((vp->v_iflag & VI_CLEAN) != 0) { 1883 return; 1884 } 1885 1886 /* 1887 * Prevent the vnode from being recycled or brought into use 1888 * while we clean it out. 1889 */ 1890 vp->v_iflag |= VI_XLOCK; 1891 if (vp->v_iflag & VI_EXECMAP) { 1892 atomic_add_int(&uvmexp.execpages, -vp->v_uobj.uo_npages); 1893 atomic_add_int(&uvmexp.filepages, vp->v_uobj.uo_npages); 1894 } 1895 vp->v_iflag &= ~(VI_TEXT|VI_EXECMAP); 1896 active = (vp->v_usecount > 1); 1897 1898 /* XXXAD should not lock vnode under layer */ 1899 VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK); 1900 1901 /* 1902 * Clean out any cached data associated with the vnode. 1903 * If purging an active vnode, it must be closed and 1904 * deactivated before being reclaimed. Note that the 1905 * VOP_INACTIVE will unlock the vnode. 1906 */ 1907 if (flags & DOCLOSE) { 1908 error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0); 1909 if (error != 0) { 1910 /* XXX, fix vn_start_write's grab of mp and use that. 
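			 * For now, if the sync fails, any WAPBL journal
			 * attached to the vnode's mount is discarded and the
			 * buffers are invalidated without saving them.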
*/ 1911 1912 if (wapbl_vphaswapbl(vp)) 1913 WAPBL_DISCARD(wapbl_vptomp(vp)); 1914 error = vinvalbuf(vp, 0, NOCRED, l, 0, 0); 1915 } 1916 KASSERT(error == 0); 1917 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); 1918 if (active && (vp->v_type == VBLK || vp->v_type == VCHR)) { 1919 spec_node_revoke(vp); 1920 } 1921 } 1922 if (active) { 1923 VOP_INACTIVE(vp, &recycle); 1924 } else { 1925 /* 1926 * Any other processes trying to obtain this lock must first 1927 * wait for VI_XLOCK to clear, then call the new lock operation. 1928 */ 1929 VOP_UNLOCK(vp, 0); 1930 } 1931 1932 /* Disassociate the underlying file system from the vnode. */ 1933 if (VOP_RECLAIM(vp)) { 1934 vpanic(vp, "vclean: cannot reclaim"); 1935 } 1936 1937 KASSERT(vp->v_uobj.uo_npages == 0); 1938 if (vp->v_type == VREG && vp->v_ractx != NULL) { 1939 uvm_ra_freectx(vp->v_ractx); 1940 vp->v_ractx = NULL; 1941 } 1942 cache_purge(vp); 1943 1944 /* Done with purge, notify sleepers of the grim news. */ 1945 mutex_enter(&vp->v_interlock); 1946 vp->v_op = dead_vnodeop_p; 1947 vp->v_tag = VT_NON; 1948 vp->v_vnlock = &vp->v_lock; 1949 KNOTE(&vp->v_klist, NOTE_REVOKE); 1950 vp->v_iflag &= ~(VI_XLOCK | VI_FREEING); 1951 vp->v_vflag &= ~VV_LOCKSWORK; 1952 if ((flags & DOCLOSE) != 0) { 1953 vp->v_iflag |= VI_CLEAN; 1954 } 1955 cv_broadcast(&vp->v_cv); 1956 1957 KASSERT((vp->v_iflag & VI_ONWORKLST) == 0); 1958 } 1959 1960 /* 1961 * Recycle an unused vnode to the front of the free list. 1962 * Release the passed interlock if the vnode will be recycled. 1963 */ 1964 int 1965 vrecycle(vnode_t *vp, kmutex_t *inter_lkp, struct lwp *l) 1966 { 1967 1968 KASSERT((vp->v_iflag & VI_MARKER) == 0); 1969 1970 mutex_enter(&vp->v_interlock); 1971 if (vp->v_usecount != 0) { 1972 mutex_exit(&vp->v_interlock); 1973 return (0); 1974 } 1975 if (inter_lkp) 1976 mutex_exit(inter_lkp); 1977 vremfree(vp); 1978 vp->v_usecount = 1; 1979 vclean(vp, DOCLOSE); 1980 vrelel(vp, 0); 1981 return (1); 1982 } 1983 1984 /* 1985 * Eliminate all activity associated with a vnode in preparation for 1986 * reuse. Drops a reference from the vnode. 1987 */ 1988 void 1989 vgone(vnode_t *vp) 1990 { 1991 1992 mutex_enter(&vp->v_interlock); 1993 vclean(vp, DOCLOSE); 1994 vrelel(vp, 0); 1995 } 1996 1997 /* 1998 * Lookup a vnode by device number. 1999 */ 2000 int 2001 vfinddev(dev_t dev, enum vtype type, vnode_t **vpp) 2002 { 2003 vnode_t *vp; 2004 int rc = 0; 2005 2006 mutex_enter(&device_lock); 2007 for (vp = specfs_hash[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 2008 if (dev != vp->v_rdev || type != vp->v_type) 2009 continue; 2010 *vpp = vp; 2011 rc = 1; 2012 break; 2013 } 2014 mutex_exit(&device_lock); 2015 return (rc); 2016 } 2017 2018 /* 2019 * Revoke all the vnodes corresponding to the specified minor number 2020 * range (endpoints inclusive) of the specified major. 
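 * Each matching vnode found in the specfs hash is revoked via
 * VOP_REVOKE(REVOKEALL).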
2021 */ 2022 void 2023 vdevgone(int maj, int minl, int minh, enum vtype type) 2024 { 2025 vnode_t *vp, **vpp; 2026 dev_t dev; 2027 int mn; 2028 2029 vp = NULL; /* XXX gcc */ 2030 2031 mutex_enter(&device_lock); 2032 for (mn = minl; mn <= minh; mn++) { 2033 dev = makedev(maj, mn); 2034 vpp = &specfs_hash[SPECHASH(dev)]; 2035 for (vp = *vpp; vp != NULL;) { 2036 mutex_enter(&vp->v_interlock); 2037 if ((vp->v_iflag & VI_CLEAN) != 0 || 2038 dev != vp->v_rdev || type != vp->v_type) { 2039 mutex_exit(&vp->v_interlock); 2040 vp = vp->v_specnext; 2041 continue; 2042 } 2043 mutex_exit(&device_lock); 2044 if (vget(vp, LK_INTERLOCK) == 0) { 2045 VOP_REVOKE(vp, REVOKEALL); 2046 vrele(vp); 2047 } 2048 mutex_enter(&device_lock); 2049 vp = *vpp; 2050 } 2051 } 2052 mutex_exit(&device_lock); 2053 } 2054 2055 /* 2056 * Eliminate all activity associated with the requested vnode 2057 * and with all vnodes aliased to the requested vnode. 2058 */ 2059 void 2060 vrevoke(vnode_t *vp) 2061 { 2062 vnode_t *vq, **vpp; 2063 enum vtype type; 2064 dev_t dev; 2065 2066 KASSERT(vp->v_usecount > 0); 2067 2068 mutex_enter(&vp->v_interlock); 2069 if ((vp->v_iflag & VI_CLEAN) != 0) { 2070 mutex_exit(&vp->v_interlock); 2071 return; 2072 } else if (vp->v_type != VBLK && vp->v_type != VCHR) { 2073 atomic_inc_uint(&vp->v_usecount); 2074 vclean(vp, DOCLOSE); 2075 vrelel(vp, 0); 2076 return; 2077 } else { 2078 dev = vp->v_rdev; 2079 type = vp->v_type; 2080 mutex_exit(&vp->v_interlock); 2081 } 2082 2083 vpp = &specfs_hash[SPECHASH(dev)]; 2084 mutex_enter(&device_lock); 2085 for (vq = *vpp; vq != NULL;) { 2086 /* If clean or being cleaned, then ignore it. */ 2087 mutex_enter(&vq->v_interlock); 2088 if ((vq->v_iflag & (VI_CLEAN | VI_XLOCK)) != 0 || 2089 vq->v_rdev != dev || vq->v_type != type) { 2090 mutex_exit(&vq->v_interlock); 2091 vq = vq->v_specnext; 2092 continue; 2093 } 2094 mutex_exit(&device_lock); 2095 if (vq->v_usecount == 0) { 2096 vremfree(vq); 2097 vq->v_usecount = 1; 2098 } else { 2099 atomic_inc_uint(&vq->v_usecount); 2100 } 2101 vclean(vq, DOCLOSE); 2102 vrelel(vq, 0); 2103 mutex_enter(&device_lock); 2104 vq = *vpp; 2105 } 2106 mutex_exit(&device_lock); 2107 } 2108 2109 /* 2110 * sysctl helper routine to return list of supported fstypes 2111 */ 2112 int 2113 sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS) 2114 { 2115 char bf[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 2116 char *where = oldp; 2117 struct vfsops *v; 2118 size_t needed, left, slen; 2119 int error, first; 2120 2121 if (newp != NULL) 2122 return (EPERM); 2123 if (namelen != 0) 2124 return (EINVAL); 2125 2126 first = 1; 2127 error = 0; 2128 needed = 0; 2129 left = *oldlenp; 2130 2131 sysctl_unlock(); 2132 mutex_enter(&vfs_list_lock); 2133 LIST_FOREACH(v, &vfs_list, vfs_list) { 2134 if (where == NULL) 2135 needed += strlen(v->vfs_name) + 1; 2136 else { 2137 memset(bf, 0, sizeof(bf)); 2138 if (first) { 2139 strncpy(bf, v->vfs_name, sizeof(bf)); 2140 first = 0; 2141 } else { 2142 bf[0] = ' '; 2143 strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1); 2144 } 2145 bf[sizeof(bf)-1] = '\0'; 2146 slen = strlen(bf); 2147 if (left < slen + 1) 2148 break; 2149 v->vfs_refcount++; 2150 mutex_exit(&vfs_list_lock); 2151 /* +1 to copy out the trailing NUL byte */ 2152 error = copyout(bf, where, slen + 1); 2153 mutex_enter(&vfs_list_lock); 2154 v->vfs_refcount--; 2155 if (error) 2156 break; 2157 where += slen; 2158 needed += slen; 2159 left -= slen; 2160 } 2161 } 2162 mutex_exit(&vfs_list_lock); 2163 sysctl_relock(); 2164 *oldlenp = needed; 2165 return (error); 2166 } 2167 2168 
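
/*
 * Example (illustrative only): the handler above backs a read-only
 * sysctl node, and a userland consumer could fetch the space-separated
 * list with sysctlbyname(3), assuming the node is exported as
 * "vfs.generic.fstypes":
 *
 *	char buf[1024];
 *	size_t len = sizeof(buf);
 *
 *	if (sysctlbyname("vfs.generic.fstypes", buf, &len, NULL, 0) == 0)
 *		printf("%s\n", buf);
 *
 * Passing a NULL buffer first returns the required length in len,
 * matching the "where == NULL" branch above.
 */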
2169 int kinfo_vdebug = 1; 2170 int kinfo_vgetfailed; 2171 #define KINFO_VNODESLOP 10 2172 /* 2173 * Dump vnode list (via sysctl). 2174 * Copyout address of vnode followed by vnode. 2175 */ 2176 /* ARGSUSED */ 2177 int 2178 sysctl_kern_vnode(SYSCTLFN_ARGS) 2179 { 2180 char *where = oldp; 2181 size_t *sizep = oldlenp; 2182 struct mount *mp, *nmp; 2183 vnode_t *vp, *mvp, vbuf; 2184 char *bp = where; 2185 char *ewhere; 2186 int error; 2187 2188 if (namelen != 0) 2189 return (EOPNOTSUPP); 2190 if (newp != NULL) 2191 return (EPERM); 2192 2193 #define VPTRSZ sizeof(vnode_t *) 2194 #define VNODESZ sizeof(vnode_t) 2195 if (where == NULL) { 2196 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 2197 return (0); 2198 } 2199 ewhere = where + *sizep; 2200 2201 sysctl_unlock(); 2202 mutex_enter(&mountlist_lock); 2203 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 2204 mp = nmp) { 2205 if (vfs_busy(mp, &nmp)) { 2206 continue; 2207 } 2208 /* Allocate a marker vnode. */ 2209 mvp = vnalloc(mp); 2210 /* Should never fail for mp != NULL */ 2211 KASSERT(mvp != NULL); 2212 mutex_enter(&mntvnode_lock); 2213 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; 2214 vp = vunmark(mvp)) { 2215 vmark(mvp, vp); 2216 /* 2217 * Check that the vp is still associated with 2218 * this filesystem. RACE: could have been 2219 * recycled onto the same filesystem. 2220 */ 2221 if (vp->v_mount != mp || vismarker(vp)) 2222 continue; 2223 if (bp + VPTRSZ + VNODESZ > ewhere) { 2224 (void)vunmark(mvp); 2225 mutex_exit(&mntvnode_lock); 2226 vnfree(mvp); 2227 vfs_unbusy(mp, false, NULL); 2228 sysctl_relock(); 2229 *sizep = bp - where; 2230 return (ENOMEM); 2231 } 2232 memcpy(&vbuf, vp, VNODESZ); 2233 mutex_exit(&mntvnode_lock); 2234 if ((error = copyout(&vp, bp, VPTRSZ)) || 2235 (error = copyout(&vbuf, bp + VPTRSZ, VNODESZ))) { 2236 mutex_enter(&mntvnode_lock); 2237 (void)vunmark(mvp); 2238 mutex_exit(&mntvnode_lock); 2239 vnfree(mvp); 2240 vfs_unbusy(mp, false, NULL); 2241 sysctl_relock(); 2242 return (error); 2243 } 2244 bp += VPTRSZ + VNODESZ; 2245 mutex_enter(&mntvnode_lock); 2246 } 2247 mutex_exit(&mntvnode_lock); 2248 vnfree(mvp); 2249 vfs_unbusy(mp, false, &nmp); 2250 } 2251 mutex_exit(&mountlist_lock); 2252 sysctl_relock(); 2253 2254 *sizep = bp - where; 2255 return (0); 2256 } 2257 2258 /* 2259 * Remove clean vnodes from a mountpoint's vnode list. 2260 */ 2261 void 2262 vfs_scrubvnlist(struct mount *mp) 2263 { 2264 vnode_t *vp, *nvp; 2265 2266 retry: 2267 mutex_enter(&mntvnode_lock); 2268 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { 2269 nvp = TAILQ_NEXT(vp, v_mntvnodes); 2270 mutex_enter(&vp->v_interlock); 2271 if ((vp->v_iflag & VI_CLEAN) != 0) { 2272 TAILQ_REMOVE(&mp->mnt_vnodelist, vp, v_mntvnodes); 2273 vp->v_mount = NULL; 2274 mutex_exit(&mntvnode_lock); 2275 mutex_exit(&vp->v_interlock); 2276 vfs_destroy(mp); 2277 goto retry; 2278 } 2279 mutex_exit(&vp->v_interlock); 2280 } 2281 mutex_exit(&mntvnode_lock); 2282 } 2283 2284 /* 2285 * Check to see if a filesystem is mounted on a block device. 
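 *
 * Returns ENOTBLK if vp is not a block device, EBUSY if vp (or an alias
 * of it) carries a mounted file system, and 0 otherwise.  A minimal,
 * hypothetical caller sketch:
 *
 *	if ((error = vfs_mountedon(devvp)) != 0)
 *		return error;
 *
 * after which the device may be claimed for exclusive use.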
2286 */ 2287 int 2288 vfs_mountedon(vnode_t *vp) 2289 { 2290 vnode_t *vq; 2291 int error = 0; 2292 2293 if (vp->v_type != VBLK) 2294 return ENOTBLK; 2295 if (vp->v_specmountpoint != NULL) 2296 return (EBUSY); 2297 mutex_enter(&device_lock); 2298 for (vq = specfs_hash[SPECHASH(vp->v_rdev)]; vq != NULL; 2299 vq = vq->v_specnext) { 2300 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) 2301 continue; 2302 if (vq->v_specmountpoint != NULL) { 2303 error = EBUSY; 2304 break; 2305 } 2306 } 2307 mutex_exit(&device_lock); 2308 return (error); 2309 } 2310 2311 /* 2312 * Unmount all file systems. 2313 * We traverse the list in reverse order under the assumption that doing so 2314 * will avoid needing to worry about dependencies. 2315 */ 2316 bool 2317 vfs_unmountall(struct lwp *l) 2318 { 2319 2320 printf("unmounting file systems..."); 2321 return vfs_unmountall1(l, true, true); 2322 } 2323 2324 static void 2325 vfs_unmount_print(struct mount *mp, const char *pfx) 2326 { 2327 2328 aprint_verbose("%sunmounted %s on %s type %s\n", pfx, 2329 mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, 2330 mp->mnt_stat.f_fstypename); 2331 } 2332 2333 bool 2334 vfs_unmount_forceone(struct lwp *l) 2335 { 2336 struct mount *mp, *nmp; 2337 int error; 2338 2339 nmp = NULL; 2340 2341 CIRCLEQ_FOREACH_REVERSE(mp, &mountlist, mnt_list) { 2342 if (nmp == NULL || mp->mnt_gen > nmp->mnt_gen) { 2343 nmp = mp; 2344 } 2345 } 2346 if (nmp == NULL) { 2347 return false; 2348 } 2349 2350 #ifdef DEBUG 2351 printf("\nforcefully unmounting %s (%s)...", 2352 nmp->mnt_stat.f_mntonname, nmp->mnt_stat.f_mntfromname); 2353 #endif 2354 atomic_inc_uint(&nmp->mnt_refcnt); 2355 if ((error = dounmount(nmp, MNT_FORCE, l)) == 0) { 2356 vfs_unmount_print(nmp, "forcefully "); 2357 return true; 2358 } else { 2359 vfs_destroy(nmp); 2360 } 2361 2362 #ifdef DEBUG 2363 printf("forceful unmount of %s failed with error %d\n", 2364 nmp->mnt_stat.f_mntonname, error); 2365 #endif 2366 2367 return false; 2368 } 2369 2370 bool 2371 vfs_unmountall1(struct lwp *l, bool force, bool verbose) 2372 { 2373 struct mount *mp, *nmp; 2374 bool any_error = false, progress = false; 2375 int error; 2376 2377 for (mp = CIRCLEQ_LAST(&mountlist); 2378 mp != (void *)&mountlist; 2379 mp = nmp) { 2380 nmp = CIRCLEQ_PREV(mp, mnt_list); 2381 #ifdef DEBUG 2382 printf("\nunmounting %p %s (%s)...", 2383 (void *)mp, mp->mnt_stat.f_mntonname, 2384 mp->mnt_stat.f_mntfromname); 2385 #endif 2386 atomic_inc_uint(&mp->mnt_refcnt); 2387 if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0) { 2388 vfs_unmount_print(mp, ""); 2389 progress = true; 2390 } else { 2391 vfs_destroy(mp); 2392 if (verbose) { 2393 printf("unmount of %s failed with error %d\n", 2394 mp->mnt_stat.f_mntonname, error); 2395 } 2396 any_error = true; 2397 } 2398 } 2399 if (verbose) { 2400 printf(" done\n"); 2401 } 2402 if (any_error && verbose) { 2403 printf("WARNING: some file systems would not unmount\n"); 2404 } 2405 return progress; 2406 } 2407 2408 /* 2409 * Sync and unmount file systems before shutting down. 2410 */ 2411 void 2412 vfs_shutdown(void) 2413 { 2414 struct lwp *l; 2415 2416 /* XXX we're certainly not running in lwp0's context! */ 2417 l = (curlwp == NULL) ? &lwp0 : curlwp; 2418 2419 vfs_shutdown1(l); 2420 } 2421 2422 void 2423 vfs_sync_all(struct lwp *l) 2424 { 2425 printf("syncing disks... "); 2426 2427 /* remove user processes from run queue */ 2428 suspendsched(); 2429 (void) spl0(); 2430 2431 /* avoid coming back this way again if we panic. 
*/ 2432 doing_shutdown = 1; 2433 2434 sys_sync(l, NULL, NULL); 2435 2436 /* Wait for sync to finish. */ 2437 if (buf_syncwait() != 0) { 2438 #if defined(DDB) && defined(DEBUG_HALT_BUSY) 2439 Debugger(); 2440 #endif 2441 printf("giving up\n"); 2442 return; 2443 } else 2444 printf("done\n"); 2445 } 2446 2447 static void 2448 vfs_shutdown1(struct lwp *l) 2449 { 2450 2451 vfs_sync_all(l); 2452 2453 /* 2454 * If we've panic'd, don't make the situation potentially 2455 * worse by unmounting the file systems. 2456 */ 2457 if (panicstr != NULL) 2458 return; 2459 2460 /* Release inodes held by texts before update. */ 2461 #ifdef notdef 2462 vnshutdown(); 2463 #endif 2464 /* Unmount file systems. */ 2465 vfs_unmountall(l); 2466 } 2467 2468 /* 2469 * Print a list of supported file system types (used by vfs_mountroot) 2470 */ 2471 static void 2472 vfs_print_fstypes(void) 2473 { 2474 struct vfsops *v; 2475 int cnt = 0; 2476 2477 mutex_enter(&vfs_list_lock); 2478 LIST_FOREACH(v, &vfs_list, vfs_list) 2479 ++cnt; 2480 mutex_exit(&vfs_list_lock); 2481 2482 if (cnt == 0) { 2483 printf("WARNING: No file system modules have been loaded.\n"); 2484 return; 2485 } 2486 2487 printf("Supported file systems:"); 2488 mutex_enter(&vfs_list_lock); 2489 LIST_FOREACH(v, &vfs_list, vfs_list) { 2490 printf(" %s", v->vfs_name); 2491 } 2492 mutex_exit(&vfs_list_lock); 2493 printf("\n"); 2494 } 2495 2496 /* 2497 * Mount the root file system. If the operator didn't specify a 2498 * file system to use, try all possible file systems until one 2499 * succeeds. 2500 */ 2501 int 2502 vfs_mountroot(void) 2503 { 2504 struct vfsops *v; 2505 int error = ENODEV; 2506 2507 if (root_device == NULL) 2508 panic("vfs_mountroot: root device unknown"); 2509 2510 switch (device_class(root_device)) { 2511 case DV_IFNET: 2512 if (rootdev != NODEV) 2513 panic("vfs_mountroot: rootdev set for DV_IFNET " 2514 "(0x%llx -> %llu,%llu)", 2515 (unsigned long long)rootdev, 2516 (unsigned long long)major(rootdev), 2517 (unsigned long long)minor(rootdev)); 2518 break; 2519 2520 case DV_DISK: 2521 if (rootdev == NODEV) 2522 panic("vfs_mountroot: rootdev not set for DV_DISK"); 2523 if (bdevvp(rootdev, &rootvp)) 2524 panic("vfs_mountroot: can't get vnode for rootdev"); 2525 error = VOP_OPEN(rootvp, FREAD, FSCRED); 2526 if (error) { 2527 printf("vfs_mountroot: can't open root device\n"); 2528 return (error); 2529 } 2530 break; 2531 2532 case DV_VIRTUAL: 2533 break; 2534 2535 default: 2536 printf("%s: inappropriate for root file system\n", 2537 device_xname(root_device)); 2538 return (ENODEV); 2539 } 2540 2541 /* 2542 * If user specified a root fs type, use it. Make sure the 2543 * specified type exists and has a mount_root() 2544 */ 2545 if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) { 2546 v = vfs_getopsbyname(rootfstype); 2547 error = EFTYPE; 2548 if (v != NULL) { 2549 if (v->vfs_mountroot != NULL) { 2550 error = (v->vfs_mountroot)(); 2551 } 2552 v->vfs_refcount--; 2553 } 2554 goto done; 2555 } 2556 2557 /* 2558 * Try each file system currently configured into the kernel. 
2559 */ 2560 mutex_enter(&vfs_list_lock); 2561 LIST_FOREACH(v, &vfs_list, vfs_list) { 2562 if (v->vfs_mountroot == NULL) 2563 continue; 2564 #ifdef DEBUG 2565 aprint_normal("mountroot: trying %s...\n", v->vfs_name); 2566 #endif 2567 v->vfs_refcount++; 2568 mutex_exit(&vfs_list_lock); 2569 error = (*v->vfs_mountroot)(); 2570 mutex_enter(&vfs_list_lock); 2571 v->vfs_refcount--; 2572 if (!error) { 2573 aprint_normal("root file system type: %s\n", 2574 v->vfs_name); 2575 break; 2576 } 2577 } 2578 mutex_exit(&vfs_list_lock); 2579 2580 if (v == NULL) { 2581 vfs_print_fstypes(); 2582 printf("no file system for %s", device_xname(root_device)); 2583 if (device_class(root_device) == DV_DISK) 2584 printf(" (dev 0x%llx)", (unsigned long long)rootdev); 2585 printf("\n"); 2586 error = EFTYPE; 2587 } 2588 2589 done: 2590 if (error && device_class(root_device) == DV_DISK) { 2591 VOP_CLOSE(rootvp, FREAD, FSCRED); 2592 vrele(rootvp); 2593 } 2594 if (error == 0) { 2595 extern struct cwdinfo cwdi0; 2596 2597 CIRCLEQ_FIRST(&mountlist)->mnt_flag |= MNT_ROOTFS; 2598 CIRCLEQ_FIRST(&mountlist)->mnt_op->vfs_refcount++; 2599 2600 /* 2601 * Get the vnode for '/'. Set cwdi0.cwdi_cdir to 2602 * reference it. 2603 */ 2604 error = VFS_ROOT(CIRCLEQ_FIRST(&mountlist), &rootvnode); 2605 if (error) 2606 panic("cannot find root vnode, error=%d", error); 2607 cwdi0.cwdi_cdir = rootvnode; 2608 vref(cwdi0.cwdi_cdir); 2609 VOP_UNLOCK(rootvnode, 0); 2610 cwdi0.cwdi_rdir = NULL; 2611 2612 /* 2613 * Now that root is mounted, we can fixup initproc's CWD 2614 * info. All other processes are kthreads, which merely 2615 * share proc0's CWD info. 2616 */ 2617 initproc->p_cwdi->cwdi_cdir = rootvnode; 2618 vref(initproc->p_cwdi->cwdi_cdir); 2619 initproc->p_cwdi->cwdi_rdir = NULL; 2620 } 2621 return (error); 2622 } 2623 2624 /* 2625 * Get a new unique fsid 2626 */ 2627 void 2628 vfs_getnewfsid(struct mount *mp) 2629 { 2630 static u_short xxxfs_mntid; 2631 fsid_t tfsid; 2632 int mtype; 2633 2634 mutex_enter(&mntid_lock); 2635 mtype = makefstype(mp->mnt_op->vfs_name); 2636 mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0); 2637 mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype; 2638 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 2639 if (xxxfs_mntid == 0) 2640 ++xxxfs_mntid; 2641 tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid); 2642 tfsid.__fsid_val[1] = mtype; 2643 if (!CIRCLEQ_EMPTY(&mountlist)) { 2644 while (vfs_getvfs(&tfsid)) { 2645 tfsid.__fsid_val[0]++; 2646 xxxfs_mntid++; 2647 } 2648 } 2649 mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0]; 2650 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 2651 mutex_exit(&mntid_lock); 2652 } 2653 2654 /* 2655 * Make a 'unique' number from a mount type name. 2656 */ 2657 long 2658 makefstype(const char *type) 2659 { 2660 long rv; 2661 2662 for (rv = 0; *type; type++) { 2663 rv <<= 2; 2664 rv ^= *type; 2665 } 2666 return rv; 2667 } 2668 2669 /* 2670 * Set vnode attributes to VNOVAL 2671 */ 2672 void 2673 vattr_null(struct vattr *vap) 2674 { 2675 2676 memset(vap, 0, sizeof(*vap)); 2677 2678 vap->va_type = VNON; 2679 2680 /* 2681 * Assign individually so that it is safe even if size and 2682 * sign of each member are varied. 
2683 */ 2684 vap->va_mode = VNOVAL; 2685 vap->va_nlink = VNOVAL; 2686 vap->va_uid = VNOVAL; 2687 vap->va_gid = VNOVAL; 2688 vap->va_fsid = VNOVAL; 2689 vap->va_fileid = VNOVAL; 2690 vap->va_size = VNOVAL; 2691 vap->va_blocksize = VNOVAL; 2692 vap->va_atime.tv_sec = 2693 vap->va_mtime.tv_sec = 2694 vap->va_ctime.tv_sec = 2695 vap->va_birthtime.tv_sec = VNOVAL; 2696 vap->va_atime.tv_nsec = 2697 vap->va_mtime.tv_nsec = 2698 vap->va_ctime.tv_nsec = 2699 vap->va_birthtime.tv_nsec = VNOVAL; 2700 vap->va_gen = VNOVAL; 2701 vap->va_flags = VNOVAL; 2702 vap->va_rdev = VNOVAL; 2703 vap->va_bytes = VNOVAL; 2704 } 2705 2706 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) 2707 #define ARRAY_PRINT(idx, arr) \ 2708 ((unsigned int)(idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN") 2709 2710 const char * const vnode_tags[] = { VNODE_TAGS }; 2711 const char * const vnode_types[] = { VNODE_TYPES }; 2712 const char vnode_flagbits[] = VNODE_FLAGBITS; 2713 2714 /* 2715 * Print out a description of a vnode. 2716 */ 2717 void 2718 vprint(const char *label, struct vnode *vp) 2719 { 2720 struct vnlock *vl; 2721 char bf[96]; 2722 int flag; 2723 2724 vl = (vp->v_vnlock != NULL ? vp->v_vnlock : &vp->v_lock); 2725 flag = vp->v_iflag | vp->v_vflag | vp->v_uflag; 2726 snprintb(bf, sizeof(bf), vnode_flagbits, flag); 2727 2728 if (label != NULL) 2729 printf("%s: ", label); 2730 printf("vnode @ %p, flags (%s)\n\ttag %s(%d), type %s(%d), " 2731 "usecount %d, writecount %d, holdcount %d\n" 2732 "\tfreelisthd %p, mount %p, data %p lock %p recursecnt %d\n", 2733 vp, bf, ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag, 2734 ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type, 2735 vp->v_usecount, vp->v_writecount, vp->v_holdcnt, 2736 vp->v_freelisthd, vp->v_mount, vp->v_data, vl, vl->vl_recursecnt); 2737 if (vp->v_data != NULL) { 2738 printf("\t"); 2739 VOP_PRINT(vp); 2740 } 2741 } 2742 2743 #ifdef DEBUG 2744 /* 2745 * List all of the locked vnodes in the system. 2746 * Called when debugging the kernel. 2747 */ 2748 void 2749 printlockedvnodes(void) 2750 { 2751 struct mount *mp, *nmp; 2752 struct vnode *vp; 2753 2754 printf("Locked vnodes\n"); 2755 mutex_enter(&mountlist_lock); 2756 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 2757 mp = nmp) { 2758 if (vfs_busy(mp, &nmp)) { 2759 continue; 2760 } 2761 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 2762 if (VOP_ISLOCKED(vp)) 2763 vprint(NULL, vp); 2764 } 2765 mutex_enter(&mountlist_lock); 2766 vfs_unbusy(mp, false, &nmp); 2767 } 2768 mutex_exit(&mountlist_lock); 2769 } 2770 #endif 2771 2772 /* Deprecated. Kept for KPI compatibility. */ 2773 int 2774 vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid, 2775 mode_t acc_mode, kauth_cred_t cred) 2776 { 2777 2778 #ifdef DIAGNOSTIC 2779 printf("vaccess: deprecated interface used.\n"); 2780 #endif /* DIAGNOSTIC */ 2781 2782 return genfs_can_access(type, file_mode, uid, gid, acc_mode, cred); 2783 } 2784 2785 /* 2786 * Given a file system name, look up the vfsops for that 2787 * file system, or return NULL if file system isn't present 2788 * in the kernel. 
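 *
 * On success a reference is taken on the vfsops; the caller is expected
 * to drop it again when done (as vfs_mountroot() above does).  Sketch of
 * a hypothetical caller:
 *
 *	struct vfsops *v;
 *
 *	if ((v = vfs_getopsbyname("ffs")) != NULL) {
 *		(use v->vfs_name, v->vfs_mountroot, etc.)
 *		v->vfs_refcount--;
 *	}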
2789 */ 2790 struct vfsops * 2791 vfs_getopsbyname(const char *name) 2792 { 2793 struct vfsops *v; 2794 2795 mutex_enter(&vfs_list_lock); 2796 LIST_FOREACH(v, &vfs_list, vfs_list) { 2797 if (strcmp(v->vfs_name, name) == 0) 2798 break; 2799 } 2800 if (v != NULL) 2801 v->vfs_refcount++; 2802 mutex_exit(&vfs_list_lock); 2803 2804 return (v); 2805 } 2806 2807 void 2808 copy_statvfs_info(struct statvfs *sbp, const struct mount *mp) 2809 { 2810 const struct statvfs *mbp; 2811 2812 if (sbp == (mbp = &mp->mnt_stat)) 2813 return; 2814 2815 (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx)); 2816 sbp->f_fsid = mbp->f_fsid; 2817 sbp->f_owner = mbp->f_owner; 2818 sbp->f_flag = mbp->f_flag; 2819 sbp->f_syncwrites = mbp->f_syncwrites; 2820 sbp->f_asyncwrites = mbp->f_asyncwrites; 2821 sbp->f_syncreads = mbp->f_syncreads; 2822 sbp->f_asyncreads = mbp->f_asyncreads; 2823 (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare)); 2824 (void)memcpy(sbp->f_fstypename, mbp->f_fstypename, 2825 sizeof(sbp->f_fstypename)); 2826 (void)memcpy(sbp->f_mntonname, mbp->f_mntonname, 2827 sizeof(sbp->f_mntonname)); 2828 (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname, 2829 sizeof(sbp->f_mntfromname)); 2830 sbp->f_namemax = mbp->f_namemax; 2831 } 2832 2833 int 2834 set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom, 2835 const char *vfsname, struct mount *mp, struct lwp *l) 2836 { 2837 int error; 2838 size_t size; 2839 struct statvfs *sfs = &mp->mnt_stat; 2840 int (*fun)(const void *, void *, size_t, size_t *); 2841 2842 (void)strlcpy(mp->mnt_stat.f_fstypename, vfsname, 2843 sizeof(mp->mnt_stat.f_fstypename)); 2844 2845 if (onp) { 2846 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 2847 fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr; 2848 if (cwdi->cwdi_rdir != NULL) { 2849 size_t len; 2850 char *bp; 2851 char *path = PNBUF_GET(); 2852 2853 bp = path + MAXPATHLEN; 2854 *--bp = '\0'; 2855 rw_enter(&cwdi->cwdi_lock, RW_READER); 2856 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, 2857 path, MAXPATHLEN / 2, 0, l); 2858 rw_exit(&cwdi->cwdi_lock); 2859 if (error) { 2860 PNBUF_PUT(path); 2861 return error; 2862 } 2863 2864 len = strlen(bp); 2865 if (len > sizeof(sfs->f_mntonname) - 1) 2866 len = sizeof(sfs->f_mntonname) - 1; 2867 (void)strncpy(sfs->f_mntonname, bp, len); 2868 PNBUF_PUT(path); 2869 2870 if (len < sizeof(sfs->f_mntonname) - 1) { 2871 error = (*fun)(onp, &sfs->f_mntonname[len], 2872 sizeof(sfs->f_mntonname) - len - 1, &size); 2873 if (error) 2874 return error; 2875 size += len; 2876 } else { 2877 size = len; 2878 } 2879 } else { 2880 error = (*fun)(onp, &sfs->f_mntonname, 2881 sizeof(sfs->f_mntonname) - 1, &size); 2882 if (error) 2883 return error; 2884 } 2885 (void)memset(sfs->f_mntonname + size, 0, 2886 sizeof(sfs->f_mntonname) - size); 2887 } 2888 2889 if (fromp) { 2890 fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr; 2891 error = (*fun)(fromp, sfs->f_mntfromname, 2892 sizeof(sfs->f_mntfromname) - 1, &size); 2893 if (error) 2894 return error; 2895 (void)memset(sfs->f_mntfromname + size, 0, 2896 sizeof(sfs->f_mntfromname) - size); 2897 } 2898 return 0; 2899 } 2900 2901 void 2902 vfs_timestamp(struct timespec *ts) 2903 { 2904 2905 nanotime(ts); 2906 } 2907 2908 time_t rootfstime; /* recorded root fs time, if known */ 2909 void 2910 setrootfstime(time_t t) 2911 { 2912 rootfstime = t; 2913 } 2914 2915 /* 2916 * Sham lock manager for vnodes. This is a temporary measure. 
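 *
 * Illustrative use only: vnode lock and unlock operations funnel into
 * this with calls along the lines of
 *
 *	error = vlockmgr(vp->v_vnlock, LK_EXCLUSIVE);
 *	...
 *	vlockmgr(vp->v_vnlock, LK_RELEASE);
 *
 * LK_NOWAIT turns a contended acquire into an EBUSY return instead of
 * sleeping, and LK_CANRECURSE permits recursive exclusive entry.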
*/ 2918 int 2919 vlockmgr(struct vnlock *vl, int flags) 2920 { 2921 2922 KASSERT((flags & ~(LK_CANRECURSE | LK_NOWAIT | LK_TYPE_MASK)) == 0); 2923 2924 switch (flags & LK_TYPE_MASK) { 2925 case LK_SHARED: 2926 if (rw_tryenter(&vl->vl_lock, RW_READER)) { 2927 return 0; 2928 } 2929 if ((flags & LK_NOWAIT) != 0) { 2930 return EBUSY; 2931 } 2932 rw_enter(&vl->vl_lock, RW_READER); 2933 return 0; 2934 2935 case LK_EXCLUSIVE: 2936 if (rw_tryenter(&vl->vl_lock, RW_WRITER)) { 2937 return 0; 2938 } 2939 if ((vl->vl_canrecurse || (flags & LK_CANRECURSE) != 0) && 2940 rw_write_held(&vl->vl_lock)) { 2941 vl->vl_recursecnt++; 2942 return 0; 2943 } 2944 if ((flags & LK_NOWAIT) != 0) { 2945 return EBUSY; 2946 } 2947 rw_enter(&vl->vl_lock, RW_WRITER); 2948 return 0; 2949 2950 case LK_RELEASE: 2951 if (vl->vl_recursecnt != 0) { 2952 KASSERT(rw_write_held(&vl->vl_lock)); 2953 vl->vl_recursecnt--; 2954 return 0; 2955 } 2956 rw_exit(&vl->vl_lock); 2957 return 0; 2958 2959 default: 2960 panic("vlockmgr: flags %x", flags); 2961 } 2962 } 2963 2964 int 2965 vlockstatus(struct vnlock *vl) 2966 { 2967 2968 if (rw_write_held(&vl->vl_lock)) { 2969 return LK_EXCLUSIVE; 2970 } 2971 if (rw_read_held(&vl->vl_lock)) { 2972 return LK_SHARED; 2973 } 2974 return 0; 2975 } 2976 2977 static const uint8_t vttodt_tab[9] = { 2978 DT_UNKNOWN, /* VNON */ 2979 DT_REG, /* VREG */ 2980 DT_DIR, /* VDIR */ 2981 DT_BLK, /* VBLK */ 2982 DT_CHR, /* VCHR */ 2983 DT_LNK, /* VLNK */ 2984 DT_SOCK, /* VSOCK */ 2985 DT_FIFO, /* VFIFO */ 2986 DT_UNKNOWN /* VBAD */ 2987 }; 2988 2989 uint8_t 2990 vtype2dt(enum vtype vt) 2991 { 2992 2993 CTASSERT(VBAD == __arraycount(vttodt_tab) - 1); 2994 return vttodt_tab[vt]; 2995 } 2996 2997 /* 2998 * mount_specific_key_create -- 2999 * Create a key for subsystem mount-specific data. 3000 */ 3001 int 3002 mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor) 3003 { 3004 3005 return (specificdata_key_create(mount_specificdata_domain, keyp, dtor)); 3006 } 3007 3008 /* 3009 * mount_specific_key_delete -- 3010 * Delete a key for subsystem mount-specific data. 3011 */ 3012 void 3013 mount_specific_key_delete(specificdata_key_t key) 3014 { 3015 3016 specificdata_key_delete(mount_specificdata_domain, key); 3017 } 3018 3019 /* 3020 * mount_initspecific -- 3021 * Initialize a mount's specificdata container. 3022 */ 3023 void 3024 mount_initspecific(struct mount *mp) 3025 { 3026 int error; 3027 3028 error = specificdata_init(mount_specificdata_domain, 3029 &mp->mnt_specdataref); 3030 KASSERT(error == 0); 3031 } 3032 3033 /* 3034 * mount_finispecific -- 3035 * Finalize a mount's specificdata container. 3036 */ 3037 void 3038 mount_finispecific(struct mount *mp) 3039 { 3040 3041 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 3042 } 3043 3044 /* 3045 * mount_getspecific -- 3046 * Return mount-specific data corresponding to the specified key. 3047 */ 3048 void * 3049 mount_getspecific(struct mount *mp, specificdata_key_t key) 3050 { 3051 3052 return (specificdata_getspecific(mount_specificdata_domain, 3053 &mp->mnt_specdataref, key)); 3054 } 3055 3056 /* 3057 * mount_setspecific -- 3058 * Set mount-specific data corresponding to the specified key.
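 *
 * Together with the routines above, a subsystem would typically use the
 * mount specificdata interface as follows (sketch only; the "foo" names
 * are hypothetical):
 *
 *	static specificdata_key_t foo_key;
 *
 *	mount_specific_key_create(&foo_key, foo_dtor);
 *	...
 *	mount_setspecific(mp, foo_key, data);
 *	data = mount_getspecific(mp, foo_key);
 *	...
 *	mount_specific_key_delete(foo_key);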
3059 */ 3060 void 3061 mount_setspecific(struct mount *mp, specificdata_key_t key, void *data) 3062 { 3063 3064 specificdata_setspecific(mount_specificdata_domain, 3065 &mp->mnt_specdataref, key, data); 3066 } 3067 3068 int 3069 VFS_MOUNT(struct mount *mp, const char *a, void *b, size_t *c) 3070 { 3071 int error; 3072 3073 KERNEL_LOCK(1, NULL); 3074 error = (*(mp->mnt_op->vfs_mount))(mp, a, b, c); 3075 KERNEL_UNLOCK_ONE(NULL); 3076 3077 return error; 3078 } 3079 3080 int 3081 VFS_START(struct mount *mp, int a) 3082 { 3083 int error; 3084 3085 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3086 KERNEL_LOCK(1, NULL); 3087 } 3088 error = (*(mp->mnt_op->vfs_start))(mp, a); 3089 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3090 KERNEL_UNLOCK_ONE(NULL); 3091 } 3092 3093 return error; 3094 } 3095 3096 int 3097 VFS_UNMOUNT(struct mount *mp, int a) 3098 { 3099 int error; 3100 3101 KERNEL_LOCK(1, NULL); 3102 error = (*(mp->mnt_op->vfs_unmount))(mp, a); 3103 KERNEL_UNLOCK_ONE(NULL); 3104 3105 return error; 3106 } 3107 3108 int 3109 VFS_ROOT(struct mount *mp, struct vnode **a) 3110 { 3111 int error; 3112 3113 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3114 KERNEL_LOCK(1, NULL); 3115 } 3116 error = (*(mp->mnt_op->vfs_root))(mp, a); 3117 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3118 KERNEL_UNLOCK_ONE(NULL); 3119 } 3120 3121 return error; 3122 } 3123 3124 int 3125 VFS_QUOTACTL(struct mount *mp, int a, uid_t b, void *c) 3126 { 3127 int error; 3128 3129 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3130 KERNEL_LOCK(1, NULL); 3131 } 3132 error = (*(mp->mnt_op->vfs_quotactl))(mp, a, b, c); 3133 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3134 KERNEL_UNLOCK_ONE(NULL); 3135 } 3136 3137 return error; 3138 } 3139 3140 int 3141 VFS_STATVFS(struct mount *mp, struct statvfs *a) 3142 { 3143 int error; 3144 3145 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3146 KERNEL_LOCK(1, NULL); 3147 } 3148 error = (*(mp->mnt_op->vfs_statvfs))(mp, a); 3149 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3150 KERNEL_UNLOCK_ONE(NULL); 3151 } 3152 3153 return error; 3154 } 3155 3156 int 3157 VFS_SYNC(struct mount *mp, int a, struct kauth_cred *b) 3158 { 3159 int error; 3160 3161 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3162 KERNEL_LOCK(1, NULL); 3163 } 3164 error = (*(mp->mnt_op->vfs_sync))(mp, a, b); 3165 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3166 KERNEL_UNLOCK_ONE(NULL); 3167 } 3168 3169 return error; 3170 } 3171 3172 int 3173 VFS_FHTOVP(struct mount *mp, struct fid *a, struct vnode **b) 3174 { 3175 int error; 3176 3177 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3178 KERNEL_LOCK(1, NULL); 3179 } 3180 error = (*(mp->mnt_op->vfs_fhtovp))(mp, a, b); 3181 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3182 KERNEL_UNLOCK_ONE(NULL); 3183 } 3184 3185 return error; 3186 } 3187 3188 int 3189 VFS_VPTOFH(struct vnode *vp, struct fid *a, size_t *b) 3190 { 3191 int error; 3192 3193 if ((vp->v_vflag & VV_MPSAFE) == 0) { 3194 KERNEL_LOCK(1, NULL); 3195 } 3196 error = (*(vp->v_mount->mnt_op->vfs_vptofh))(vp, a, b); 3197 if ((vp->v_vflag & VV_MPSAFE) == 0) { 3198 KERNEL_UNLOCK_ONE(NULL); 3199 } 3200 3201 return error; 3202 } 3203 3204 int 3205 VFS_SNAPSHOT(struct mount *mp, struct vnode *a, struct timespec *b) 3206 { 3207 int error; 3208 3209 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3210 KERNEL_LOCK(1, NULL); 3211 } 3212 error = (*(mp->mnt_op->vfs_snapshot))(mp, a, b); 3213 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3214 KERNEL_UNLOCK_ONE(NULL); 3215 } 3216 3217 return error; 3218 } 3219 3220 int 3221 VFS_EXTATTRCTL(struct mount *mp, int a, struct vnode *b, int c, const 
char *d) 3222 { 3223 int error; 3224 3225 KERNEL_LOCK(1, NULL); /* XXXSMP check ffs */ 3226 error = (*(mp->mnt_op->vfs_extattrctl))(mp, a, b, c, d); 3227 KERNEL_UNLOCK_ONE(NULL); /* XXX */ 3228 3229 return error; 3230 } 3231 3232 int 3233 VFS_SUSPENDCTL(struct mount *mp, int a) 3234 { 3235 int error; 3236 3237 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3238 KERNEL_LOCK(1, NULL); 3239 } 3240 error = (*(mp->mnt_op->vfs_suspendctl))(mp, a); 3241 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 3242 KERNEL_UNLOCK_ONE(NULL); 3243 } 3244 3245 return error; 3246 } 3247 3248 #if defined(DDB) || defined(DEBUGPRINT) 3249 static const char buf_flagbits[] = BUF_FLAGBITS; 3250 3251 void 3252 vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...)) 3253 { 3254 char bf[1024]; 3255 3256 (*pr)(" vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" rawblkno 0x%" 3257 PRIx64 " dev 0x%x\n", 3258 bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev); 3259 3260 snprintb(bf, sizeof(bf), 3261 buf_flagbits, bp->b_flags | bp->b_oflags | bp->b_cflags); 3262 (*pr)(" error %d flags 0x%s\n", bp->b_error, bf); 3263 3264 (*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n", 3265 bp->b_bufsize, bp->b_bcount, bp->b_resid); 3266 (*pr)(" data %p saveaddr %p\n", 3267 bp->b_data, bp->b_saveaddr); 3268 (*pr)(" iodone %p objlock %p\n", bp->b_iodone, bp->b_objlock); 3269 } 3270 3271 3272 void 3273 vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...)) 3274 { 3275 char bf[256]; 3276 3277 uvm_object_printit(&vp->v_uobj, full, pr); 3278 snprintb(bf, sizeof(bf), 3279 vnode_flagbits, vp->v_iflag | vp->v_vflag | vp->v_uflag); 3280 (*pr)("\nVNODE flags %s\n", bf); 3281 (*pr)("mp %p numoutput %d size 0x%llx writesize 0x%llx\n", 3282 vp->v_mount, vp->v_numoutput, vp->v_size, vp->v_writesize); 3283 3284 (*pr)("data %p writecount %ld holdcnt %ld\n", 3285 vp->v_data, vp->v_writecount, vp->v_holdcnt); 3286 3287 (*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n", 3288 ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag, 3289 ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type, 3290 vp->v_mount, vp->v_mountedhere); 3291 3292 (*pr)("v_lock %p v_vnlock %p\n", &vp->v_lock, vp->v_vnlock); 3293 3294 if (full) { 3295 struct buf *bp; 3296 3297 (*pr)("clean bufs:\n"); 3298 LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) { 3299 (*pr)(" bp %p\n", bp); 3300 vfs_buf_print(bp, full, pr); 3301 } 3302 3303 (*pr)("dirty bufs:\n"); 3304 LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) { 3305 (*pr)(" bp %p\n", bp); 3306 vfs_buf_print(bp, full, pr); 3307 } 3308 } 3309 } 3310 3311 void 3312 vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...)) 3313 { 3314 char sbuf[256]; 3315 3316 (*pr)("vnodecovered = %p syncer = %p data = %p\n", 3317 mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data); 3318 3319 (*pr)("fs_bshift %d dev_bshift = %d\n", 3320 mp->mnt_fs_bshift,mp->mnt_dev_bshift); 3321 3322 snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_flag); 3323 (*pr)("flag = %s\n", sbuf); 3324 3325 snprintb(sbuf, sizeof(sbuf), __IMNT_FLAG_BITS, mp->mnt_iflag); 3326 (*pr)("iflag = %s\n", sbuf); 3327 3328 (*pr)("refcnt = %d unmounting @ %p updating @ %p\n", mp->mnt_refcnt, 3329 &mp->mnt_unmounting, &mp->mnt_updating); 3330 3331 (*pr)("statvfs cache:\n"); 3332 (*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize); 3333 (*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize); 3334 (*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize); 3335 3336 (*pr)("\tblocks = %"PRIu64"\n",mp->mnt_stat.f_blocks); 3337 (*pr)("\tbfree = %"PRIu64"\n",mp->mnt_stat.f_bfree); 3338 
(*pr)("\tbavail = %"PRIu64"\n",mp->mnt_stat.f_bavail); 3339 (*pr)("\tbresvd = %"PRIu64"\n",mp->mnt_stat.f_bresvd); 3340 3341 (*pr)("\tfiles = %"PRIu64"\n",mp->mnt_stat.f_files); 3342 (*pr)("\tffree = %"PRIu64"\n",mp->mnt_stat.f_ffree); 3343 (*pr)("\tfavail = %"PRIu64"\n",mp->mnt_stat.f_favail); 3344 (*pr)("\tfresvd = %"PRIu64"\n",mp->mnt_stat.f_fresvd); 3345 3346 (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n", 3347 mp->mnt_stat.f_fsidx.__fsid_val[0], 3348 mp->mnt_stat.f_fsidx.__fsid_val[1]); 3349 3350 (*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner); 3351 (*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax); 3352 3353 snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_stat.f_flag); 3354 3355 (*pr)("\tflag = %s\n",sbuf); 3356 (*pr)("\tsyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_syncwrites); 3357 (*pr)("\tasyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_asyncwrites); 3358 (*pr)("\tsyncreads = %" PRIu64 "\n",mp->mnt_stat.f_syncreads); 3359 (*pr)("\tasyncreads = %" PRIu64 "\n",mp->mnt_stat.f_asyncreads); 3360 (*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename); 3361 (*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname); 3362 (*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname); 3363 3364 { 3365 int cnt = 0; 3366 struct vnode *vp; 3367 (*pr)("locked vnodes ="); 3368 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 3369 if (VOP_ISLOCKED(vp)) { 3370 if ((++cnt % 6) == 0) { 3371 (*pr)(" %p,\n\t", vp); 3372 } else { 3373 (*pr)(" %p,", vp); 3374 } 3375 } 3376 } 3377 (*pr)("\n"); 3378 } 3379 3380 if (full) { 3381 int cnt = 0; 3382 struct vnode *vp; 3383 (*pr)("all vnodes ="); 3384 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 3385 if (!TAILQ_NEXT(vp, v_mntvnodes)) { 3386 (*pr)(" %p", vp); 3387 } else if ((++cnt % 6) == 0) { 3388 (*pr)(" %p,\n\t", vp); 3389 } else { 3390 (*pr)(" %p,", vp); 3391 } 3392 } 3393 (*pr)("\n", vp); 3394 } 3395 } 3396 #endif /* DDB || DEBUGPRINT */ 3397 3398 /* 3399 * Check if a device pointed to by vp is mounted. 3400 * 3401 * Returns: 3402 * EINVAL if it's not a disk 3403 * EBUSY if it's a disk and mounted 3404 * 0 if it's a disk and not mounted 3405 */ 3406 int 3407 rawdev_mounted(struct vnode *vp, struct vnode **bvpp) 3408 { 3409 struct vnode *bvp; 3410 dev_t dev; 3411 int d_type; 3412 3413 bvp = NULL; 3414 dev = vp->v_rdev; 3415 d_type = D_OTHER; 3416 3417 if (iskmemvp(vp)) 3418 return EINVAL; 3419 3420 switch (vp->v_type) { 3421 case VCHR: { 3422 const struct cdevsw *cdev; 3423 3424 cdev = cdevsw_lookup(dev); 3425 if (cdev != NULL) { 3426 dev_t blkdev; 3427 3428 blkdev = devsw_chr2blk(dev); 3429 if (blkdev != NODEV) { 3430 vfinddev(blkdev, VBLK, &bvp); 3431 if (bvp != NULL) 3432 d_type = (cdev->d_flag & D_TYPEMASK); 3433 } 3434 } 3435 3436 break; 3437 } 3438 3439 case VBLK: { 3440 const struct bdevsw *bdev; 3441 3442 bdev = bdevsw_lookup(dev); 3443 if (bdev != NULL) 3444 d_type = (bdev->d_flag & D_TYPEMASK); 3445 3446 bvp = vp; 3447 3448 break; 3449 } 3450 3451 default: 3452 break; 3453 } 3454 3455 if (d_type != D_DISK) 3456 return EINVAL; 3457 3458 if (bvpp != NULL) 3459 *bvpp = bvp; 3460 3461 /* 3462 * XXX: This is bogus. We should be failing the request 3463 * XXX: not only if this specific slice is mounted, but 3464 * XXX: if it's on a disk with any other mounted slice. 3465 */ 3466 if (vfs_mountedon(bvp)) 3467 return EBUSY; 3468 3469 return 0; 3470 } 3471