1 /* $NetBSD: vfs_subr.c,v 1.275 2006/11/01 10:17:59 yamt Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998, 2004, 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * This code is derived from software contributed to The NetBSD Foundation 11 * by Charles M. Hannum. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the NetBSD 24 * Foundation, Inc. and its contributors. 25 * 4. Neither the name of The NetBSD Foundation nor the names of its 26 * contributors may be used to endorse or promote products derived 27 * from this software without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 30 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 31 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 32 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 33 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 34 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 35 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 36 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 37 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 38 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39 * POSSIBILITY OF SUCH DAMAGE. 40 */ 41 42 /* 43 * Copyright (c) 1989, 1993 44 * The Regents of the University of California. All rights reserved. 45 * (c) UNIX System Laboratories, Inc. 46 * All or some portions of this file are derived from material licensed 47 * to the University of California by American Telephone and Telegraph 48 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 49 * the permission of UNIX System Laboratories, Inc. 50 * 51 * Redistribution and use in source and binary forms, with or without 52 * modification, are permitted provided that the following conditions 53 * are met: 54 * 1. Redistributions of source code must retain the above copyright 55 * notice, this list of conditions and the following disclaimer. 56 * 2. Redistributions in binary form must reproduce the above copyright 57 * notice, this list of conditions and the following disclaimer in the 58 * documentation and/or other materials provided with the distribution. 59 * 3. Neither the name of the University nor the names of its contributors 60 * may be used to endorse or promote products derived from this software 61 * without specific prior written permission. 
62 * 63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 73 * SUCH DAMAGE. 74 * 75 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 76 */ 77 78 /* 79 * External virtual filesystem routines 80 */ 81 82 #include <sys/cdefs.h> 83 __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.275 2006/11/01 10:17:59 yamt Exp $"); 84 85 #include "opt_inet.h" 86 #include "opt_ddb.h" 87 #include "opt_compat_netbsd.h" 88 #include "opt_compat_43.h" 89 90 #include <sys/param.h> 91 #include <sys/systm.h> 92 #include <sys/proc.h> 93 #include <sys/kernel.h> 94 #include <sys/mount.h> 95 #include <sys/fcntl.h> 96 #include <sys/vnode.h> 97 #include <sys/stat.h> 98 #include <sys/namei.h> 99 #include <sys/ucred.h> 100 #include <sys/buf.h> 101 #include <sys/errno.h> 102 #include <sys/malloc.h> 103 #include <sys/domain.h> 104 #include <sys/mbuf.h> 105 #include <sys/sa.h> 106 #include <sys/syscallargs.h> 107 #include <sys/device.h> 108 #include <sys/filedesc.h> 109 #include <sys/kauth.h> 110 111 #include <miscfs/specfs/specdev.h> 112 #include <miscfs/genfs/genfs.h> 113 #include <miscfs/syncfs/syncfs.h> 114 115 #include <uvm/uvm.h> 116 #include <uvm/uvm_readahead.h> 117 #include <uvm/uvm_ddb.h> 118 119 #include <sys/sysctl.h> 120 121 const enum vtype iftovt_tab[16] = { 122 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 123 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 124 }; 125 const int vttoif_tab[9] = { 126 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 127 S_IFSOCK, S_IFIFO, S_IFMT, 128 }; 129 130 int doforce = 1; /* 1 => permit forcible unmounting */ 131 int prtactive = 0; /* 1 => print out reclaim of active vnodes */ 132 133 extern int dovfsusermount; /* 1 => permit any user to mount filesystems */ 134 extern int vfs_magiclinks; /* 1 => expand "magic" symlinks */ 135 136 /* 137 * Insq/Remq for the vnode usage lists. 
138 */ 139 #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) 140 #define bufremvn(bp) { \ 141 LIST_REMOVE(bp, b_vnbufs); \ 142 (bp)->b_vnbufs.le_next = NOLIST; \ 143 } 144 /* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */ 145 struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list); 146 struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list); 147 148 struct mntlist mountlist = /* mounted filesystem list */ 149 CIRCLEQ_HEAD_INITIALIZER(mountlist); 150 struct vfs_list_head vfs_list = /* vfs list */ 151 LIST_HEAD_INITIALIZER(vfs_list); 152 153 struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER; 154 static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER; 155 struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER; 156 struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER; 157 struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER; 158 159 /* XXX - gross; single global lock to protect v_numoutput */ 160 struct simplelock global_v_numoutput_slock = SIMPLELOCK_INITIALIZER; 161 162 /* 163 * These define the root filesystem and device. 164 */ 165 struct mount *rootfs; 166 struct vnode *rootvnode; 167 struct device *root_device; /* root device */ 168 169 POOL_INIT(vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl", 170 &pool_allocator_nointr); 171 172 MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes"); 173 174 /* 175 * Local declarations. 176 */ 177 static void insmntque(struct vnode *, struct mount *); 178 static int getdevvp(dev_t, struct vnode **, enum vtype); 179 static void vclean(struct vnode *, int, struct lwp *); 180 static struct vnode *getcleanvnode(struct lwp *); 181 182 #ifdef DEBUG 183 void printlockedvnodes(void); 184 #endif 185 186 /* 187 * Initialize the vnode management data structures. 188 */ 189 void 190 vntblinit(void) 191 { 192 193 /* 194 * Initialize the filesystem syncer. 195 */ 196 vn_initialize_syncerd(); 197 } 198 199 int 200 vfs_drainvnodes(long target, struct lwp *l) 201 { 202 203 simple_lock(&vnode_free_list_slock); 204 while (numvnodes > target) { 205 struct vnode *vp; 206 207 vp = getcleanvnode(l); 208 if (vp == NULL) 209 return EBUSY; /* give up */ 210 pool_put(&vnode_pool, vp); 211 simple_lock(&vnode_free_list_slock); 212 numvnodes--; 213 } 214 simple_unlock(&vnode_free_list_slock); 215 216 return 0; 217 } 218 219 /* 220 * grab a vnode from freelist and clean it. 221 */ 222 struct vnode * 223 getcleanvnode(struct lwp *l) 224 { 225 struct vnode *vp; 226 struct mount *mp; 227 struct freelst *listhd; 228 229 LOCK_ASSERT(simple_lock_held(&vnode_free_list_slock)); 230 231 listhd = &vnode_free_list; 232 try_nextlist: 233 TAILQ_FOREACH(vp, listhd, v_freelist) { 234 if (!simple_lock_try(&vp->v_interlock)) 235 continue; 236 /* 237 * as our lwp might hold the underlying vnode locked, 238 * don't try to reclaim the VLAYER vnode if it's locked. 
239 */ 240 if ((vp->v_flag & VXLOCK) == 0 && 241 ((vp->v_flag & VLAYER) == 0 || VOP_ISLOCKED(vp) == 0)) { 242 if (vn_start_write(vp, &mp, V_NOWAIT) == 0) 243 break; 244 } 245 mp = NULL; 246 simple_unlock(&vp->v_interlock); 247 } 248 249 if (vp == NULLVP) { 250 if (listhd == &vnode_free_list) { 251 listhd = &vnode_hold_list; 252 goto try_nextlist; 253 } 254 simple_unlock(&vnode_free_list_slock); 255 return NULLVP; 256 } 257 258 if (vp->v_usecount) 259 panic("free vnode isn't, vp %p", vp); 260 TAILQ_REMOVE(listhd, vp, v_freelist); 261 /* see comment on why 0xdeadb is set at end of vgone (below) */ 262 vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb; 263 simple_unlock(&vnode_free_list_slock); 264 vp->v_lease = NULL; 265 266 if (vp->v_type != VBAD) 267 vgonel(vp, l); 268 else 269 simple_unlock(&vp->v_interlock); 270 vn_finished_write(mp, 0); 271 #ifdef DIAGNOSTIC 272 if (vp->v_data || vp->v_uobj.uo_npages || 273 TAILQ_FIRST(&vp->v_uobj.memq)) 274 panic("cleaned vnode isn't, vp %p", vp); 275 if (vp->v_numoutput) 276 panic("clean vnode has pending I/O's, vp %p", vp); 277 #endif 278 KASSERT((vp->v_flag & VONWORKLST) == 0); 279 280 return vp; 281 } 282 283 /* 284 * Mark a mount point as busy. Used to synchronize access and to delay 285 * unmounting. Interlock is not released on failure. 286 */ 287 int 288 vfs_busy(struct mount *mp, int flags, struct simplelock *interlkp) 289 { 290 int lkflags; 291 292 while (mp->mnt_iflag & IMNT_UNMOUNT) { 293 int gone, n; 294 295 if (flags & LK_NOWAIT) 296 return (ENOENT); 297 if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL 298 && mp->mnt_unmounter == curlwp) 299 return (EDEADLK); 300 if (interlkp) 301 simple_unlock(interlkp); 302 /* 303 * Since all busy locks are shared except the exclusive 304 * lock granted when unmounting, the only place that a 305 * wakeup needs to be done is at the release of the 306 * exclusive lock at the end of dounmount. 307 */ 308 simple_lock(&mp->mnt_slock); 309 mp->mnt_wcnt++; 310 ltsleep((caddr_t)mp, PVFS, "vfs_busy", 0, &mp->mnt_slock); 311 n = --mp->mnt_wcnt; 312 simple_unlock(&mp->mnt_slock); 313 gone = mp->mnt_iflag & IMNT_GONE; 314 315 if (n == 0) 316 wakeup(&mp->mnt_wcnt); 317 if (interlkp) 318 simple_lock(interlkp); 319 if (gone) 320 return (ENOENT); 321 } 322 lkflags = LK_SHARED; 323 if (interlkp) 324 lkflags |= LK_INTERLOCK; 325 if (lockmgr(&mp->mnt_lock, lkflags, interlkp)) 326 panic("vfs_busy: unexpected lock failure"); 327 return (0); 328 } 329 330 /* 331 * Free a busy filesystem. 332 */ 333 void 334 vfs_unbusy(struct mount *mp) 335 { 336 337 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL); 338 } 339 340 /* 341 * Lookup a filesystem type, and if found allocate and initialize 342 * a mount structure for it. 343 * 344 * Devname is usually updated by mount(8) after booting. 
345 */ 346 int 347 vfs_rootmountalloc(const char *fstypename, const char *devname, 348 struct mount **mpp) 349 { 350 struct vfsops *vfsp = NULL; 351 struct mount *mp; 352 353 LIST_FOREACH(vfsp, &vfs_list, vfs_list) 354 if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN)) 355 break; 356 357 if (vfsp == NULL) 358 return (ENODEV); 359 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 360 memset((char *)mp, 0, (u_long)sizeof(struct mount)); 361 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); 362 simple_lock_init(&mp->mnt_slock); 363 (void)vfs_busy(mp, LK_NOWAIT, 0); 364 TAILQ_INIT(&mp->mnt_vnodelist); 365 mp->mnt_op = vfsp; 366 mp->mnt_flag = MNT_RDONLY; 367 mp->mnt_vnodecovered = NULLVP; 368 mp->mnt_leaf = mp; 369 vfsp->vfs_refcount++; 370 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN); 371 mp->mnt_stat.f_mntonname[0] = '/'; 372 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 373 *mpp = mp; 374 return (0); 375 } 376 377 /* 378 * Lookup a mount point by filesystem identifier. 379 */ 380 struct mount * 381 vfs_getvfs(fsid_t *fsid) 382 { 383 struct mount *mp; 384 385 simple_lock(&mountlist_slock); 386 CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) { 387 if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] && 388 mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) { 389 simple_unlock(&mountlist_slock); 390 return (mp); 391 } 392 } 393 simple_unlock(&mountlist_slock); 394 return ((struct mount *)0); 395 } 396 397 /* 398 * Get a new unique fsid 399 */ 400 void 401 vfs_getnewfsid(struct mount *mp) 402 { 403 static u_short xxxfs_mntid; 404 fsid_t tfsid; 405 int mtype; 406 407 simple_lock(&mntid_slock); 408 mtype = makefstype(mp->mnt_op->vfs_name); 409 mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0); 410 mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype; 411 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 412 if (xxxfs_mntid == 0) 413 ++xxxfs_mntid; 414 tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid); 415 tfsid.__fsid_val[1] = mtype; 416 if (!CIRCLEQ_EMPTY(&mountlist)) { 417 while (vfs_getvfs(&tfsid)) { 418 tfsid.__fsid_val[0]++; 419 xxxfs_mntid++; 420 } 421 } 422 mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0]; 423 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 424 simple_unlock(&mntid_slock); 425 } 426 427 /* 428 * Make a 'unique' number from a mount type name. 429 */ 430 long 431 makefstype(const char *type) 432 { 433 long rv; 434 435 for (rv = 0; *type; type++) { 436 rv <<= 2; 437 rv ^= *type; 438 } 439 return rv; 440 } 441 442 443 /* 444 * Set vnode attributes to VNOVAL 445 */ 446 void 447 vattr_null(struct vattr *vap) 448 { 449 450 vap->va_type = VNON; 451 452 /* 453 * Assign individually so that it is safe even if size and 454 * sign of each member are varied. 455 */ 456 vap->va_mode = VNOVAL; 457 vap->va_nlink = VNOVAL; 458 vap->va_uid = VNOVAL; 459 vap->va_gid = VNOVAL; 460 vap->va_fsid = VNOVAL; 461 vap->va_fileid = VNOVAL; 462 vap->va_size = VNOVAL; 463 vap->va_blocksize = VNOVAL; 464 vap->va_atime.tv_sec = 465 vap->va_mtime.tv_sec = 466 vap->va_ctime.tv_sec = 467 vap->va_birthtime.tv_sec = VNOVAL; 468 vap->va_atime.tv_nsec = 469 vap->va_mtime.tv_nsec = 470 vap->va_ctime.tv_nsec = 471 vap->va_birthtime.tv_nsec = VNOVAL; 472 vap->va_gen = VNOVAL; 473 vap->va_flags = VNOVAL; 474 vap->va_rdev = VNOVAL; 475 vap->va_bytes = VNOVAL; 476 vap->va_vaflags = 0; 477 } 478 479 /* 480 * Routines having to do with the management of the vnode table. 
481 */ 482 extern int (**dead_vnodeop_p)(void *); 483 long numvnodes; 484 485 /* 486 * Return the next vnode from the free list. 487 */ 488 int 489 getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *), 490 struct vnode **vpp) 491 { 492 extern struct uvm_pagerops uvm_vnodeops; 493 struct uvm_object *uobj; 494 struct lwp *l = curlwp; /* XXX */ 495 static int toggle; 496 struct vnode *vp; 497 int error = 0, tryalloc; 498 499 try_again: 500 if (mp) { 501 /* 502 * Mark filesystem busy while we're creating a vnode. 503 * If unmount is in progress, this will wait; if the 504 * unmount succeeds (only if umount -f), this will 505 * return an error. If the unmount fails, we'll keep 506 * going afterwards. 507 * (This puts the per-mount vnode list logically under 508 * the protection of the vfs_busy lock). 509 */ 510 error = vfs_busy(mp, LK_RECURSEFAIL, 0); 511 if (error && error != EDEADLK) 512 return error; 513 } 514 515 /* 516 * We must choose whether to allocate a new vnode or recycle an 517 * existing one. The criterion for allocating a new one is that 518 * the total number of vnodes is less than the number desired or 519 * there are no vnodes on either free list. Generally we only 520 * want to recycle vnodes that have no buffers associated with 521 * them, so we look first on the vnode_free_list. If it is empty, 522 * we next consider vnodes with referencing buffers on the 523 * vnode_hold_list. The toggle ensures that half the time we 524 * will use a buffer from the vnode_hold_list, and half the time 525 * we will allocate a new one unless the list has grown to twice 526 * the desired size. We are reticent to recycle vnodes from the 527 * vnode_hold_list because we will lose the identity of all its 528 * referencing buffers. 529 */ 530 531 vp = NULL; 532 533 simple_lock(&vnode_free_list_slock); 534 535 toggle ^= 1; 536 if (numvnodes > 2 * desiredvnodes) 537 toggle = 0; 538 539 tryalloc = numvnodes < desiredvnodes || 540 (TAILQ_FIRST(&vnode_free_list) == NULL && 541 (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle)); 542 543 if (tryalloc && 544 (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) { 545 numvnodes++; 546 simple_unlock(&vnode_free_list_slock); 547 memset(vp, 0, sizeof(*vp)); 548 UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 1); 549 /* 550 * done by memset() above. 551 * LIST_INIT(&vp->v_nclist); 552 * LIST_INIT(&vp->v_dnclist); 553 */ 554 } else { 555 vp = getcleanvnode(l); 556 /* 557 * Unless this is a bad time of the month, at most 558 * the first NCPUS items on the free list are 559 * locked, so this is close enough to being empty. 560 */ 561 if (vp == NULLVP) { 562 if (mp && error != EDEADLK) 563 vfs_unbusy(mp); 564 if (tryalloc) { 565 printf("WARNING: unable to allocate new " 566 "vnode, retrying...\n"); 567 (void) tsleep(&lbolt, PRIBIO, "newvn", hz); 568 goto try_again; 569 } 570 tablefull("vnode", "increase kern.maxvnodes or NVNODE"); 571 *vpp = 0; 572 return (ENFILE); 573 } 574 vp->v_usecount = 1; 575 vp->v_flag = 0; 576 vp->v_socket = NULL; 577 } 578 vp->v_type = VNON; 579 vp->v_vnlock = &vp->v_lock; 580 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); 581 KASSERT(LIST_EMPTY(&vp->v_nclist)); 582 KASSERT(LIST_EMPTY(&vp->v_dnclist)); 583 vp->v_tag = tag; 584 vp->v_op = vops; 585 insmntque(vp, mp); 586 *vpp = vp; 587 vp->v_data = 0; 588 simple_lock_init(&vp->v_interlock); 589 590 /* 591 * initialize uvm_object within vnode. 
592 */ 593 594 uobj = &vp->v_uobj; 595 KASSERT(uobj->pgops == &uvm_vnodeops); 596 KASSERT(uobj->uo_npages == 0); 597 KASSERT(TAILQ_FIRST(&uobj->memq) == NULL); 598 vp->v_size = VSIZENOTSET; 599 600 if (mp && error != EDEADLK) 601 vfs_unbusy(mp); 602 return (0); 603 } 604 605 /* 606 * This is really just the reverse of getnewvnode(). Needed for 607 * VFS_VGET functions who may need to push back a vnode in case 608 * of a locking race. 609 */ 610 void 611 ungetnewvnode(struct vnode *vp) 612 { 613 #ifdef DIAGNOSTIC 614 if (vp->v_usecount != 1) 615 panic("ungetnewvnode: busy vnode"); 616 #endif 617 vp->v_usecount--; 618 insmntque(vp, NULL); 619 vp->v_type = VBAD; 620 621 simple_lock(&vp->v_interlock); 622 /* 623 * Insert at head of LRU list 624 */ 625 simple_lock(&vnode_free_list_slock); 626 if (vp->v_holdcnt > 0) 627 TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist); 628 else 629 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 630 simple_unlock(&vnode_free_list_slock); 631 simple_unlock(&vp->v_interlock); 632 } 633 634 /* 635 * Move a vnode from one mount queue to another. 636 */ 637 static void 638 insmntque(struct vnode *vp, struct mount *mp) 639 { 640 641 #ifdef DIAGNOSTIC 642 if ((mp != NULL) && 643 (mp->mnt_iflag & IMNT_UNMOUNT) && 644 !(mp->mnt_flag & MNT_SOFTDEP) && 645 vp->v_tag != VT_VFS) { 646 panic("insmntque into dying filesystem"); 647 } 648 #endif 649 650 simple_lock(&mntvnode_slock); 651 /* 652 * Delete from old mount point vnode list, if on one. 653 */ 654 if (vp->v_mount != NULL) 655 TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes); 656 /* 657 * Insert into list of vnodes for the new mount point, if available. 658 */ 659 if ((vp->v_mount = mp) != NULL) { 660 if (TAILQ_EMPTY(&mp->mnt_vnodelist)) { 661 TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 662 } else { 663 TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes); 664 } 665 } 666 simple_unlock(&mntvnode_slock); 667 } 668 669 /* 670 * Update outstanding I/O count and do wakeup if requested. 671 */ 672 void 673 vwakeup(struct buf *bp) 674 { 675 struct vnode *vp; 676 677 if ((vp = bp->b_vp) != NULL) { 678 /* XXX global lock hack 679 * can't use v_interlock here since this is called 680 * in interrupt context from biodone(). 681 */ 682 simple_lock(&global_v_numoutput_slock); 683 if (--vp->v_numoutput < 0) 684 panic("vwakeup: neg numoutput, vp %p", vp); 685 if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { 686 vp->v_flag &= ~VBWAIT; 687 wakeup((caddr_t)&vp->v_numoutput); 688 } 689 simple_unlock(&global_v_numoutput_slock); 690 } 691 } 692 693 /* 694 * Flush out and invalidate all buffers associated with a vnode. 695 * Called with the underlying vnode locked, which should prevent new dirty 696 * buffers from being queued. 697 */ 698 int 699 vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l, 700 int slpflag, int slptimeo) 701 { 702 struct buf *bp, *nbp; 703 int s, error; 704 int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO | 705 (flags & V_SAVE ? 
PGO_CLEANIT : 0); 706 707 /* XXXUBC this doesn't look at flags or slp* */ 708 simple_lock(&vp->v_interlock); 709 error = VOP_PUTPAGES(vp, 0, 0, flushflags); 710 if (error) { 711 return error; 712 } 713 714 if (flags & V_SAVE) { 715 error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, l); 716 if (error) 717 return (error); 718 #ifdef DIAGNOSTIC 719 s = splbio(); 720 if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd)) 721 panic("vinvalbuf: dirty bufs, vp %p", vp); 722 splx(s); 723 #endif 724 } 725 726 s = splbio(); 727 728 restart: 729 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 730 nbp = LIST_NEXT(bp, b_vnbufs); 731 simple_lock(&bp->b_interlock); 732 if (bp->b_flags & B_BUSY) { 733 bp->b_flags |= B_WANTED; 734 error = ltsleep((caddr_t)bp, 735 slpflag | (PRIBIO + 1) | PNORELOCK, 736 "vinvalbuf", slptimeo, &bp->b_interlock); 737 if (error) { 738 splx(s); 739 return (error); 740 } 741 goto restart; 742 } 743 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; 744 simple_unlock(&bp->b_interlock); 745 brelse(bp); 746 } 747 748 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 749 nbp = LIST_NEXT(bp, b_vnbufs); 750 simple_lock(&bp->b_interlock); 751 if (bp->b_flags & B_BUSY) { 752 bp->b_flags |= B_WANTED; 753 error = ltsleep((caddr_t)bp, 754 slpflag | (PRIBIO + 1) | PNORELOCK, 755 "vinvalbuf", slptimeo, &bp->b_interlock); 756 if (error) { 757 splx(s); 758 return (error); 759 } 760 goto restart; 761 } 762 /* 763 * XXX Since there are no node locks for NFS, I believe 764 * there is a slight chance that a delayed write will 765 * occur while sleeping just above, so check for it. 766 */ 767 if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { 768 #ifdef DEBUG 769 printf("buffer still DELWRI\n"); 770 #endif 771 bp->b_flags |= B_BUSY | B_VFLUSH; 772 simple_unlock(&bp->b_interlock); 773 VOP_BWRITE(bp); 774 goto restart; 775 } 776 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; 777 simple_unlock(&bp->b_interlock); 778 brelse(bp); 779 } 780 781 #ifdef DIAGNOSTIC 782 if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd)) 783 panic("vinvalbuf: flush failed, vp %p", vp); 784 #endif 785 786 splx(s); 787 788 return (0); 789 } 790 791 /* 792 * Destroy any in core blocks past the truncation length. 793 * Called with the underlying vnode locked, which should prevent new dirty 794 * buffers from being queued. 
795 */ 796 int 797 vtruncbuf(struct vnode *vp, daddr_t lbn, int slpflag, int slptimeo) 798 { 799 struct buf *bp, *nbp; 800 int s, error; 801 voff_t off; 802 803 off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift); 804 simple_lock(&vp->v_interlock); 805 error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO); 806 if (error) { 807 return error; 808 } 809 810 s = splbio(); 811 812 restart: 813 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 814 nbp = LIST_NEXT(bp, b_vnbufs); 815 if (bp->b_lblkno < lbn) 816 continue; 817 simple_lock(&bp->b_interlock); 818 if (bp->b_flags & B_BUSY) { 819 bp->b_flags |= B_WANTED; 820 error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK, 821 "vtruncbuf", slptimeo, &bp->b_interlock); 822 if (error) { 823 splx(s); 824 return (error); 825 } 826 goto restart; 827 } 828 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; 829 simple_unlock(&bp->b_interlock); 830 brelse(bp); 831 } 832 833 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 834 nbp = LIST_NEXT(bp, b_vnbufs); 835 if (bp->b_lblkno < lbn) 836 continue; 837 simple_lock(&bp->b_interlock); 838 if (bp->b_flags & B_BUSY) { 839 bp->b_flags |= B_WANTED; 840 error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK, 841 "vtruncbuf", slptimeo, &bp->b_interlock); 842 if (error) { 843 splx(s); 844 return (error); 845 } 846 goto restart; 847 } 848 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; 849 simple_unlock(&bp->b_interlock); 850 brelse(bp); 851 } 852 853 splx(s); 854 855 return (0); 856 } 857 858 void 859 vflushbuf(struct vnode *vp, int sync) 860 { 861 struct buf *bp, *nbp; 862 int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0); 863 int s; 864 865 simple_lock(&vp->v_interlock); 866 (void) VOP_PUTPAGES(vp, 0, 0, flags); 867 868 loop: 869 s = splbio(); 870 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 871 nbp = LIST_NEXT(bp, b_vnbufs); 872 simple_lock(&bp->b_interlock); 873 if ((bp->b_flags & B_BUSY)) { 874 simple_unlock(&bp->b_interlock); 875 continue; 876 } 877 if ((bp->b_flags & B_DELWRI) == 0) 878 panic("vflushbuf: not dirty, bp %p", bp); 879 bp->b_flags |= B_BUSY | B_VFLUSH; 880 simple_unlock(&bp->b_interlock); 881 splx(s); 882 /* 883 * Wait for I/O associated with indirect blocks to complete, 884 * since there is no way to quickly wait for them below. 885 */ 886 if (bp->b_vp == vp || sync == 0) 887 (void) bawrite(bp); 888 else 889 (void) bwrite(bp); 890 goto loop; 891 } 892 if (sync == 0) { 893 splx(s); 894 return; 895 } 896 simple_lock(&global_v_numoutput_slock); 897 while (vp->v_numoutput) { 898 vp->v_flag |= VBWAIT; 899 ltsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0, 900 &global_v_numoutput_slock); 901 } 902 simple_unlock(&global_v_numoutput_slock); 903 splx(s); 904 if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { 905 vprint("vflushbuf: dirty", vp); 906 goto loop; 907 } 908 } 909 910 /* 911 * Associate a buffer with a vnode. 912 */ 913 void 914 bgetvp(struct vnode *vp, struct buf *bp) 915 { 916 int s; 917 918 if (bp->b_vp) 919 panic("bgetvp: not free, bp %p", bp); 920 VHOLD(vp); 921 s = splbio(); 922 bp->b_vp = vp; 923 if (vp->v_type == VBLK || vp->v_type == VCHR) 924 bp->b_dev = vp->v_rdev; 925 else 926 bp->b_dev = NODEV; 927 /* 928 * Insert onto list for new vnode. 929 */ 930 bufinsvn(bp, &vp->v_cleanblkhd); 931 splx(s); 932 } 933 934 /* 935 * Disassociate a buffer from a vnode. 
936 */ 937 void 938 brelvp(struct buf *bp) 939 { 940 struct vnode *vp; 941 int s; 942 943 if (bp->b_vp == NULL) 944 panic("brelvp: vp NULL, bp %p", bp); 945 946 s = splbio(); 947 vp = bp->b_vp; 948 /* 949 * Delete from old vnode list, if on one. 950 */ 951 if (LIST_NEXT(bp, b_vnbufs) != NOLIST) 952 bufremvn(bp); 953 954 if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) && 955 LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { 956 vp->v_flag &= ~VWRITEMAPDIRTY; 957 vn_syncer_remove_from_worklist(vp); 958 } 959 960 bp->b_vp = NULL; 961 HOLDRELE(vp); 962 splx(s); 963 } 964 965 /* 966 * Reassign a buffer from one vnode to another. 967 * Used to assign file specific control information 968 * (indirect blocks) to the vnode to which they belong. 969 * 970 * This function must be called at splbio(). 971 */ 972 void 973 reassignbuf(struct buf *bp, struct vnode *newvp) 974 { 975 struct buflists *listheadp; 976 int delayx; 977 978 /* 979 * Delete from old vnode list, if on one. 980 */ 981 if (LIST_NEXT(bp, b_vnbufs) != NOLIST) 982 bufremvn(bp); 983 /* 984 * If dirty, put on list of dirty buffers; 985 * otherwise insert onto list of clean buffers. 986 */ 987 if ((bp->b_flags & B_DELWRI) == 0) { 988 listheadp = &newvp->v_cleanblkhd; 989 if (TAILQ_EMPTY(&newvp->v_uobj.memq) && 990 (newvp->v_flag & VONWORKLST) && 991 LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) { 992 newvp->v_flag &= ~VWRITEMAPDIRTY; 993 vn_syncer_remove_from_worklist(newvp); 994 } 995 } else { 996 listheadp = &newvp->v_dirtyblkhd; 997 if ((newvp->v_flag & VONWORKLST) == 0) { 998 switch (newvp->v_type) { 999 case VDIR: 1000 delayx = dirdelay; 1001 break; 1002 case VBLK: 1003 if (newvp->v_specmountpoint != NULL) { 1004 delayx = metadelay; 1005 break; 1006 } 1007 /* fall through */ 1008 default: 1009 delayx = filedelay; 1010 break; 1011 } 1012 if (!newvp->v_mount || 1013 (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0) 1014 vn_syncer_add_to_worklist(newvp, delayx); 1015 } 1016 } 1017 bufinsvn(bp, listheadp); 1018 } 1019 1020 /* 1021 * Create a vnode for a block device. 1022 * Used for root filesystem and swap areas. 1023 * Also used for memory file system special devices. 1024 */ 1025 int 1026 bdevvp(dev_t dev, struct vnode **vpp) 1027 { 1028 1029 return (getdevvp(dev, vpp, VBLK)); 1030 } 1031 1032 /* 1033 * Create a vnode for a character device. 1034 * Used for kernfs and some console handling. 1035 */ 1036 int 1037 cdevvp(dev_t dev, struct vnode **vpp) 1038 { 1039 1040 return (getdevvp(dev, vpp, VCHR)); 1041 } 1042 1043 /* 1044 * Create a vnode for a device. 1045 * Used by bdevvp (block device) for root file system etc., 1046 * and by cdevvp (character device) for console and kernfs. 1047 */ 1048 static int 1049 getdevvp(dev_t dev, struct vnode **vpp, enum vtype type) 1050 { 1051 struct vnode *vp; 1052 struct vnode *nvp; 1053 int error; 1054 1055 if (dev == NODEV) { 1056 *vpp = NULLVP; 1057 return (0); 1058 } 1059 error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp); 1060 if (error) { 1061 *vpp = NULLVP; 1062 return (error); 1063 } 1064 vp = nvp; 1065 vp->v_type = type; 1066 if ((nvp = checkalias(vp, dev, NULL)) != 0) { 1067 vput(vp); 1068 vp = nvp; 1069 } 1070 *vpp = vp; 1071 return (0); 1072 } 1073 1074 /* 1075 * Check to see if the new vnode represents a special device 1076 * for which we already have a vnode (either because of 1077 * bdevvp() or because of a different vnode representing 1078 * the same block device). If such an alias exists, deallocate 1079 * the existing contents and return the aliased vnode. 
The 1080 * caller is responsible for filling it with its new contents. 1081 */ 1082 struct vnode * 1083 checkalias(struct vnode *nvp, dev_t nvp_rdev, struct mount *mp) 1084 { 1085 struct lwp *l = curlwp; /* XXX */ 1086 struct vnode *vp; 1087 struct vnode **vpp; 1088 1089 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 1090 return (NULLVP); 1091 1092 vpp = &speclisth[SPECHASH(nvp_rdev)]; 1093 loop: 1094 simple_lock(&spechash_slock); 1095 for (vp = *vpp; vp; vp = vp->v_specnext) { 1096 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) 1097 continue; 1098 /* 1099 * Alias, but not in use, so flush it out. 1100 */ 1101 simple_lock(&vp->v_interlock); 1102 simple_unlock(&spechash_slock); 1103 if (vp->v_usecount == 0) { 1104 vgonel(vp, l); 1105 goto loop; 1106 } 1107 /* 1108 * What we're interested to know here is if someone else has 1109 * removed this vnode from the device hash list while we were 1110 * waiting. This can only happen if vclean() did it, and 1111 * this requires the vnode to be locked. 1112 */ 1113 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) 1114 goto loop; 1115 if (vp->v_specinfo == NULL) { 1116 vput(vp); 1117 goto loop; 1118 } 1119 simple_lock(&spechash_slock); 1120 break; 1121 } 1122 if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) { 1123 MALLOC(nvp->v_specinfo, struct specinfo *, 1124 sizeof(struct specinfo), M_VNODE, M_NOWAIT); 1125 /* XXX Erg. */ 1126 if (nvp->v_specinfo == NULL) { 1127 simple_unlock(&spechash_slock); 1128 uvm_wait("checkalias"); 1129 goto loop; 1130 } 1131 1132 nvp->v_rdev = nvp_rdev; 1133 nvp->v_hashchain = vpp; 1134 nvp->v_specnext = *vpp; 1135 nvp->v_specmountpoint = NULL; 1136 simple_unlock(&spechash_slock); 1137 nvp->v_speclockf = NULL; 1138 simple_lock_init(&nvp->v_spec_cow_slock); 1139 SLIST_INIT(&nvp->v_spec_cow_head); 1140 nvp->v_spec_cow_req = 0; 1141 nvp->v_spec_cow_count = 0; 1142 1143 *vpp = nvp; 1144 if (vp != NULLVP) { 1145 nvp->v_flag |= VALIASED; 1146 vp->v_flag |= VALIASED; 1147 vput(vp); 1148 } 1149 return (NULLVP); 1150 } 1151 simple_unlock(&spechash_slock); 1152 VOP_UNLOCK(vp, 0); 1153 simple_lock(&vp->v_interlock); 1154 vclean(vp, 0, l); 1155 vp->v_op = nvp->v_op; 1156 vp->v_tag = nvp->v_tag; 1157 vp->v_vnlock = &vp->v_lock; 1158 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); 1159 nvp->v_type = VNON; 1160 insmntque(vp, mp); 1161 return (vp); 1162 } 1163 1164 /* 1165 * Grab a particular vnode from the free list, increment its 1166 * reference count and lock it. If the vnode lock bit is set the 1167 * vnode is being eliminated in vgone. In that case, we can not 1168 * grab the vnode, so the process is awakened when the transition is 1169 * completed, and an error returned to indicate that the vnode is no 1170 * longer usable (possibly having been changed to a new file system type). 1171 */ 1172 int 1173 vget(struct vnode *vp, int flags) 1174 { 1175 int error; 1176 1177 /* 1178 * If the vnode is in the process of being cleaned out for 1179 * another use, we wait for the cleaning to finish and then 1180 * return failure. Cleaning is determined by checking that 1181 * the VXLOCK flag is set. 
 */

	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if ((vp->v_flag & (VXLOCK | VFREEING)) != 0) {
		if (flags & LK_NOWAIT) {
			simple_unlock(&vp->v_interlock);
			return EBUSY;
		}
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vget", vp);
		panic("vget: usecount overflow, vp %p", vp);
	}
#endif
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
			vrele(vp);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(struct vnode *vp)
{
	struct lwp *l = curlwp;		/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP|VWRITEMAP|VMAPPED);
	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, l);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(struct vnode *vp)
{
	struct lwp *l = curlwp;		/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt vp %p", vp);
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP|VWRITEMAP|VMAPPED);
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
		VOP_INACTIVE(vp, l);
}
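/*
 * Illustrative sketch (not part of the original file, hence guarded
 * with #if 0): how a file system might use the reference counting
 * interfaces above.  vget() adds a reference and, with a lock type
 * flag, also locks the vnode; vput() unlocks and drops the reference;
 * vrele() drops the reference of an unlocked vnode.  The hash lookup
 * "example_hash_lookup" and the surrounding function are hypothetical
 * stand-ins for file-system specific code.
 */
#if 0
static int
example_lookup_usage(struct mount *mp, ino_t ino, struct vnode **vpp)
{
	struct vnode *vp;

	for (;;) {
		vp = example_hash_lookup(mp, ino);	/* hypothetical */
		if (vp == NULL)
			return ENOENT;
		/*
		 * Take a reference and an exclusive lock.  vget()
		 * fails while the vnode is being cleaned out (VXLOCK),
		 * in which case the lookup is simply retried.  Real
		 * code typically holds v_interlock across the lookup
		 * and passes LK_INTERLOCK as well.
		 */
		if (vget(vp, LK_EXCLUSIVE) == 0)
			break;
	}
	*vpp = vp;	/* the caller eventually calls vput(*vpp) */
	return 0;
}
#endif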
/*
 * Page or buffer structure gets a reference.
 * Called with v_interlock held.
 */
void
vholdl(struct vnode *vp)
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list. The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 * Called with v_interlock held.
 */
void
holdrelel(struct vnode *vp)
{

	if (vp->v_holdcnt <= 0)
		panic("holdrelel: holdcnt vp %p", vp);
	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list. The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */

	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
}

/*
 * Vnode reference.
 */
void
vref(struct vnode *vp)
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required, vp %p", vp);
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vref", vp);
		panic("vref: usecount overflow, vp %p", vp);
	}
#endif
	simple_unlock(&vp->v_interlock);
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If FORCECLOSE is not specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If FORCECLOSE is specified, detach any active vnodes
 * that are found.
 *
 * If WRITECLOSE is set, only flush out regular file vnodes open for
 * writing.
 *
 * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
1404 */ 1405 #ifdef DEBUG 1406 int busyprt = 0; /* print out busy vnodes */ 1407 struct ctldebug debug1 = { "busyprt", &busyprt }; 1408 #endif 1409 1410 int 1411 vflush(struct mount *mp, struct vnode *skipvp, int flags) 1412 { 1413 struct lwp *l = curlwp; /* XXX */ 1414 struct vnode *vp, *nvp; 1415 int busy = 0; 1416 1417 simple_lock(&mntvnode_slock); 1418 loop: 1419 /* 1420 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone() 1421 * and vclean() are called 1422 */ 1423 for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { 1424 if (vp->v_mount != mp) 1425 goto loop; 1426 nvp = TAILQ_NEXT(vp, v_mntvnodes); 1427 /* 1428 * Skip over a selected vnode. 1429 */ 1430 if (vp == skipvp) 1431 continue; 1432 simple_lock(&vp->v_interlock); 1433 /* 1434 * Skip over a vnodes marked VSYSTEM. 1435 */ 1436 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { 1437 simple_unlock(&vp->v_interlock); 1438 continue; 1439 } 1440 /* 1441 * If WRITECLOSE is set, only flush out regular file 1442 * vnodes open for writing. 1443 */ 1444 if ((flags & WRITECLOSE) && 1445 (vp->v_writecount == 0 || vp->v_type != VREG)) { 1446 simple_unlock(&vp->v_interlock); 1447 continue; 1448 } 1449 /* 1450 * With v_usecount == 0, all we need to do is clear 1451 * out the vnode data structures and we are done. 1452 */ 1453 if (vp->v_usecount == 0) { 1454 simple_unlock(&mntvnode_slock); 1455 vgonel(vp, l); 1456 simple_lock(&mntvnode_slock); 1457 continue; 1458 } 1459 /* 1460 * If FORCECLOSE is set, forcibly close the vnode. 1461 * For block or character devices, revert to an 1462 * anonymous device. For all other files, just kill them. 1463 */ 1464 if (flags & FORCECLOSE) { 1465 simple_unlock(&mntvnode_slock); 1466 if (vp->v_type != VBLK && vp->v_type != VCHR) { 1467 vgonel(vp, l); 1468 } else { 1469 vclean(vp, 0, l); 1470 vp->v_op = spec_vnodeop_p; 1471 insmntque(vp, (struct mount *)0); 1472 } 1473 simple_lock(&mntvnode_slock); 1474 continue; 1475 } 1476 #ifdef DEBUG 1477 if (busyprt) 1478 vprint("vflush: busy vnode", vp); 1479 #endif 1480 simple_unlock(&vp->v_interlock); 1481 busy++; 1482 } 1483 simple_unlock(&mntvnode_slock); 1484 if (busy) 1485 return (EBUSY); 1486 return (0); 1487 } 1488 1489 /* 1490 * Disassociate the underlying file system from a vnode. 1491 */ 1492 static void 1493 vclean(struct vnode *vp, int flags, struct lwp *l) 1494 { 1495 struct mount *mp; 1496 int active; 1497 1498 LOCK_ASSERT(simple_lock_held(&vp->v_interlock)); 1499 1500 /* 1501 * Check to see if the vnode is in use. 1502 * If so we have to reference it before we clean it out 1503 * so that its count cannot fall to zero and generate a 1504 * race against ourselves to recycle it. 1505 */ 1506 1507 if ((active = vp->v_usecount) != 0) { 1508 vp->v_usecount++; 1509 #ifdef DIAGNOSTIC 1510 if (vp->v_usecount == 0) { 1511 vprint("vclean", vp); 1512 panic("vclean: usecount overflow"); 1513 } 1514 #endif 1515 } 1516 1517 /* 1518 * Prevent the vnode from being recycled or 1519 * brought into use while we clean it out. 1520 */ 1521 if (vp->v_flag & VXLOCK) 1522 panic("vclean: deadlock, vp %p", vp); 1523 vp->v_flag |= VXLOCK; 1524 if (vp->v_flag & VEXECMAP) { 1525 uvmexp.execpages -= vp->v_uobj.uo_npages; 1526 uvmexp.filepages += vp->v_uobj.uo_npages; 1527 } 1528 vp->v_flag &= ~(VTEXT|VEXECMAP); 1529 1530 /* 1531 * Even if the count is zero, the VOP_INACTIVE routine may still 1532 * have the object locked while it cleans it out. The VOP_LOCK 1533 * ensures that the VOP_INACTIVE routine is done with its work. 
1534 * For active vnodes, it ensures that no other activity can 1535 * occur while the underlying object is being cleaned out. 1536 */ 1537 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK); 1538 1539 /* 1540 * Clean out any cached data associated with the vnode. 1541 * If special device, remove it from special device alias list. 1542 * if it is on one. 1543 */ 1544 if (flags & DOCLOSE) { 1545 int error; 1546 struct vnode *vq, *vx; 1547 1548 vn_start_write(vp, &mp, V_WAIT | V_LOWER); 1549 error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0); 1550 vn_finished_write(mp, V_LOWER); 1551 if (error) 1552 error = vinvalbuf(vp, 0, NOCRED, l, 0, 0); 1553 KASSERT(error == 0); 1554 KASSERT((vp->v_flag & VONWORKLST) == 0); 1555 1556 if (active) 1557 VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL); 1558 1559 if ((vp->v_type == VBLK || vp->v_type == VCHR) && 1560 vp->v_specinfo != 0) { 1561 simple_lock(&spechash_slock); 1562 if (vp->v_hashchain != NULL) { 1563 if (*vp->v_hashchain == vp) { 1564 *vp->v_hashchain = vp->v_specnext; 1565 } else { 1566 for (vq = *vp->v_hashchain; vq; 1567 vq = vq->v_specnext) { 1568 if (vq->v_specnext != vp) 1569 continue; 1570 vq->v_specnext = vp->v_specnext; 1571 break; 1572 } 1573 if (vq == NULL) 1574 panic("missing bdev"); 1575 } 1576 if (vp->v_flag & VALIASED) { 1577 vx = NULL; 1578 for (vq = *vp->v_hashchain; vq; 1579 vq = vq->v_specnext) { 1580 if (vq->v_rdev != vp->v_rdev || 1581 vq->v_type != vp->v_type) 1582 continue; 1583 if (vx) 1584 break; 1585 vx = vq; 1586 } 1587 if (vx == NULL) 1588 panic("missing alias"); 1589 if (vq == NULL) 1590 vx->v_flag &= ~VALIASED; 1591 vp->v_flag &= ~VALIASED; 1592 } 1593 } 1594 simple_unlock(&spechash_slock); 1595 FREE(vp->v_specinfo, M_VNODE); 1596 vp->v_specinfo = NULL; 1597 } 1598 } 1599 LOCK_ASSERT(!simple_lock_held(&vp->v_interlock)); 1600 1601 /* 1602 * If purging an active vnode, it must be closed and 1603 * deactivated before being reclaimed. Note that the 1604 * VOP_INACTIVE will unlock the vnode. 1605 */ 1606 if (active) { 1607 VOP_INACTIVE(vp, l); 1608 } else { 1609 /* 1610 * Any other processes trying to obtain this lock must first 1611 * wait for VXLOCK to clear, then call the new lock operation. 1612 */ 1613 VOP_UNLOCK(vp, 0); 1614 } 1615 /* 1616 * Reclaim the vnode. 1617 */ 1618 if (VOP_RECLAIM(vp, l)) 1619 panic("vclean: cannot reclaim, vp %p", vp); 1620 if (active) { 1621 /* 1622 * Inline copy of vrele() since VOP_INACTIVE 1623 * has already been called. 1624 */ 1625 simple_lock(&vp->v_interlock); 1626 if (--vp->v_usecount <= 0) { 1627 #ifdef DIAGNOSTIC 1628 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 1629 vprint("vclean: bad ref count", vp); 1630 panic("vclean: ref cnt"); 1631 } 1632 #endif 1633 /* 1634 * Insert at tail of LRU list. 1635 */ 1636 1637 simple_unlock(&vp->v_interlock); 1638 simple_lock(&vnode_free_list_slock); 1639 #ifdef DIAGNOSTIC 1640 if (vp->v_holdcnt > 0) 1641 panic("vclean: not clean, vp %p", vp); 1642 #endif 1643 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 1644 simple_unlock(&vnode_free_list_slock); 1645 } else 1646 simple_unlock(&vp->v_interlock); 1647 } 1648 1649 KASSERT(vp->v_uobj.uo_npages == 0); 1650 if (vp->v_type == VREG && vp->v_ractx != NULL) { 1651 uvm_ra_freectx(vp->v_ractx); 1652 vp->v_ractx = NULL; 1653 } 1654 cache_purge(vp); 1655 1656 /* 1657 * Done with purge, notify sleepers of the grim news. 
1658 */ 1659 vp->v_op = dead_vnodeop_p; 1660 vp->v_tag = VT_NON; 1661 simple_lock(&vp->v_interlock); 1662 VN_KNOTE(vp, NOTE_REVOKE); /* FreeBSD has this in vn_pollgone() */ 1663 vp->v_flag &= ~(VXLOCK|VLOCKSWORK); 1664 if (vp->v_flag & VXWANT) { 1665 vp->v_flag &= ~VXWANT; 1666 simple_unlock(&vp->v_interlock); 1667 wakeup((caddr_t)vp); 1668 } else 1669 simple_unlock(&vp->v_interlock); 1670 } 1671 1672 /* 1673 * Recycle an unused vnode to the front of the free list. 1674 * Release the passed interlock if the vnode will be recycled. 1675 */ 1676 int 1677 vrecycle(struct vnode *vp, struct simplelock *inter_lkp, struct lwp *l) 1678 { 1679 1680 simple_lock(&vp->v_interlock); 1681 if (vp->v_usecount == 0) { 1682 if (inter_lkp) 1683 simple_unlock(inter_lkp); 1684 vgonel(vp, l); 1685 return (1); 1686 } 1687 simple_unlock(&vp->v_interlock); 1688 return (0); 1689 } 1690 1691 /* 1692 * Eliminate all activity associated with a vnode 1693 * in preparation for reuse. 1694 */ 1695 void 1696 vgone(struct vnode *vp) 1697 { 1698 struct lwp *l = curlwp; /* XXX */ 1699 1700 simple_lock(&vp->v_interlock); 1701 vgonel(vp, l); 1702 } 1703 1704 /* 1705 * vgone, with the vp interlock held. 1706 */ 1707 void 1708 vgonel(struct vnode *vp, struct lwp *l) 1709 { 1710 1711 LOCK_ASSERT(simple_lock_held(&vp->v_interlock)); 1712 1713 /* 1714 * If a vgone (or vclean) is already in progress, 1715 * wait until it is done and return. 1716 */ 1717 1718 if (vp->v_flag & VXLOCK) { 1719 vp->v_flag |= VXWANT; 1720 ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock); 1721 return; 1722 } 1723 1724 /* 1725 * Clean out the filesystem specific data. 1726 */ 1727 1728 vclean(vp, DOCLOSE, l); 1729 KASSERT((vp->v_flag & VONWORKLST) == 0); 1730 1731 /* 1732 * Delete from old mount point vnode list, if on one. 1733 */ 1734 1735 if (vp->v_mount != NULL) 1736 insmntque(vp, (struct mount *)0); 1737 1738 /* 1739 * The test of the back pointer and the reference count of 1740 * zero is because it will be removed from the free list by 1741 * getcleanvnode, but will not have its reference count 1742 * incremented until after calling vgone. If the reference 1743 * count were incremented first, vgone would (incorrectly) 1744 * try to close the previous instance of the underlying object. 1745 * So, the back pointer is explicitly set to `0xdeadb' in 1746 * getnewvnode after removing it from the freelist to ensure 1747 * that we do not try to move it here. 1748 */ 1749 1750 vp->v_type = VBAD; 1751 if (vp->v_usecount == 0) { 1752 boolean_t dofree; 1753 1754 simple_lock(&vnode_free_list_slock); 1755 if (vp->v_holdcnt > 0) 1756 panic("vgonel: not clean, vp %p", vp); 1757 /* 1758 * if it isn't on the freelist, we're called by getcleanvnode 1759 * and vnode is being re-used. otherwise, we'll free it. 1760 */ 1761 dofree = vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb; 1762 if (dofree) { 1763 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1764 numvnodes--; 1765 } 1766 simple_unlock(&vnode_free_list_slock); 1767 if (dofree) 1768 pool_put(&vnode_pool, vp); 1769 } 1770 } 1771 1772 /* 1773 * Lookup a vnode by device number. 
1774 */ 1775 int 1776 vfinddev(dev_t dev, enum vtype type, struct vnode **vpp) 1777 { 1778 struct vnode *vp; 1779 int rc = 0; 1780 1781 simple_lock(&spechash_slock); 1782 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 1783 if (dev != vp->v_rdev || type != vp->v_type) 1784 continue; 1785 *vpp = vp; 1786 rc = 1; 1787 break; 1788 } 1789 simple_unlock(&spechash_slock); 1790 return (rc); 1791 } 1792 1793 /* 1794 * Revoke all the vnodes corresponding to the specified minor number 1795 * range (endpoints inclusive) of the specified major. 1796 */ 1797 void 1798 vdevgone(int maj, int minl, int minh, enum vtype type) 1799 { 1800 struct vnode *vp; 1801 int mn; 1802 1803 vp = NULL; /* XXX gcc */ 1804 1805 for (mn = minl; mn <= minh; mn++) 1806 if (vfinddev(makedev(maj, mn), type, &vp)) 1807 VOP_REVOKE(vp, REVOKEALL); 1808 } 1809 1810 /* 1811 * Calculate the total number of references to a special device. 1812 */ 1813 int 1814 vcount(struct vnode *vp) 1815 { 1816 struct vnode *vq, *vnext; 1817 int count; 1818 1819 loop: 1820 if ((vp->v_flag & VALIASED) == 0) 1821 return (vp->v_usecount); 1822 simple_lock(&spechash_slock); 1823 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { 1824 vnext = vq->v_specnext; 1825 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) 1826 continue; 1827 /* 1828 * Alias, but not in use, so flush it out. 1829 */ 1830 if (vq->v_usecount == 0 && vq != vp && 1831 (vq->v_flag & VXLOCK) == 0) { 1832 simple_unlock(&spechash_slock); 1833 vgone(vq); 1834 goto loop; 1835 } 1836 count += vq->v_usecount; 1837 } 1838 simple_unlock(&spechash_slock); 1839 return (count); 1840 } 1841 1842 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) 1843 #define ARRAY_PRINT(idx, arr) \ 1844 ((idx) > 0 && (idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN") 1845 1846 const char * const vnode_tags[] = { VNODE_TAGS }; 1847 const char * const vnode_types[] = { VNODE_TYPES }; 1848 const char vnode_flagbits[] = VNODE_FLAGBITS; 1849 1850 /* 1851 * Print out a description of a vnode. 1852 */ 1853 void 1854 vprint(const char *label, struct vnode *vp) 1855 { 1856 char bf[96]; 1857 1858 if (label != NULL) 1859 printf("%s: ", label); 1860 printf("tag %s(%d) type %s(%d), usecount %d, writecount %ld, " 1861 "refcount %ld,", ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag, 1862 ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type, 1863 vp->v_usecount, vp->v_writecount, vp->v_holdcnt); 1864 bitmask_snprintf(vp->v_flag, vnode_flagbits, bf, sizeof(bf)); 1865 if (bf[0] != '\0') 1866 printf(" flags (%s)", &bf[1]); 1867 if (vp->v_data == NULL) { 1868 printf("\n"); 1869 } else { 1870 printf("\n\t"); 1871 VOP_PRINT(vp); 1872 } 1873 } 1874 1875 #ifdef DEBUG 1876 /* 1877 * List all of the locked vnodes in the system. 1878 * Called when debugging the kernel. 
1879 */ 1880 void 1881 printlockedvnodes(void) 1882 { 1883 struct mount *mp, *nmp; 1884 struct vnode *vp; 1885 1886 printf("Locked vnodes\n"); 1887 simple_lock(&mountlist_slock); 1888 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 1889 mp = nmp) { 1890 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) { 1891 nmp = CIRCLEQ_NEXT(mp, mnt_list); 1892 continue; 1893 } 1894 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 1895 if (VOP_ISLOCKED(vp)) 1896 vprint(NULL, vp); 1897 } 1898 simple_lock(&mountlist_slock); 1899 nmp = CIRCLEQ_NEXT(mp, mnt_list); 1900 vfs_unbusy(mp); 1901 } 1902 simple_unlock(&mountlist_slock); 1903 } 1904 #endif 1905 1906 /* 1907 * sysctl helper routine to return list of supported fstypes 1908 */ 1909 static int 1910 sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS) 1911 { 1912 char bf[MFSNAMELEN]; 1913 char *where = oldp; 1914 struct vfsops *v; 1915 size_t needed, left, slen; 1916 int error, first; 1917 1918 if (newp != NULL) 1919 return (EPERM); 1920 if (namelen != 0) 1921 return (EINVAL); 1922 1923 first = 1; 1924 error = 0; 1925 needed = 0; 1926 left = *oldlenp; 1927 1928 LIST_FOREACH(v, &vfs_list, vfs_list) { 1929 if (where == NULL) 1930 needed += strlen(v->vfs_name) + 1; 1931 else { 1932 memset(bf, 0, sizeof(bf)); 1933 if (first) { 1934 strncpy(bf, v->vfs_name, sizeof(bf)); 1935 first = 0; 1936 } else { 1937 bf[0] = ' '; 1938 strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1); 1939 } 1940 bf[sizeof(bf)-1] = '\0'; 1941 slen = strlen(bf); 1942 if (left < slen + 1) 1943 break; 1944 /* +1 to copy out the trailing NUL byte */ 1945 error = copyout(bf, where, slen + 1); 1946 if (error) 1947 break; 1948 where += slen; 1949 needed += slen; 1950 left -= slen; 1951 } 1952 } 1953 *oldlenp = needed; 1954 return (error); 1955 } 1956 1957 /* 1958 * Top level filesystem related information gathering. 1959 */ 1960 SYSCTL_SETUP(sysctl_vfs_setup, "sysctl vfs subtree setup") 1961 { 1962 sysctl_createv(clog, 0, NULL, NULL, 1963 CTLFLAG_PERMANENT, 1964 CTLTYPE_NODE, "vfs", NULL, 1965 NULL, 0, NULL, 0, 1966 CTL_VFS, CTL_EOL); 1967 sysctl_createv(clog, 0, NULL, NULL, 1968 CTLFLAG_PERMANENT, 1969 CTLTYPE_NODE, "generic", 1970 SYSCTL_DESCR("Non-specific vfs related information"), 1971 NULL, 0, NULL, 0, 1972 CTL_VFS, VFS_GENERIC, CTL_EOL); 1973 sysctl_createv(clog, 0, NULL, NULL, 1974 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1975 CTLTYPE_INT, "usermount", 1976 SYSCTL_DESCR("Whether unprivileged users may mount " 1977 "filesystems"), 1978 NULL, 0, &dovfsusermount, 0, 1979 CTL_VFS, VFS_GENERIC, VFS_USERMOUNT, CTL_EOL); 1980 sysctl_createv(clog, 0, NULL, NULL, 1981 CTLFLAG_PERMANENT, 1982 CTLTYPE_STRING, "fstypes", 1983 SYSCTL_DESCR("List of file systems present"), 1984 sysctl_vfs_generic_fstypes, 0, NULL, 0, 1985 CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL); 1986 sysctl_createv(clog, 0, NULL, NULL, 1987 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1988 CTLTYPE_INT, "magiclinks", 1989 SYSCTL_DESCR("Whether \"magic\" symlinks are expanded"), 1990 NULL, 0, &vfs_magiclinks, 0, 1991 CTL_VFS, VFS_GENERIC, VFS_MAGICLINKS, CTL_EOL); 1992 } 1993 1994 1995 int kinfo_vdebug = 1; 1996 int kinfo_vgetfailed; 1997 #define KINFO_VNODESLOP 10 1998 /* 1999 * Dump vnode list (via sysctl). 2000 * Copyout address of vnode followed by vnode. 
2001 */ 2002 /* ARGSUSED */ 2003 int 2004 sysctl_kern_vnode(SYSCTLFN_ARGS) 2005 { 2006 char *where = oldp; 2007 size_t *sizep = oldlenp; 2008 struct mount *mp, *nmp; 2009 struct vnode *vp; 2010 char *bp = where, *savebp; 2011 char *ewhere; 2012 int error; 2013 2014 if (namelen != 0) 2015 return (EOPNOTSUPP); 2016 if (newp != NULL) 2017 return (EPERM); 2018 2019 #define VPTRSZ sizeof(struct vnode *) 2020 #define VNODESZ sizeof(struct vnode) 2021 if (where == NULL) { 2022 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 2023 return (0); 2024 } 2025 ewhere = where + *sizep; 2026 2027 simple_lock(&mountlist_slock); 2028 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 2029 mp = nmp) { 2030 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) { 2031 nmp = CIRCLEQ_NEXT(mp, mnt_list); 2032 continue; 2033 } 2034 savebp = bp; 2035 again: 2036 simple_lock(&mntvnode_slock); 2037 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 2038 /* 2039 * Check that the vp is still associated with 2040 * this filesystem. RACE: could have been 2041 * recycled onto the same filesystem. 2042 */ 2043 if (vp->v_mount != mp) { 2044 simple_unlock(&mntvnode_slock); 2045 if (kinfo_vdebug) 2046 printf("kinfo: vp changed\n"); 2047 bp = savebp; 2048 goto again; 2049 } 2050 if (bp + VPTRSZ + VNODESZ > ewhere) { 2051 simple_unlock(&mntvnode_slock); 2052 *sizep = bp - where; 2053 return (ENOMEM); 2054 } 2055 simple_unlock(&mntvnode_slock); 2056 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || 2057 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) 2058 return (error); 2059 bp += VPTRSZ + VNODESZ; 2060 simple_lock(&mntvnode_slock); 2061 } 2062 simple_unlock(&mntvnode_slock); 2063 simple_lock(&mountlist_slock); 2064 nmp = CIRCLEQ_NEXT(mp, mnt_list); 2065 vfs_unbusy(mp); 2066 } 2067 simple_unlock(&mountlist_slock); 2068 2069 *sizep = bp - where; 2070 return (0); 2071 } 2072 2073 /* 2074 * Check to see if a filesystem is mounted on a block device. 2075 */ 2076 int 2077 vfs_mountedon(struct vnode *vp) 2078 { 2079 struct vnode *vq; 2080 int error = 0; 2081 2082 if (vp->v_type != VBLK) 2083 return ENOTBLK; 2084 if (vp->v_specmountpoint != NULL) 2085 return (EBUSY); 2086 if (vp->v_flag & VALIASED) { 2087 simple_lock(&spechash_slock); 2088 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 2089 if (vq->v_rdev != vp->v_rdev || 2090 vq->v_type != vp->v_type) 2091 continue; 2092 if (vq->v_specmountpoint != NULL) { 2093 error = EBUSY; 2094 break; 2095 } 2096 } 2097 simple_unlock(&spechash_slock); 2098 } 2099 return (error); 2100 } 2101 2102 /* 2103 * Do the usual access checking. 2104 * file_mode, uid and gid are from the vnode in question, 2105 * while acc_mode and cred are from the VOP_ACCESS parameter list 2106 */ 2107 int 2108 vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid, 2109 mode_t acc_mode, kauth_cred_t cred) 2110 { 2111 mode_t mask; 2112 int error, ismember; 2113 2114 /* 2115 * Super-user always gets read/write access, but execute access depends 2116 * on at least one execute bit being set. 2117 */ 2118 if (kauth_cred_geteuid(cred) == 0) { 2119 if ((acc_mode & VEXEC) && type != VDIR && 2120 (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0) 2121 return (EACCES); 2122 return (0); 2123 } 2124 2125 mask = 0; 2126 2127 /* Otherwise, check the owner. */ 2128 if (kauth_cred_geteuid(cred) == uid) { 2129 if (acc_mode & VEXEC) 2130 mask |= S_IXUSR; 2131 if (acc_mode & VREAD) 2132 mask |= S_IRUSR; 2133 if (acc_mode & VWRITE) 2134 mask |= S_IWUSR; 2135 return ((file_mode & mask) == mask ? 

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall(struct lwp *l)
{
        struct mount *mp, *nmp;
        int allerror, error;

        printf("unmounting file systems...");
        for (allerror = 0,
            mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
                nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
                printf("\nunmounting %s (%s)...",
                    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
                /*
                 * XXX Freeze syncer.  Must do this before locking the
                 * mount point.  See dounmount() for details.
                 */
                lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
                if (vfs_busy(mp, 0, 0)) {
                        lockmgr(&syncer_lock, LK_RELEASE, NULL);
                        continue;
                }
                if ((error = dounmount(mp, MNT_FORCE, l)) != 0) {
                        printf("unmount of %s failed with error %d\n",
                            mp->mnt_stat.f_mntonname, error);
                        allerror = 1;
                }
        }
        printf(" done\n");
        if (allerror)
                printf("WARNING: some file systems would not unmount\n");
}

extern struct simplelock bqueue_slock; /* XXX */

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown(void)
{
        struct lwp *l;

        /* XXX we're certainly not running in lwp0's context! */
        l = curlwp;
        if (l == NULL)
                l = &lwp0;

        printf("syncing disks... ");

        /* remove user process from run queue */
        suspendsched();
        (void) spl0();

        /* avoid coming back this way again if we panic. */
        doing_shutdown = 1;

        sys_sync(l, NULL, NULL);

        /* Wait for sync to finish. */
        if (buf_syncwait() != 0) {
#if defined(DDB) && defined(DEBUG_HALT_BUSY)
                Debugger();
#endif
                printf("giving up\n");
                return;
        } else
                printf("done\n");

        /*
         * If we've panic'd, don't make the situation potentially
         * worse by unmounting the file systems.
         */
        if (panicstr != NULL)
                return;

        /* Release inodes held by texts before update. */
#ifdef notdef
        vnshutdown();
#endif
        /* Unmount file systems. */
        vfs_unmountall(l);
}

/*
 * Mount the root file system.  If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot(void)
{
        struct vfsops *v;
        int error = ENODEV;

        if (root_device == NULL)
                panic("vfs_mountroot: root device unknown");

        switch (device_class(root_device)) {
        case DV_IFNET:
                if (rootdev != NODEV)
                        panic("vfs_mountroot: rootdev set for DV_IFNET "
                            "(0x%08x -> %d,%d)", rootdev,
                            major(rootdev), minor(rootdev));
                break;

        case DV_DISK:
                if (rootdev == NODEV)
                        panic("vfs_mountroot: rootdev not set for DV_DISK");
                if (bdevvp(rootdev, &rootvp))
                        panic("vfs_mountroot: can't get vnode for rootdev");
                error = VOP_OPEN(rootvp, FREAD, FSCRED, curlwp);
                if (error) {
                        printf("vfs_mountroot: can't open root device\n");
                        return (error);
                }
                break;

        default:
                printf("%s: inappropriate for root file system\n",
                    root_device->dv_xname);
                return (ENODEV);
        }

        /*
         * If user specified a file system, use it.
         */
        if (mountroot != NULL) {
                error = (*mountroot)();
                goto done;
        }

        /*
         * Try each file system currently configured into the kernel.
         */
        LIST_FOREACH(v, &vfs_list, vfs_list) {
                if (v->vfs_mountroot == NULL)
                        continue;
#ifdef DEBUG
                aprint_normal("mountroot: trying %s...\n", v->vfs_name);
#endif
                error = (*v->vfs_mountroot)();
                if (!error) {
                        aprint_normal("root file system type: %s\n",
                            v->vfs_name);
                        break;
                }
        }

        if (v == NULL) {
                printf("no file system for %s", root_device->dv_xname);
                if (device_class(root_device) == DV_DISK)
                        printf(" (dev 0x%x)", rootdev);
                printf("\n");
                error = EFTYPE;
        }

done:
        if (error && device_class(root_device) == DV_DISK) {
                VOP_CLOSE(rootvp, FREAD, FSCRED, curlwp);
                vrele(rootvp);
        }
        return (error);
}

/*
 * Given a file system name, look up the vfsops for that
 * file system, or return NULL if file system isn't present
 * in the kernel.
 */
struct vfsops *
vfs_getopsbyname(const char *name)
{
        struct vfsops *v;

        LIST_FOREACH(v, &vfs_list, vfs_list) {
                if (strcmp(v->vfs_name, name) == 0)
                        break;
        }

        return (v);
}

/*
 * Establish a file system and initialize it.
 */
int
vfs_attach(struct vfsops *vfs)
{
        struct vfsops *v;
        int error = 0;

        /*
         * Make sure this file system doesn't already exist.
         */
        LIST_FOREACH(v, &vfs_list, vfs_list) {
                if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
                        error = EEXIST;
                        goto out;
                }
        }

        /*
         * Initialize the vnode operations for this file system.
         */
        vfs_opv_init(vfs->vfs_opv_descs);

        /*
         * Now initialize the file system itself.
         */
        (*vfs->vfs_init)();

        /*
         * ...and link it into the kernel's list.
         */
        LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);

        /*
         * Sanity: make sure the reference count is 0.
         */
        vfs->vfs_refcount = 0;

 out:
        return (error);
}
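
/*
 * Illustrative only: a sketch of how a file system registers its vfsops
 * with vfs_attach() and later withdraws it with vfs_detach() (below).
 * "examplefs_vfsops" is a hypothetical name, not something defined in
 * this file.
 *
 *	extern struct vfsops examplefs_vfsops;
 *	int error;
 *
 *	error = vfs_attach(&examplefs_vfsops);	 (EEXIST if already present)
 *	...
 *	error = vfs_detach(&examplefs_vfsops);	 (EBUSY while still referenced)
 */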

/*
 * Remove a file system from the kernel.
 */
int
vfs_detach(struct vfsops *vfs)
{
        struct vfsops *v;

        /*
         * Make sure no one is using the filesystem.
         */
        if (vfs->vfs_refcount != 0)
                return (EBUSY);

        /*
         * ...and remove it from the kernel's list.
         */
        LIST_FOREACH(v, &vfs_list, vfs_list) {
                if (v == vfs) {
                        LIST_REMOVE(v, vfs_list);
                        break;
                }
        }

        if (v == NULL)
                return (ESRCH);

        /*
         * Now run the file system-specific cleanups.
         */
        (*vfs->vfs_done)();

        /*
         * Free the vnode operations vector.
         */
        vfs_opv_free(vfs->vfs_opv_descs);
        return (0);
}

void
vfs_reinit(void)
{
        struct vfsops *vfs;

        LIST_FOREACH(vfs, &vfs_list, vfs_list) {
                if (vfs->vfs_reinit) {
                        (*vfs->vfs_reinit)();
                }
        }
}

/*
 * Request a filesystem to suspend write operations.
 */
int
vfs_write_suspend(struct mount *mp, int slpflag, int slptimeo)
{
        struct lwp *l = curlwp; /* XXX */
        int error;

        while ((mp->mnt_iflag & IMNT_SUSPEND)) {
                if (slptimeo < 0)
                        return EWOULDBLOCK;
                error = tsleep(&mp->mnt_flag, slpflag, "suspwt1", slptimeo);
                if (error)
                        return error;
        }
        mp->mnt_iflag |= IMNT_SUSPEND;

        simple_lock(&mp->mnt_slock);
        if (mp->mnt_writeopcountupper > 0)
                ltsleep(&mp->mnt_writeopcountupper, PUSER - 1, "suspwt",
                    0, &mp->mnt_slock);
        simple_unlock(&mp->mnt_slock);

        error = VFS_SYNC(mp, MNT_WAIT, l->l_cred, l);
        if (error) {
                vfs_write_resume(mp);
                return error;
        }
        mp->mnt_iflag |= IMNT_SUSPENDLOW;

        simple_lock(&mp->mnt_slock);
        if (mp->mnt_writeopcountlower > 0)
                ltsleep(&mp->mnt_writeopcountlower, PUSER - 1, "suspwt",
                    0, &mp->mnt_slock);
        mp->mnt_iflag |= IMNT_SUSPENDED;
        simple_unlock(&mp->mnt_slock);

        return 0;
}

/*
 * Request a filesystem to resume write operations.
 */
void
vfs_write_resume(struct mount *mp)
{

        if ((mp->mnt_iflag & IMNT_SUSPEND) == 0)
                return;
        mp->mnt_iflag &= ~(IMNT_SUSPEND | IMNT_SUSPENDLOW | IMNT_SUSPENDED);
        wakeup(&mp->mnt_flag);
}
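
/*
 * Illustrative only: the intended pairing of the two routines above, e.g.
 * around an operation that needs a quiescent file system (such as taking
 * a snapshot).  The flags shown are one plausible choice, not a requirement
 * of this interface.
 *
 *	error = vfs_write_suspend(mp, PUSER | PCATCH, 0);
 *	if (error)
 *		return error;
 *	... operate on the now write-suspended file system ...
 *	vfs_write_resume(mp);
 */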

void
copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
{
        const struct statvfs *mbp;

        if (sbp == (mbp = &mp->mnt_stat))
                return;

        (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
        sbp->f_fsid = mbp->f_fsid;
        sbp->f_owner = mbp->f_owner;
        sbp->f_flag = mbp->f_flag;
        sbp->f_syncwrites = mbp->f_syncwrites;
        sbp->f_asyncwrites = mbp->f_asyncwrites;
        sbp->f_syncreads = mbp->f_syncreads;
        sbp->f_asyncreads = mbp->f_asyncreads;
        (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
        (void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
            sizeof(sbp->f_fstypename));
        (void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
            sizeof(sbp->f_mntonname));
        (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
            sizeof(sbp->f_mntfromname));
        sbp->f_namemax = mbp->f_namemax;
}

int
set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
    struct mount *mp, struct lwp *l)
{
        int error;
        size_t size;
        struct statvfs *sfs = &mp->mnt_stat;
        int (*fun)(const void *, void *, size_t, size_t *);

        (void)strncpy(mp->mnt_stat.f_fstypename, mp->mnt_op->vfs_name,
            sizeof(mp->mnt_stat.f_fstypename));

        if (onp) {
                struct cwdinfo *cwdi = l->l_proc->p_cwdi;
                fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
                if (cwdi->cwdi_rdir != NULL) {
                        size_t len;
                        char *bp;
                        char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);

                        if (!path) /* XXX can't happen with M_WAITOK */
                                return ENOMEM;

                        bp = path + MAXPATHLEN;
                        *--bp = '\0';
                        error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
                            path, MAXPATHLEN / 2, 0, l);
                        if (error) {
                                free(path, M_TEMP);
                                return error;
                        }

                        len = strlen(bp);
                        if (len > sizeof(sfs->f_mntonname) - 1)
                                len = sizeof(sfs->f_mntonname) - 1;
                        (void)strncpy(sfs->f_mntonname, bp, len);
                        free(path, M_TEMP);

                        if (len < sizeof(sfs->f_mntonname) - 1) {
                                error = (*fun)(onp, &sfs->f_mntonname[len],
                                    sizeof(sfs->f_mntonname) - len - 1, &size);
                                if (error)
                                        return error;
                                size += len;
                        } else {
                                size = len;
                        }
                } else {
                        error = (*fun)(onp, &sfs->f_mntonname,
                            sizeof(sfs->f_mntonname) - 1, &size);
                        if (error)
                                return error;
                }
                (void)memset(sfs->f_mntonname + size, 0,
                    sizeof(sfs->f_mntonname) - size);
        }

        if (fromp) {
                fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
                error = (*fun)(fromp, sfs->f_mntfromname,
                    sizeof(sfs->f_mntfromname) - 1, &size);
                if (error)
                        return error;
                (void)memset(sfs->f_mntfromname + size, 0,
                    sizeof(sfs->f_mntfromname) - size);
        }
        return 0;
}
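
/*
 * Illustrative only: a sketch of how a file system's mount routine
 * typically records the mount point and special-device names via
 * set_statvfs_info().  "path" and "args.fspec" stand in for the userland
 * strings handed to VFS_MOUNT; the exact names vary per file system.
 *
 *	error = set_statvfs_info(path, UIO_USERSPACE, args.fspec,
 *	    UIO_USERSPACE, mp, l);
 *	if (error)
 *		return error;
 */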

void
vfs_timestamp(struct timespec *ts)
{

        nanotime(ts);
}

#ifdef DDB
static const char buf_flagbits[] = BUF_FLAGBITS;

void
vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...))
{
        char bf[1024];

        (*pr)(" vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" rawblkno 0x%"
            PRIx64 " dev 0x%x\n",
            bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev);

        bitmask_snprintf(bp->b_flags, buf_flagbits, bf, sizeof(bf));
        (*pr)(" error %d flags 0x%s\n", bp->b_error, bf);

        (*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
            bp->b_bufsize, bp->b_bcount, bp->b_resid);
        (*pr)(" data %p saveaddr %p dep %p\n",
            bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
        (*pr)(" iodone %p\n", bp->b_iodone);
}

void
vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...))
{
        char bf[256];

        uvm_object_printit(&vp->v_uobj, full, pr);
        bitmask_snprintf(vp->v_flag, vnode_flagbits, bf, sizeof(bf));
        (*pr)("\nVNODE flags %s\n", bf);
        (*pr)("mp %p numoutput %d size 0x%llx\n",
            vp->v_mount, vp->v_numoutput, vp->v_size);

        (*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
            vp->v_data, vp->v_usecount, vp->v_writecount,
            vp->v_holdcnt, vp->v_numoutput);

        (*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
            ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
            ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
            vp->v_mount, vp->v_mountedhere);

        if (full) {
                struct buf *bp;

                (*pr)("clean bufs:\n");
                LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
                        (*pr)(" bp %p\n", bp);
                        vfs_buf_print(bp, full, pr);
                }

                (*pr)("dirty bufs:\n");
                LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
                        (*pr)(" bp %p\n", bp);
                        vfs_buf_print(bp, full, pr);
                }
        }
}

void
vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...))
{
        char sbuf[256];

        (*pr)("vnodecovered = %p syncer = %p data = %p\n",
            mp->mnt_vnodecovered, mp->mnt_syncer, mp->mnt_data);

        (*pr)("fs_bshift %d dev_bshift = %d\n",
            mp->mnt_fs_bshift, mp->mnt_dev_bshift);

        bitmask_snprintf(mp->mnt_flag, __MNT_FLAG_BITS, sbuf, sizeof(sbuf));
        (*pr)("flag = %s\n", sbuf);

        bitmask_snprintf(mp->mnt_iflag, __IMNT_FLAG_BITS, sbuf, sizeof(sbuf));
        (*pr)("iflag = %s\n", sbuf);

        /* XXX use lockmgr_printinfo */
        if (mp->mnt_lock.lk_sharecount)
                (*pr)(" lock type %s: SHARED (count %d)", mp->mnt_lock.lk_wmesg,
                    mp->mnt_lock.lk_sharecount);
        else if (mp->mnt_lock.lk_flags & LK_HAVE_EXCL) {
                (*pr)(" lock type %s: EXCL (count %d) by ",
                    mp->mnt_lock.lk_wmesg, mp->mnt_lock.lk_exclusivecount);
                if (mp->mnt_lock.lk_flags & LK_SPIN)
                        (*pr)("processor %lu", mp->mnt_lock.lk_cpu);
                else
                        (*pr)("pid %d.%d", mp->mnt_lock.lk_lockholder,
                            mp->mnt_lock.lk_locklwp);
        } else
                (*pr)(" not locked");
        if ((mp->mnt_lock.lk_flags & LK_SPIN) == 0 &&
            mp->mnt_lock.lk_waitcount > 0)
                (*pr)(" with %d pending", mp->mnt_lock.lk_waitcount);

        (*pr)("\n");

        if (mp->mnt_unmounter) {
                (*pr)("unmounter pid = %d ", mp->mnt_unmounter->l_proc->p_pid);
        }
        (*pr)("wcnt = %d, writeopcountupper = %d, writeopcountlower = %d\n",
            mp->mnt_wcnt, mp->mnt_writeopcountupper, mp->mnt_writeopcountlower);

        (*pr)("statvfs cache:\n");
        (*pr)("\tbsize = %lu\n", mp->mnt_stat.f_bsize);
        (*pr)("\tfrsize = %lu\n", mp->mnt_stat.f_frsize);
        (*pr)("\tiosize = %lu\n", mp->mnt_stat.f_iosize);

        (*pr)("\tblocks = %"PRIu64"\n", mp->mnt_stat.f_blocks);
        (*pr)("\tbfree = %"PRIu64"\n", mp->mnt_stat.f_bfree);
        (*pr)("\tbavail = %"PRIu64"\n", mp->mnt_stat.f_bavail);
        (*pr)("\tbresvd = %"PRIu64"\n", mp->mnt_stat.f_bresvd);

        (*pr)("\tfiles = %"PRIu64"\n", mp->mnt_stat.f_files);
        (*pr)("\tffree = %"PRIu64"\n", mp->mnt_stat.f_ffree);
        (*pr)("\tfavail = %"PRIu64"\n", mp->mnt_stat.f_favail);
        (*pr)("\tfresvd = %"PRIu64"\n", mp->mnt_stat.f_fresvd);

        (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
            mp->mnt_stat.f_fsidx.__fsid_val[0],
            mp->mnt_stat.f_fsidx.__fsid_val[1]);

        (*pr)("\towner = %"PRIu32"\n", mp->mnt_stat.f_owner);
        (*pr)("\tnamemax = %lu\n", mp->mnt_stat.f_namemax);

        bitmask_snprintf(mp->mnt_stat.f_flag, __MNT_FLAG_BITS, sbuf,
            sizeof(sbuf));
        (*pr)("\tflag = %s\n", sbuf);
        (*pr)("\tsyncwrites = %" PRIu64 "\n", mp->mnt_stat.f_syncwrites);
        (*pr)("\tasyncwrites = %" PRIu64 "\n", mp->mnt_stat.f_asyncwrites);
        (*pr)("\tsyncreads = %" PRIu64 "\n", mp->mnt_stat.f_syncreads);
        (*pr)("\tasyncreads = %" PRIu64 "\n", mp->mnt_stat.f_asyncreads);
        (*pr)("\tfstypename = %s\n", mp->mnt_stat.f_fstypename);
        (*pr)("\tmntonname = %s\n", mp->mnt_stat.f_mntonname);
        (*pr)("\tmntfromname = %s\n", mp->mnt_stat.f_mntfromname);

        {
                int cnt = 0;
                struct vnode *vp;
                (*pr)("locked vnodes =");
                /* XXX would take mountlist lock, except ddb may not have context */
                TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
                        if (VOP_ISLOCKED(vp)) {
                                if ((++cnt % 6) == 0) {
                                        (*pr)(" %p,\n\t", vp);
                                } else {
                                        (*pr)(" %p,", vp);
                                }
                        }
                }
                (*pr)("\n");
        }

        if (full) {
                int cnt = 0;
                struct vnode *vp;
                (*pr)("all vnodes =");
                /* XXX would take mountlist lock, except ddb may not have context */
                TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
                        if (!TAILQ_NEXT(vp, v_mntvnodes)) {
                                (*pr)(" %p", vp);
                        } else if ((++cnt % 6) == 0) {
                                (*pr)(" %p,\n\t", vp);
                        } else {
                                (*pr)(" %p,", vp);
                        }
                }
                (*pr)("\n");
        }
}
#endif /* DDB */
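
/*
 * Illustrative only: the DDB helpers above are normally reached from the
 * in-kernel debugger prompt rather than called directly, via "show"
 * commands along the lines of the following (exact command names and
 * modifiers are as documented in ddb(4); the addresses are hypothetical):
 *
 *	db> show mount 0xcc7325dc
 *	db> show vnode 0xcc76f2e8
 *	db> show buf 0xcc8822a0
 */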