/*	$NetBSD: vfs_subr.c,v 1.256 2005/12/11 12:24:30 christos Exp $	*/

/*-
 * Copyright (c) 1997, 1998, 2004, 2005 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 * This code is derived from software contributed to The NetBSD Foundation
 * by Charles M. Hannum.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.256 2005/12/11 12:24:30 christos Exp $");

#include "opt_inet.h"
#include "opt_ddb.h"
#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/filedesc.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/syncfs/syncfs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_readahead.h>
#include <uvm/uvm_ddb.h>

#include <sys/sysctl.h>

const enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
const int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
/* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */
struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;

/* XXX - gross; single global lock to protect v_numoutput */
struct simplelock global_v_numoutput_slock = SIMPLELOCK_INITIALIZER;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;			/* root device */

POOL_INIT(vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
    &pool_allocator_nointr);

MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");

/*
 * Local declarations.
 */
void insmntque(struct vnode *, struct mount *);
int getdevvp(dev_t, struct vnode **, enum vtype);

void vclean(struct vnode *, int, struct lwp *);

static struct vnode *getcleanvnode(struct lwp *);

#ifdef DEBUG
void printlockedvnodes(void);
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit(void)
{

	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

int
vfs_drainvnodes(long target, struct lwp *l)
{

	simple_lock(&vnode_free_list_slock);
	while (numvnodes > target) {
		struct vnode *vp;

		vp = getcleanvnode(l);
		if (vp == NULL)
			return EBUSY; /* give up */
		pool_put(&vnode_pool, vp);
		simple_lock(&vnode_free_list_slock);
		numvnodes--;
	}
	simple_unlock(&vnode_free_list_slock);

	return 0;
}

/*
 * grab a vnode from freelist and clean it.
 */
struct vnode *
getcleanvnode(struct lwp *l)
{
	struct vnode *vp;
	struct mount *mp;
	struct freelst *listhd;

	LOCK_ASSERT(simple_lock_held(&vnode_free_list_slock));

	listhd = &vnode_free_list;
try_nextlist:
	TAILQ_FOREACH(vp, listhd, v_freelist) {
		if (!simple_lock_try(&vp->v_interlock))
			continue;
		/*
		 * as our lwp might hold the underlying vnode locked,
		 * don't try to reclaim the VLAYER vnode if it's locked.
		 */
		if ((vp->v_flag & VXLOCK) == 0 &&
		    ((vp->v_flag & VLAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
			if (vn_start_write(vp, &mp, V_NOWAIT) == 0)
				break;
		}
		mp = NULL;
		simple_unlock(&vp->v_interlock);
	}

	if (vp == NULLVP) {
		if (listhd == &vnode_free_list) {
			listhd = &vnode_hold_list;
			goto try_nextlist;
		}
		simple_unlock(&vnode_free_list_slock);
		return NULLVP;
	}

	if (vp->v_usecount)
		panic("free vnode isn't, vp %p", vp);
	TAILQ_REMOVE(listhd, vp, v_freelist);
	/* see comment on why 0xdeadb is set at end of vgone (below) */
	vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
	simple_unlock(&vnode_free_list_slock);
	vp->v_lease = NULL;

	if (vp->v_type != VBAD)
		vgonel(vp, l);
	else
		simple_unlock(&vp->v_interlock);
	vn_finished_write(mp, 0);
#ifdef DIAGNOSTIC
	if (vp->v_data || vp->v_uobj.uo_npages ||
	    TAILQ_FIRST(&vp->v_uobj.memq))
		panic("cleaned vnode isn't, vp %p", vp);
	if (vp->v_numoutput)
		panic("clean vnode has pending I/O's, vp %p", vp);
#endif
	KASSERT((vp->v_flag & VONWORKLST) == 0);

	return vp;
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(struct mount *mp, int flags, struct simplelock *interlkp)
{
	int lkflags;

	while (mp->mnt_iflag & IMNT_UNMOUNT) {
		int gone, n;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
		    && mp->mnt_unmounter == curlwp)
			return (EDEADLK);
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		simple_lock(&mp->mnt_slock);
		mp->mnt_wcnt++;
		ltsleep((caddr_t)mp, PVFS, "vfs_busy", 0, &mp->mnt_slock);
		n = --mp->mnt_wcnt;
		simple_unlock(&mp->mnt_slock);
		gone = mp->mnt_iflag & IMNT_GONE;

		if (n == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(struct mount *mp)
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(const char *fstypename, const char *devname,
    struct mount **mpp)
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	simple_lock_init(&mp->mnt_slock);
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	mp->mnt_leaf = mp;
	vfsp->vfs_refcount++;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid_t *fsid)
{
	struct mount *mp;

	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
		    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(struct mount *mp)
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
	mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
	tfsid.__fsid_val[1] = mtype;
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.__fsid_val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(const char *type)
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}


/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(struct vattr *vap)
{

	vap->va_type = VNON;

	/*
	 * Assign individually so that it is safe even if size and
	 * sign of each member are varied.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec =
	    vap->va_birthtime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec =
	    vap->va_birthtime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)(void *);
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
    struct vnode **vpp)
{
	extern struct uvm_pagerops uvm_vnodeops;
	struct uvm_object *uobj;
	struct lwp *l = curlwp;				/* XXX */
	static int toggle;
	struct vnode *vp;
	int error = 0, tryalloc;

try_again:
	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error.  If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock).
		 */
		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
		if (error && error != EDEADLK)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reticent to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */

	vp = NULL;

	simple_lock(&vnode_free_list_slock);

	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	tryalloc = numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));

	if (tryalloc &&
	    (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
		numvnodes++;
		simple_unlock(&vnode_free_list_slock);
		memset(vp, 0, sizeof(*vp));
		UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 1);
		/*
		 * done by memset() above.
		 *	LIST_INIT(&vp->v_nclist);
		 *	LIST_INIT(&vp->v_dnclist);
		 */
	} else {
		vp = getcleanvnode(l);
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			if (mp && error != EDEADLK)
				vfs_unbusy(mp);
			if (tryalloc) {
				printf("WARNING: unable to allocate new "
				    "vnode, retrying...\n");
				(void) tsleep(&lbolt, PRIBIO, "newvn", hz);
				goto try_again;
			}
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		vp->v_usecount = 1;
		vp->v_flag = 0;
		vp->v_socket = NULL;
	}
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	KASSERT(LIST_EMPTY(&vp->v_nclist));
	KASSERT(LIST_EMPTY(&vp->v_dnclist));
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_data = 0;
	simple_lock_init(&vp->v_interlock);

	/*
	 * initialize uvm_object within vnode.
	 */

	uobj = &vp->v_uobj;
	KASSERT(uobj->pgops == &uvm_vnodeops);
	KASSERT(uobj->uo_npages == 0);
	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
	vp->v_size = VSIZENOTSET;

	if (mp && error != EDEADLK)
		vfs_unbusy(mp);
	return (0);
}

/*
 * This is really just the reverse of getnewvnode(). Needed for
 * VFS_VGET functions who may need to push back a vnode in case
 * of a locking race.
 */
void
ungetnewvnode(struct vnode *vp)
{
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 1)
		panic("ungetnewvnode: busy vnode");
#endif
	vp->v_usecount--;
	insmntque(vp, NULL);
	vp->v_type = VBAD;

	simple_lock(&vp->v_interlock);
	/*
	 * Insert at head of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(struct vnode *vp, struct mount *mp)
{

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_iflag & IMNT_UNMOUNT) &&
	    !(mp->mnt_flag & MNT_SOFTDEP) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(struct buf *bp)
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		/* XXX global lock hack
		 * can't use v_interlock here since this is called
		 * in interrupt context from biodone().
		 */
		simple_lock(&global_v_numoutput_slock);
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput, vp %p", vp);
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
		simple_unlock(&global_v_numoutput_slock);
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vinvalbuf(struct vnode *vp, int flags, struct ucred *cred, struct lwp *l,
    int slpflag, int slptimeo)
{
	struct buf *bp, *nbp;
	int s, error;
	int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
	    (flags & V_SAVE ? PGO_CLEANIT : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, 0, 0, flushflags);
	if (error) {
		return error;
	}

	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, l);
		if (error)
			return (error);
#ifdef DIAGNOSTIC
		s = splbio();
		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
			panic("vinvalbuf: dirty bufs, vp %p", vp);
		splx(s);
#endif
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep((caddr_t)bp,
			    slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vinvalbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep((caddr_t)bp,
			    slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vinvalbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			bp->b_flags |= B_BUSY | B_VFLUSH;
			simple_unlock(&bp->b_interlock);
			VOP_BWRITE(bp);
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	splx(s);

	return (0);
}

/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vtruncbuf(struct vnode *vp, daddr_t lbn, int slpflag, int slptimeo)
{
	struct buf *bp, *nbp;
	int s, error;
	voff_t off;

	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
	if (error) {
		return error;
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vtruncbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vtruncbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	splx(s);

	return (0);
}

/*
 * Flush out all dirty buffers associated with a vnode; wait for the
 * I/O to complete if `sync' is set.
 */
void
vflushbuf(struct vnode *vp, int sync)
{
	struct buf *bp, *nbp;
	int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
	int s;

	simple_lock(&vp->v_interlock);
	(void) VOP_PUTPAGES(vp, 0, 0, flags);

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if ((bp->b_flags & B_BUSY)) {
			simple_unlock(&bp->b_interlock);
			continue;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_flags |= B_BUSY | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	simple_lock(&global_v_numoutput_slock);
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		ltsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0,
		    &global_v_numoutput_slock);
	}
	simple_unlock(&global_v_numoutput_slock);
	splx(s);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(struct vnode *vp, struct buf *bp)
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free, bp %p", bp);
	VHOLD(vp);
	s = splbio();
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(struct buf *bp)
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == NULL)
		panic("brelvp: vp NULL, bp %p", bp);

	s = splbio();
	vp = bp->b_vp;
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);

	if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_flag &= ~(VWRITEMAPDIRTY|VONWORKLST);
		LIST_REMOVE(vp, v_synclist);
	}

	bp->b_vp = NULL;
	HOLDRELE(vp);
	splx(s);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 *
 * This function must be called at splbio().
 */
void
reassignbuf(struct buf *bp, struct vnode *newvp)
{
	struct buflists *listheadp;
	int delayx;

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &newvp->v_cleanblkhd;
		if (TAILQ_EMPTY(&newvp->v_uobj.memq) &&
		    (newvp->v_flag & VONWORKLST) &&
		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
			newvp->v_flag &= ~(VWRITEMAPDIRTY|VONWORKLST);
			LIST_REMOVE(newvp, v_synclist);
		}
	} else {
		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			switch (newvp->v_type) {
			case VDIR:
				delayx = dirdelay;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delayx = metadelay;
					break;
				}
				/* fall through */
			default:
				delayx = filedelay;
				break;
			}
			if (!newvp->v_mount ||
			    (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
				vn_syncer_add_to_worklist(newvp, delayx);
		}
	}
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev_t dev, struct vnode **vpp)
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev_t dev, struct vnode **vpp)
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
int
getdevvp(dev_t dev, struct vnode **vpp, enum vtype type)
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device).  If such an alias exists, deallocate
 * the existing contents and return the aliased vnode.
 * The caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(struct vnode *nvp, dev_t nvp_rdev, struct mount *mp)
{
	struct lwp *l = curlwp;				/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		simple_unlock(&spechash_slock);
		if (vp->v_usecount == 0) {
			vgonel(vp, l);
			goto loop;
		}
		/*
		 * What we want to know here is whether someone else has
		 * removed this vnode from the device hash list while we were
		 * waiting.  This can only happen if vclean() did it, and
		 * this requires the vnode to be locked.  Therefore, we use
		 * LK_SLEEPFAIL and retry.
		 */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_SLEEPFAIL))
			goto loop;
		simple_lock(&spechash_slock);
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_NOWAIT);
		/* XXX Erg. */
		if (nvp->v_specinfo == NULL) {
			simple_unlock(&spechash_slock);
			uvm_wait("checkalias");
			goto loop;
		}

		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		simple_lock_init(&nvp->v_spec_cow_slock);
		SLIST_INIT(&nvp->v_spec_cow_head);
		nvp->v_spec_cow_req = 0;
		nvp->v_spec_cow_count = 0;

		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, l);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it.  If the vnode lock bit is set the
 * vnode is being eliminated in vgone.  In that case, we can not
 * grab the vnode, so the process is awakened when the transition is
 * completed, and an error returned to indicate that the vnode is no
 * longer usable (possibly having been changed to a new file system type).
 */
int
vget(struct vnode *vp, int flags)
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */

	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		if (flags & LK_NOWAIT) {
			simple_unlock(&vp->v_interlock);
			return EBUSY;
		}
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vget", vp);
		panic("vget: usecount overflow, vp %p", vp);
	}
#endif
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active. We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (vp->v_usecount > 0) {
				simple_unlock(&vp->v_interlock);
				return (error);
			}
			/*
			 * insert at tail of LRU list
			 */
			simple_lock(&vnode_free_list_slock);
			if (vp->v_holdcnt > 0)
				TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
				    v_freelist);
			else
				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
				    v_freelist);
			simple_unlock(&vnode_free_list_slock);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(struct vnode *vp)
{
	struct lwp *l = curlwp;				/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP|VWRITEMAP);
	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, l);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(struct vnode *vp)
{
	struct lwp *l = curlwp;				/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt vp %p", vp);
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP|VWRITEMAP);
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
		VOP_INACTIVE(vp, l);
}

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vholdl(struct vnode *vp)
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrelel(struct vnode *vp)
{

	if (vp->v_holdcnt <= 0)
		panic("holdrelel: holdcnt vp %p", vp);
	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */

	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
}

/*
 * Vnode reference.
 */
void
vref(struct vnode *vp)
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required, vp %p", vp);
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vref", vp);
		panic("vref: usecount overflow, vp %p", vp);
	}
#endif
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If FORCECLOSE is not specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If FORCECLOSE is specified, detach any active vnodes
 * that are found.
 *
 * If WRITECLOSE is set, only flush out regular file vnodes open for
 * writing.
 *
 * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(struct mount *mp, struct vnode *skipvp, int flags)
{
	struct lwp *l = curlwp;				/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = LIST_NEXT(vp, v_mntvnodes);
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, l);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, l);
			} else {
				vclean(vp, 0, l);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(struct vnode *vp, int flags, struct lwp *l)
{
	struct mount *mp;
	int active;

	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */

	if ((active = vp->v_usecount) != 0) {
		vp->v_usecount++;
#ifdef DIAGNOSTIC
		if (vp->v_usecount == 0) {
			vprint("vclean", vp);
			panic("vclean: usecount overflow");
		}
#endif
	}

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock, vp %p", vp);
	vp->v_flag |= VXLOCK;
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);

	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);

	/*
	 * Clean out any cached data associated with the vnode.
	 * If special device, remove it from the special device alias
	 * list, if it is on one.
	 */
	if (flags & DOCLOSE) {
		int error;
		struct vnode *vq, *vx;

		vn_start_write(vp, &mp, V_WAIT | V_LOWER);
		error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
		vn_finished_write(mp, V_LOWER);
		if (error)
			error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
		KASSERT(error == 0);
		KASSERT((vp->v_flag & VONWORKLST) == 0);

		if (active)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);

		if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
		    vp->v_specinfo != 0) {
			simple_lock(&spechash_slock);
			if (vp->v_hashchain != NULL) {
				if (*vp->v_hashchain == vp) {
					*vp->v_hashchain = vp->v_specnext;
				} else {
					for (vq = *vp->v_hashchain; vq;
					     vq = vq->v_specnext) {
						if (vq->v_specnext != vp)
							continue;
						vq->v_specnext = vp->v_specnext;
						break;
					}
					if (vq == NULL)
						panic("missing bdev");
				}
				if (vp->v_flag & VALIASED) {
					vx = NULL;
					for (vq = *vp->v_hashchain; vq;
					     vq = vq->v_specnext) {
						if (vq->v_rdev != vp->v_rdev ||
						    vq->v_type != vp->v_type)
							continue;
						if (vx)
							break;
						vx = vq;
					}
					if (vx == NULL)
						panic("missing alias");
					if (vq == NULL)
						vx->v_flag &= ~VALIASED;
					vp->v_flag &= ~VALIASED;
				}
			}
			simple_unlock(&spechash_slock);
			FREE(vp->v_specinfo, M_VNODE);
			vp->v_specinfo = NULL;
		}
	}
	LOCK_ASSERT(!simple_lock_held(&vp->v_interlock));

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		VOP_INACTIVE(vp, l);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, l))
		panic("vclean: cannot reclaim, vp %p", vp);
	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			/*
			 * Insert at tail of LRU list.
			 */

			simple_unlock(&vp->v_interlock);
			simple_lock(&vnode_free_list_slock);
#ifdef DIAGNOSTIC
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean, vp %p", vp);
#endif
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			simple_unlock(&vnode_free_list_slock);
		} else
			simple_unlock(&vp->v_interlock);
	}

	KASSERT(vp->v_uobj.uo_npages == 0);
	if (vp->v_type == VREG && vp->v_ractx != NULL) {
		uvm_ra_freectx(vp->v_ractx);
		vp->v_ractx = NULL;
	}
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	simple_lock(&vp->v_interlock);
	VN_KNOTE(vp, NOTE_REVOKE);	/* FreeBSD has this in vn_pollgone() */
	vp->v_flag &= ~(VXLOCK|VLOCKSWORK);
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		simple_unlock(&vp->v_interlock);
		wakeup((caddr_t)vp);
	} else
		simple_unlock(&vp->v_interlock);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(struct vnode *vp, struct simplelock *inter_lkp, struct lwp *l)
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, l);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(struct vnode *vp)
{
	struct lwp *l = curlwp;				/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, l);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(struct vnode *vp, struct lwp *l)
{

	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */

	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */

	vclean(vp, DOCLOSE, l);
	KASSERT((vp->v_flag & VONWORKLST) == 0);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */

	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);

	/*
	 * The test of the back pointer and the reference count of
	 * zero is because it will be removed from the free list by
	 * getcleanvnode, but will not have its reference count
	 * incremented until after calling vgone. If the reference
	 * count were incremented first, vgone would (incorrectly)
	 * try to close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */

	vp->v_type = VBAD;
	if (vp->v_usecount == 0) {
		boolean_t dofree;

		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean, vp %p", vp);
		/*
		 * if it isn't on the freelist, we're called by getcleanvnode
		 * and vnode is being re-used.  otherwise, we'll free it.
		 */
		dofree = vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb;
		if (dofree) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			numvnodes--;
		}
		simple_unlock(&vnode_free_list_slock);
		if (dofree)
			pool_put(&vnode_pool, vp);
	}
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev_t dev, enum vtype type, struct vnode **vpp)
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(int maj, int minl, int minh, enum vtype type)
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(struct vnode *vp)
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp &&
		    (vq->v_flag & VXLOCK) == 0) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
#define ARRAY_PRINT(idx, arr) \
    ((idx) > 0 && (idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN")

const char * const vnode_tags[] = { VNODE_TAGS };
const char * const vnode_types[] = { VNODE_TYPES };
const char vnode_flagbits[] = VNODE_FLAGBITS;

/*
 * Print out a description of a vnode.
 */
void
vprint(const char *label, struct vnode *vp)
{
	char bf[96];

	if (label != NULL)
		printf("%s: ", label);
	printf("tag %s(%d) type %s(%d), usecount %d, writecount %ld, "
	    "refcount %ld,", ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
	    ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
	    vp->v_usecount, vp->v_writecount, vp->v_holdcnt);
	bitmask_snprintf(vp->v_flag, vnode_flagbits, bf, sizeof(bf));
	if (bf[0] != '\0')
		printf(" flags (%s)", &bf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes(void)
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	     mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp))
				vprint(NULL, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * sysctl helper routine to return list of supported fstypes
 */
static int
sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
{
	char bf[MFSNAMELEN];
	char *where = oldp;
	struct vfsops *v;
	size_t needed, left, slen;
	int error, first;

	if (newp != NULL)
		return (EPERM);
	if (namelen != 0)
		return (EINVAL);

	first = 1;
	error = 0;
	needed = 0;
	left = *oldlenp;

	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (where == NULL)
			needed += strlen(v->vfs_name) + 1;
		else {
			memset(bf, 0, sizeof(bf));
			if (first) {
				strncpy(bf, v->vfs_name, sizeof(bf));
				first = 0;
			} else {
				bf[0] = ' ';
				strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
			}
			bf[sizeof(bf)-1] = '\0';
			slen = strlen(bf);
			if (left < slen + 1)
				break;
			/* +1 to copy out the trailing NUL byte */
			error = copyout(bf, where, slen + 1);
			if (error)
				break;
			where += slen;
			needed += slen;
			left -= slen;
		}
	}
	*oldlenp = needed;
	return (error);
}

/*
 * Top level filesystem related information gathering.
 */
SYSCTL_SETUP(sysctl_vfs_setup, "sysctl vfs subtree setup")
{
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "vfs", NULL,
		       NULL, 0, NULL, 0,
		       CTL_VFS, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "generic",
		       SYSCTL_DESCR("Non-specific vfs related information"),
		       NULL, 0, NULL, 0,
		       CTL_VFS, VFS_GENERIC, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "usermount",
		       SYSCTL_DESCR("Whether unprivileged users may mount "
				    "filesystems"),
		       NULL, 0, &dovfsusermount, 0,
		       CTL_VFS, VFS_GENERIC, VFS_USERMOUNT, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRING, "fstypes",
		       SYSCTL_DESCR("List of file systems present"),
		       sysctl_vfs_generic_fstypes, 0, NULL, 0,
		       CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL);
}


int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_kern_vnode(SYSCTLFN_ARGS)
{
	char *where = oldp;
	size_t *sizep = oldlenp;
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

	if (namelen != 0)
		return (EOPNOTSUPP);
	if (newp != NULL)
		return (EPERM);

#define VPTRSZ	sizeof(struct vnode *)
#define VNODESZ	sizeof(struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	     mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = LIST_FIRST(&mp->mnt_vnodelist);
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = LIST_NEXT(vp, v_mntvnodes);
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(struct vnode *vp)
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Do the usual access checking.
 * file_mode, uid and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list
 */
int
vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
    mode_t acc_mode, struct ucred *cred)
{
	mode_t mask;

	/*
	 * Super-user always gets read/write access, but execute access depends
	 * on at least one execute bit being set.
	 */
	if (cred->cr_uid == 0) {
		if ((acc_mode & VEXEC) && type != VDIR &&
		    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
			return (EACCES);
		return (0);
	}

	mask = 0;

	/* Otherwise, check the owner. */
	if (cred->cr_uid == uid) {
		if (acc_mode & VEXEC)
			mask |= S_IXUSR;
		if (acc_mode & VREAD)
			mask |= S_IRUSR;
		if (acc_mode & VWRITE)
			mask |= S_IWUSR;

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall(struct lwp *l)
{
	struct mount *mp, *nmp;
	int allerror, error;

	printf("unmounting file systems...");
	for (allerror = 0,
	     mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
		printf("\nunmounting %s (%s)...",
		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
		/*
		 * XXX Freeze syncer.  Must do this before locking the
		 * mount point.  See dounmount() for details.
		 */
		lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
		if (vfs_busy(mp, 0, 0)) {
			lockmgr(&syncer_lock, LK_RELEASE, NULL);
			continue;
		}
		if ((error = dounmount(mp, MNT_FORCE, l)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}
	printf(" done\n");
	if (allerror)
		printf("WARNING: some file systems would not unmount\n");
}

extern struct simplelock bqueue_slock; /* XXX */

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown(void)
{
	struct lwp *l = curlwp;
	struct proc *p;

	/* XXX we're certainly not running in proc0's context! */
	if (l == NULL || (p = l->l_proc) == NULL)
		p = &proc0;

	printf("syncing disks... ");

	/* remove user process from run queue */
	suspendsched();
	(void) spl0();

	/* avoid coming back this way again if we panic. */
	doing_shutdown = 1;

	sys_sync(l, NULL, NULL);

	/* Wait for sync to finish. */
	if (buf_syncwait() != 0) {
#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif
		printf("giving up\n");
		return;
	} else
		printf("done\n");

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems. */
	vfs_unmountall(l);
}

/*
 * Mount the root file system.  If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot(void)
{
	struct vfsops *v;
	int error = ENODEV;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	switch (root_device->dv_class) {
	case DV_IFNET:
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET "
			    "(0x%08x -> %d,%d)", rootdev,
			    major(rootdev), minor(rootdev));
		break;

	case DV_DISK:
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
		if (bdevvp(rootdev, &rootvp))
			panic("vfs_mountroot: can't get vnode for rootdev");
		error = VOP_OPEN(rootvp, FREAD, FSCRED, curlwp);
		if (error) {
			printf("vfs_mountroot: can't open root device\n");
			return (error);
		}
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    root_device->dv_xname);
		return (ENODEV);
	}

	/*
	 * If user specified a file system, use it.
	 */
	if (mountroot != NULL) {
		error = (*mountroot)();
		goto done;
	}

	/*
	 * Try each file system currently configured into the kernel.
	 */
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (v->vfs_mountroot == NULL)
			continue;
#ifdef DEBUG
		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
#endif
		error = (*v->vfs_mountroot)();
		if (!error) {
			aprint_normal("root file system type: %s\n",
			    v->vfs_name);
			break;
		}
	}

	if (v == NULL) {
		printf("no file system for %s", root_device->dv_xname);
		if (root_device->dv_class == DV_DISK)
			printf(" (dev 0x%x)", rootdev);
		printf("\n");
		error = EFTYPE;
	}

done:
	if (error && root_device->dv_class == DV_DISK) {
		VOP_CLOSE(rootvp, FREAD, FSCRED, curlwp);
		vrele(rootvp);
	}
	return (error);
}

/*
 * Given a file system name, look up the vfsops for that
 * file system, or return NULL if file system isn't present
 * in the kernel.
 */
struct vfsops *
vfs_getopsbyname(const char *name)
{
	struct vfsops *v;

	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (strcmp(v->vfs_name, name) == 0)
			break;
	}

	return (v);
}

/*
 * Establish a file system and initialize it.
 */
int
vfs_attach(struct vfsops *vfs)
{
	struct vfsops *v;
	int error = 0;

	/*
	 * Make sure this file system doesn't already exist.
	 */
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
			error = EEXIST;
			goto out;
		}
	}

	/*
	 * Initialize the vnode operations for this file system.
	 */
	vfs_opv_init(vfs->vfs_opv_descs);

	/*
	 * Now initialize the file system itself.
	 */
	(*vfs->vfs_init)();

	/*
	 * ...and link it into the kernel's list.
	 */
	LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);

	/*
	 * Sanity: make sure the reference count is 0.
	 */
	vfs->vfs_refcount = 0;

 out:
	return (error);
}
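
/*
 * Illustrative sketch (not part of this kernel source): how a file
 * system makes itself known through vfs_attach() above, typically from
 * its initialization hook.  "examplefs" and every identifier prefixed
 * with it are assumptions; only vfs_attach()/vfs_detach() and the
 * vfsops member names come from this file and <sys/mount.h>.  Most
 * vfsops members are left out for brevity.  Compiled out.
 */
#if 0
extern const struct vnodeopv_desc examplefs_vnodeop_opv_desc;	/* hypothetical */

static const struct vnodeopv_desc * const examplefs_vnodeopv_descs[] = {
	&examplefs_vnodeop_opv_desc,
	NULL,
};

static void
examplefs_init(void)
{
	/* allocate pools, hash tables, etc. */
}

static void
examplefs_done(void)
{
	/* release whatever examplefs_init() set up */
}

static struct vfsops examplefs_vfsops = {
	.vfs_name = "examplefs",
	.vfs_init = examplefs_init,
	.vfs_done = examplefs_done,
	.vfs_opv_descs = examplefs_vnodeopv_descs,
	/* mount/unmount/root/statvfs/... omitted in this sketch */
};

void
examplefs_modinit(void)
{
	int error;

	/* Runs vfs_opv_init() and vfs_init(), then links us into vfs_list. */
	error = vfs_attach(&examplefs_vfsops);
	if (error != 0)
		printf("examplefs: vfs_attach failed (error %d)\n", error);
}
#endif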

/*
 * Remove a file system from the kernel.
 */
int
vfs_detach(struct vfsops *vfs)
{
	struct vfsops *v;

	/*
	 * Make sure no one is using the filesystem.
	 */
	if (vfs->vfs_refcount != 0)
		return (EBUSY);

	/*
	 * ...and remove it from the kernel's list.
	 */
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (v == vfs) {
			LIST_REMOVE(v, vfs_list);
			break;
		}
	}

	if (v == NULL)
		return (ESRCH);

	/*
	 * Now run the file system-specific cleanups.
	 */
	(*vfs->vfs_done)();

	/*
	 * Free the vnode operations vector.
	 */
	vfs_opv_free(vfs->vfs_opv_descs);
	return (0);
}

void
vfs_reinit(void)
{
	struct vfsops *vfs;

	LIST_FOREACH(vfs, &vfs_list, vfs_list) {
		if (vfs->vfs_reinit) {
			(*vfs->vfs_reinit)();
		}
	}
}

/*
 * Request a filesystem to suspend write operations.
 */
int
vfs_write_suspend(struct mount *mp, int slpflag, int slptimeo)
{
	struct lwp *l = curlwp;	/* XXX */
	int error;

	while ((mp->mnt_iflag & IMNT_SUSPEND)) {
		if (slptimeo < 0)
			return EWOULDBLOCK;
		error = tsleep(&mp->mnt_flag, slpflag, "suspwt1", slptimeo);
		if (error)
			return error;
	}
	mp->mnt_iflag |= IMNT_SUSPEND;

	simple_lock(&mp->mnt_slock);
	if (mp->mnt_writeopcountupper > 0)
		ltsleep(&mp->mnt_writeopcountupper, PUSER - 1, "suspwt",
		    0, &mp->mnt_slock);
	simple_unlock(&mp->mnt_slock);

	error = VFS_SYNC(mp, MNT_WAIT, l->l_proc->p_ucred, l);
	if (error) {
		vfs_write_resume(mp);
		return error;
	}
	mp->mnt_iflag |= IMNT_SUSPENDLOW;

	simple_lock(&mp->mnt_slock);
	if (mp->mnt_writeopcountlower > 0)
		ltsleep(&mp->mnt_writeopcountlower, PUSER - 1, "suspwt",
		    0, &mp->mnt_slock);
	mp->mnt_iflag |= IMNT_SUSPENDED;
	simple_unlock(&mp->mnt_slock);

	return 0;
}

/*
 * Request a filesystem to resume write operations.
 */
void
vfs_write_resume(struct mount *mp)
{

	if ((mp->mnt_iflag & IMNT_SUSPEND) == 0)
		return;
	mp->mnt_iflag &= ~(IMNT_SUSPEND | IMNT_SUSPENDLOW | IMNT_SUSPENDED);
	wakeup(&mp->mnt_flag);
}
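
/*
 * Illustrative sketch (not part of this kernel source): the intended
 * pairing of vfs_write_suspend()/vfs_write_resume() above.  A caller
 * that needs a quiescent, fully synced file system (a snapshot being
 * the classic case) suspends writes, does its work, and resumes.
 * "examplefs_take_snapshot" and the work in the middle are assumptions.
 * Compiled out.
 */
#if 0
int
examplefs_take_snapshot(struct mount *mp)
{
	int error;

	/* Block new write ops, wait for in-flight ones, sync the fs. */
	error = vfs_write_suspend(mp, PUSER | PCATCH, 0);
	if (error != 0)
		return error;

	/* ... record a consistent on-disk image here ... */

	/* Clear the IMNT_SUSPEND* flags and wake up blocked writers. */
	vfs_write_resume(mp);
	return 0;
}
#endif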

void
copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
{
	const struct statvfs *mbp;

	if (sbp == (mbp = &mp->mnt_stat))
		return;

	(void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
	sbp->f_fsid = mbp->f_fsid;
	sbp->f_owner = mbp->f_owner;
	sbp->f_flag = mbp->f_flag;
	sbp->f_syncwrites = mbp->f_syncwrites;
	sbp->f_asyncwrites = mbp->f_asyncwrites;
	sbp->f_syncreads = mbp->f_syncreads;
	sbp->f_asyncreads = mbp->f_asyncreads;
	(void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
	(void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
	    sizeof(sbp->f_fstypename));
	(void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
	    sizeof(sbp->f_mntonname));
	(void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
	    sizeof(sbp->f_mntfromname));
	sbp->f_namemax = mbp->f_namemax;
}

int
set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
    struct mount *mp, struct lwp *l)
{
	int error;
	size_t size;
	struct statvfs *sfs = &mp->mnt_stat;
	int (*fun)(const void *, void *, size_t, size_t *);

	(void)strncpy(mp->mnt_stat.f_fstypename, mp->mnt_op->vfs_name,
	    sizeof(mp->mnt_stat.f_fstypename));

	if (onp) {
		struct cwdinfo *cwdi = l->l_proc->p_cwdi;
		fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
		if (cwdi->cwdi_rdir != NULL) {
			size_t len;
			char *bp;
			char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);

			if (!path) /* XXX can't happen with M_WAITOK */
				return ENOMEM;

			bp = path + MAXPATHLEN;
			*--bp = '\0';
			error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
			    path, MAXPATHLEN / 2, 0, l);
			if (error) {
				free(path, M_TEMP);
				return error;
			}

			len = strlen(bp);
			if (len > sizeof(sfs->f_mntonname) - 1)
				len = sizeof(sfs->f_mntonname) - 1;
			(void)strncpy(sfs->f_mntonname, bp, len);
			free(path, M_TEMP);

			if (len < sizeof(sfs->f_mntonname) - 1) {
				error = (*fun)(onp, &sfs->f_mntonname[len],
				    sizeof(sfs->f_mntonname) - len - 1, &size);
				if (error)
					return error;
				size += len;
			} else {
				size = len;
			}
		} else {
			error = (*fun)(onp, &sfs->f_mntonname,
			    sizeof(sfs->f_mntonname) - 1, &size);
			if (error)
				return error;
		}
		(void)memset(sfs->f_mntonname + size, 0,
		    sizeof(sfs->f_mntonname) - size);
	}

	if (fromp) {
		fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
		error = (*fun)(fromp, sfs->f_mntfromname,
		    sizeof(sfs->f_mntfromname) - 1, &size);
		if (error)
			return error;
		(void)memset(sfs->f_mntfromname + size, 0,
		    sizeof(sfs->f_mntfromname) - size);
	}
	return 0;
}

#ifdef DDB
static const char buf_flagbits[] = BUF_FLAGBITS;

void
vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...))
{
	char bf[1024];

	(*pr)(" vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" dev 0x%x\n",
	    bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);

	bitmask_snprintf(bp->b_flags, buf_flagbits, bf, sizeof(bf));
	(*pr)(" error %d flags 0x%s\n", bp->b_error, bf);

	(*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
	    bp->b_bufsize, bp->b_bcount, bp->b_resid);
	(*pr)(" data %p saveaddr %p dep %p\n",
	    bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
	(*pr)(" iodone %p\n", bp->b_iodone);
}

void
vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...))
{
	char bf[256];

	uvm_object_printit(&vp->v_uobj, full, pr);
	bitmask_snprintf(vp->v_flag, vnode_flagbits, bf, sizeof(bf));
	(*pr)("\nVNODE flags %s\n", bf);
	(*pr)("mp %p numoutput %d size 0x%llx\n",
	    vp->v_mount, vp->v_numoutput, vp->v_size);

	(*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
	    vp->v_data, vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt, vp->v_numoutput);

	(*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
	    ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
	    ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
	    vp->v_mount, vp->v_mountedhere);

	if (full) {
		struct buf *bp;

		(*pr)("clean bufs:\n");
		LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}

		(*pr)("dirty bufs:\n");
		LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}
	}
}

void
vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...))
{
	char sbuf[256];

	(*pr)("vnodecovered = %p syncer = %p data = %p\n",
	    mp->mnt_vnodecovered, mp->mnt_syncer, mp->mnt_data);

	(*pr)("fs_bshift %d dev_bshift = %d\n",
	    mp->mnt_fs_bshift, mp->mnt_dev_bshift);

	bitmask_snprintf(mp->mnt_flag, __MNT_FLAG_BITS, sbuf, sizeof(sbuf));
	(*pr)("flag = %s\n", sbuf);

	bitmask_snprintf(mp->mnt_iflag, __IMNT_FLAG_BITS, sbuf, sizeof(sbuf));
	(*pr)("iflag = %s\n", sbuf);

	/* XXX use lockmgr_printinfo */
	if (mp->mnt_lock.lk_sharecount)
		(*pr)(" lock type %s: SHARED (count %d)", mp->mnt_lock.lk_wmesg,
		    mp->mnt_lock.lk_sharecount);
	else if (mp->mnt_lock.lk_flags & LK_HAVE_EXCL) {
		(*pr)(" lock type %s: EXCL (count %d) by ",
		    mp->mnt_lock.lk_wmesg, mp->mnt_lock.lk_exclusivecount);
		if (mp->mnt_lock.lk_flags & LK_SPIN)
			(*pr)("processor %lu", mp->mnt_lock.lk_cpu);
		else
			(*pr)("pid %d.%d", mp->mnt_lock.lk_lockholder,
			    mp->mnt_lock.lk_locklwp);
	} else
		(*pr)(" not locked");
	if ((mp->mnt_lock.lk_flags & LK_SPIN) == 0 &&
	    mp->mnt_lock.lk_waitcount > 0)
		(*pr)(" with %d pending", mp->mnt_lock.lk_waitcount);

	(*pr)("\n");

	if (mp->mnt_unmounter) {
		(*pr)("unmounter pid = %d ",
		    mp->mnt_unmounter->l_proc->p_pid);
	}
	(*pr)("wcnt = %d, writeopcountupper = %d, writeopcountlower = %d\n",
	    mp->mnt_wcnt, mp->mnt_writeopcountupper,
	    mp->mnt_writeopcountlower);

	(*pr)("statvfs cache:\n");
	(*pr)("\tbsize = %lu\n", mp->mnt_stat.f_bsize);
	(*pr)("\tfrsize = %lu\n", mp->mnt_stat.f_frsize);
	(*pr)("\tiosize = %lu\n", mp->mnt_stat.f_iosize);

	(*pr)("\tblocks = %"PRIu64"\n", mp->mnt_stat.f_blocks);
	(*pr)("\tbfree = %"PRIu64"\n", mp->mnt_stat.f_bfree);
	(*pr)("\tbavail = %"PRIu64"\n", mp->mnt_stat.f_bavail);
	(*pr)("\tbresvd = %"PRIu64"\n", mp->mnt_stat.f_bresvd);

	(*pr)("\tfiles = %"PRIu64"\n", mp->mnt_stat.f_files);
	(*pr)("\tffree = %"PRIu64"\n", mp->mnt_stat.f_ffree);
	(*pr)("\tfavail = %"PRIu64"\n", mp->mnt_stat.f_favail);
	(*pr)("\tfresvd = %"PRIu64"\n", mp->mnt_stat.f_fresvd);

	(*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
	    mp->mnt_stat.f_fsidx.__fsid_val[0],
	    mp->mnt_stat.f_fsidx.__fsid_val[1]);

	(*pr)("\towner = %"PRIu32"\n", mp->mnt_stat.f_owner);
	(*pr)("\tnamemax = %lu\n", mp->mnt_stat.f_namemax);

	bitmask_snprintf(mp->mnt_stat.f_flag, __MNT_FLAG_BITS, sbuf,
	    sizeof(sbuf));
	(*pr)("\tflag = %s\n", sbuf);
	(*pr)("\tsyncwrites = %"PRIu64"\n", mp->mnt_stat.f_syncwrites);
	(*pr)("\tasyncwrites = %"PRIu64"\n", mp->mnt_stat.f_asyncwrites);
	(*pr)("\tsyncreads = %"PRIu64"\n", mp->mnt_stat.f_syncreads);
	(*pr)("\tasyncreads = %"PRIu64"\n", mp->mnt_stat.f_asyncreads);
	(*pr)("\tfstypename = %s\n", mp->mnt_stat.f_fstypename);
	(*pr)("\tmntonname = %s\n", mp->mnt_stat.f_mntonname);
	(*pr)("\tmntfromname = %s\n", mp->mnt_stat.f_mntfromname);

	{
		int cnt = 0;
		struct vnode *vp;
		(*pr)("locked vnodes =");
		/* XXX would take mountlist lock, except ddb may not have context */
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp)) {
				if ((++cnt % 6) == 0) {
					(*pr)(" %p,\n\t", vp);
				} else {
					(*pr)(" %p,", vp);
				}
			}
		}
		(*pr)("\n");
	}

	if (full) {
		int cnt = 0;
		struct vnode *vp;
		(*pr)("all vnodes =");
		/* XXX would take mountlist lock, except ddb may not have context */
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (!LIST_NEXT(vp, v_mntvnodes)) {
				(*pr)(" %p", vp);
			} else if ((++cnt % 6) == 0) {
				(*pr)(" %p,\n\t", vp);
			} else {
				(*pr)(" %p,", vp);
			}
		}
		(*pr)("\n");
	}
}
#endif /* DDB */