/*	$NetBSD: vfs_subr.c,v 1.265 2006/02/25 07:11:31 skrll Exp $	*/

/*-
 * Copyright (c) 1997, 1998, 2004, 2005 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 * This code is derived from software contributed to The NetBSD Foundation
 * by Charles M. Hannum.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.265 2006/02/25 07:11:31 skrll Exp $");

#include "opt_inet.h"
#include "opt_ddb.h"
#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/filedesc.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/syncfs/syncfs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_readahead.h>
#include <uvm/uvm_ddb.h>

#include <sys/sysctl.h>

const enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
const int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */
extern int vfs_magiclinks;	/* 1 => expand "magic" symlinks */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
/* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */
struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;

/* XXX - gross; single global lock to protect v_numoutput */
struct simplelock global_v_numoutput_slock = SIMPLELOCK_INITIALIZER;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;			/* root device */

POOL_INIT(vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
    &pool_allocator_nointr);

MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");

/*
 * Local declarations.
 */
static void insmntque(struct vnode *, struct mount *);
static int getdevvp(dev_t, struct vnode **, enum vtype);
static void vclean(struct vnode *, int, struct lwp *);
static struct vnode *getcleanvnode(struct lwp *);

#ifdef DEBUG
void printlockedvnodes(void);
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit(void)
{

	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

int
vfs_drainvnodes(long target, struct lwp *l)
{

	simple_lock(&vnode_free_list_slock);
	while (numvnodes > target) {
		struct vnode *vp;

		vp = getcleanvnode(l);
		if (vp == NULL)
			return EBUSY; /* give up */
		pool_put(&vnode_pool, vp);
		simple_lock(&vnode_free_list_slock);
		numvnodes--;
	}
	simple_unlock(&vnode_free_list_slock);

	return 0;
}

/*
 * grab a vnode from freelist and clean it.
 */
struct vnode *
getcleanvnode(struct lwp *l)
{
	struct vnode *vp;
	struct mount *mp;
	struct freelst *listhd;

	LOCK_ASSERT(simple_lock_held(&vnode_free_list_slock));

	listhd = &vnode_free_list;
try_nextlist:
	TAILQ_FOREACH(vp, listhd, v_freelist) {
		if (!simple_lock_try(&vp->v_interlock))
			continue;
		/*
		 * as our lwp might hold the underlying vnode locked,
		 * don't try to reclaim the VLAYER vnode if it's locked.
		 */
		if ((vp->v_flag & VXLOCK) == 0 &&
		    ((vp->v_flag & VLAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
			if (vn_start_write(vp, &mp, V_NOWAIT) == 0)
				break;
		}
		mp = NULL;
		simple_unlock(&vp->v_interlock);
	}

	if (vp == NULLVP) {
		if (listhd == &vnode_free_list) {
			listhd = &vnode_hold_list;
			goto try_nextlist;
		}
		simple_unlock(&vnode_free_list_slock);
		return NULLVP;
	}

	if (vp->v_usecount)
		panic("free vnode isn't, vp %p", vp);
	TAILQ_REMOVE(listhd, vp, v_freelist);
	/* see comment on why 0xdeadb is set at end of vgone (below) */
	vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
	simple_unlock(&vnode_free_list_slock);
	vp->v_lease = NULL;

	if (vp->v_type != VBAD)
		vgonel(vp, l);
	else
		simple_unlock(&vp->v_interlock);
	vn_finished_write(mp, 0);
#ifdef DIAGNOSTIC
	if (vp->v_data || vp->v_uobj.uo_npages ||
	    TAILQ_FIRST(&vp->v_uobj.memq))
		panic("cleaned vnode isn't, vp %p", vp);
	if (vp->v_numoutput)
		panic("clean vnode has pending I/O's, vp %p", vp);
#endif
	KASSERT((vp->v_flag & VONWORKLST) == 0);

	return vp;
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(struct mount *mp, int flags, struct simplelock *interlkp)
{
	int lkflags;

	while (mp->mnt_iflag & IMNT_UNMOUNT) {
		int gone, n;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
		    && mp->mnt_unmounter == curlwp)
			return (EDEADLK);
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		simple_lock(&mp->mnt_slock);
		mp->mnt_wcnt++;
		ltsleep((caddr_t)mp, PVFS, "vfs_busy", 0, &mp->mnt_slock);
		n = --mp->mnt_wcnt;
		simple_unlock(&mp->mnt_slock);
		gone = mp->mnt_iflag & IMNT_GONE;

		if (n == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(struct mount *mp)
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(const char *fstypename, const char *devname,
    struct mount **mpp)
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	simple_lock_init(&mp->mnt_slock);
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	mp->mnt_leaf = mp;
	vfsp->vfs_refcount++;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid_t *fsid)
{
	struct mount *mp;

	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
		    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(struct mount *mp)
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
	mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
	tfsid.__fsid_val[1] = mtype;
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.__fsid_val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(const char *type)
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}


/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(struct vattr *vap)
{

	vap->va_type = VNON;

	/*
	 * Assign individually so that it is safe even if size and
	 * sign of each member are varied.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec =
	    vap->va_birthtime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec =
	    vap->va_birthtime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)(void *);
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
    struct vnode **vpp)
{
	extern struct uvm_pagerops uvm_vnodeops;
	struct uvm_object *uobj;
	struct lwp *l = curlwp;			/* XXX */
	static int toggle;
	struct vnode *vp;
	int error = 0, tryalloc;

try_again:
	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error.  If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock).
		 */
		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
		if (error && error != EDEADLK)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reticent to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */

	vp = NULL;

	simple_lock(&vnode_free_list_slock);

	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	tryalloc = numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));

	if (tryalloc &&
	    (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
		numvnodes++;
		simple_unlock(&vnode_free_list_slock);
		memset(vp, 0, sizeof(*vp));
		UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 1);
		/*
		 * done by memset() above.
		 *	LIST_INIT(&vp->v_nclist);
		 *	LIST_INIT(&vp->v_dnclist);
		 */
	} else {
		vp = getcleanvnode(l);
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			if (mp && error != EDEADLK)
				vfs_unbusy(mp);
			if (tryalloc) {
				printf("WARNING: unable to allocate new "
				    "vnode, retrying...\n");
				(void) tsleep(&lbolt, PRIBIO, "newvn", hz);
				goto try_again;
			}
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		vp->v_usecount = 1;
		vp->v_flag = 0;
		vp->v_socket = NULL;
	}
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	KASSERT(LIST_EMPTY(&vp->v_nclist));
	KASSERT(LIST_EMPTY(&vp->v_dnclist));
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_data = 0;
	simple_lock_init(&vp->v_interlock);

	/*
	 * initialize uvm_object within vnode.
	 */

	uobj = &vp->v_uobj;
	KASSERT(uobj->pgops == &uvm_vnodeops);
	KASSERT(uobj->uo_npages == 0);
	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
	vp->v_size = VSIZENOTSET;

	if (mp && error != EDEADLK)
		vfs_unbusy(mp);
	return (0);
}

/*
 * This is really just the reverse of getnewvnode(). Needed for
 * VFS_VGET functions who may need to push back a vnode in case
 * of a locking race.
 */
void
ungetnewvnode(struct vnode *vp)
{
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 1)
		panic("ungetnewvnode: busy vnode");
#endif
	vp->v_usecount--;
	insmntque(vp, NULL);
	vp->v_type = VBAD;

	simple_lock(&vp->v_interlock);
	/*
	 * Insert at head of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}

/*
 * Move a vnode from one mount queue to another.
 */
static void
insmntque(struct vnode *vp, struct mount *mp)
{

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_iflag & IMNT_UNMOUNT) &&
	    !(mp->mnt_flag & MNT_SOFTDEP) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(struct buf *bp)
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		/* XXX global lock hack
		 * can't use v_interlock here since this is called
		 * in interrupt context from biodone().
		 */
		simple_lock(&global_v_numoutput_slock);
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput, vp %p", vp);
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
		simple_unlock(&global_v_numoutput_slock);
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vinvalbuf(struct vnode *vp, int flags, struct ucred *cred, struct lwp *l,
    int slpflag, int slptimeo)
{
	struct buf *bp, *nbp;
	int s, error;
	int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
		(flags & V_SAVE ?
		 PGO_CLEANIT : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, 0, 0, flushflags);
	if (error) {
		return error;
	}

	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, l);
		if (error)
			return (error);
#ifdef DIAGNOSTIC
		s = splbio();
		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
			panic("vinvalbuf: dirty bufs, vp %p", vp);
		splx(s);
#endif
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep((caddr_t)bp,
			    slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vinvalbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep((caddr_t)bp,
			    slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vinvalbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			bp->b_flags |= B_BUSY | B_VFLUSH;
			simple_unlock(&bp->b_interlock);
			VOP_BWRITE(bp);
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	splx(s);

	return (0);
}

/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
789 */ 790 int 791 vtruncbuf(struct vnode *vp, daddr_t lbn, int slpflag, int slptimeo) 792 { 793 struct buf *bp, *nbp; 794 int s, error; 795 voff_t off; 796 797 off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift); 798 simple_lock(&vp->v_interlock); 799 error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO); 800 if (error) { 801 return error; 802 } 803 804 s = splbio(); 805 806 restart: 807 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 808 nbp = LIST_NEXT(bp, b_vnbufs); 809 if (bp->b_lblkno < lbn) 810 continue; 811 simple_lock(&bp->b_interlock); 812 if (bp->b_flags & B_BUSY) { 813 bp->b_flags |= B_WANTED; 814 error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK, 815 "vtruncbuf", slptimeo, &bp->b_interlock); 816 if (error) { 817 splx(s); 818 return (error); 819 } 820 goto restart; 821 } 822 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; 823 simple_unlock(&bp->b_interlock); 824 brelse(bp); 825 } 826 827 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 828 nbp = LIST_NEXT(bp, b_vnbufs); 829 if (bp->b_lblkno < lbn) 830 continue; 831 simple_lock(&bp->b_interlock); 832 if (bp->b_flags & B_BUSY) { 833 bp->b_flags |= B_WANTED; 834 error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK, 835 "vtruncbuf", slptimeo, &bp->b_interlock); 836 if (error) { 837 splx(s); 838 return (error); 839 } 840 goto restart; 841 } 842 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; 843 simple_unlock(&bp->b_interlock); 844 brelse(bp); 845 } 846 847 splx(s); 848 849 return (0); 850 } 851 852 void 853 vflushbuf(struct vnode *vp, int sync) 854 { 855 struct buf *bp, *nbp; 856 int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0); 857 int s; 858 859 simple_lock(&vp->v_interlock); 860 (void) VOP_PUTPAGES(vp, 0, 0, flags); 861 862 loop: 863 s = splbio(); 864 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 865 nbp = LIST_NEXT(bp, b_vnbufs); 866 simple_lock(&bp->b_interlock); 867 if ((bp->b_flags & B_BUSY)) { 868 simple_unlock(&bp->b_interlock); 869 continue; 870 } 871 if ((bp->b_flags & B_DELWRI) == 0) 872 panic("vflushbuf: not dirty, bp %p", bp); 873 bp->b_flags |= B_BUSY | B_VFLUSH; 874 simple_unlock(&bp->b_interlock); 875 splx(s); 876 /* 877 * Wait for I/O associated with indirect blocks to complete, 878 * since there is no way to quickly wait for them below. 879 */ 880 if (bp->b_vp == vp || sync == 0) 881 (void) bawrite(bp); 882 else 883 (void) bwrite(bp); 884 goto loop; 885 } 886 if (sync == 0) { 887 splx(s); 888 return; 889 } 890 simple_lock(&global_v_numoutput_slock); 891 while (vp->v_numoutput) { 892 vp->v_flag |= VBWAIT; 893 ltsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0, 894 &global_v_numoutput_slock); 895 } 896 simple_unlock(&global_v_numoutput_slock); 897 splx(s); 898 if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { 899 vprint("vflushbuf: dirty", vp); 900 goto loop; 901 } 902 } 903 904 /* 905 * Associate a buffer with a vnode. 906 */ 907 void 908 bgetvp(struct vnode *vp, struct buf *bp) 909 { 910 int s; 911 912 if (bp->b_vp) 913 panic("bgetvp: not free, bp %p", bp); 914 VHOLD(vp); 915 s = splbio(); 916 bp->b_vp = vp; 917 if (vp->v_type == VBLK || vp->v_type == VCHR) 918 bp->b_dev = vp->v_rdev; 919 else 920 bp->b_dev = NODEV; 921 /* 922 * Insert onto list for new vnode. 923 */ 924 bufinsvn(bp, &vp->v_cleanblkhd); 925 splx(s); 926 } 927 928 /* 929 * Disassociate a buffer from a vnode. 
930 */ 931 void 932 brelvp(struct buf *bp) 933 { 934 struct vnode *vp; 935 int s; 936 937 if (bp->b_vp == NULL) 938 panic("brelvp: vp NULL, bp %p", bp); 939 940 s = splbio(); 941 vp = bp->b_vp; 942 /* 943 * Delete from old vnode list, if on one. 944 */ 945 if (LIST_NEXT(bp, b_vnbufs) != NOLIST) 946 bufremvn(bp); 947 948 if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) && 949 LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { 950 vp->v_flag &= ~(VWRITEMAPDIRTY|VONWORKLST); 951 LIST_REMOVE(vp, v_synclist); 952 } 953 954 bp->b_vp = NULL; 955 HOLDRELE(vp); 956 splx(s); 957 } 958 959 /* 960 * Reassign a buffer from one vnode to another. 961 * Used to assign file specific control information 962 * (indirect blocks) to the vnode to which they belong. 963 * 964 * This function must be called at splbio(). 965 */ 966 void 967 reassignbuf(struct buf *bp, struct vnode *newvp) 968 { 969 struct buflists *listheadp; 970 int delayx; 971 972 /* 973 * Delete from old vnode list, if on one. 974 */ 975 if (LIST_NEXT(bp, b_vnbufs) != NOLIST) 976 bufremvn(bp); 977 /* 978 * If dirty, put on list of dirty buffers; 979 * otherwise insert onto list of clean buffers. 980 */ 981 if ((bp->b_flags & B_DELWRI) == 0) { 982 listheadp = &newvp->v_cleanblkhd; 983 if (TAILQ_EMPTY(&newvp->v_uobj.memq) && 984 (newvp->v_flag & VONWORKLST) && 985 LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) { 986 newvp->v_flag &= ~(VWRITEMAPDIRTY|VONWORKLST); 987 LIST_REMOVE(newvp, v_synclist); 988 } 989 } else { 990 listheadp = &newvp->v_dirtyblkhd; 991 if ((newvp->v_flag & VONWORKLST) == 0) { 992 switch (newvp->v_type) { 993 case VDIR: 994 delayx = dirdelay; 995 break; 996 case VBLK: 997 if (newvp->v_specmountpoint != NULL) { 998 delayx = metadelay; 999 break; 1000 } 1001 /* fall through */ 1002 default: 1003 delayx = filedelay; 1004 break; 1005 } 1006 if (!newvp->v_mount || 1007 (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0) 1008 vn_syncer_add_to_worklist(newvp, delayx); 1009 } 1010 } 1011 bufinsvn(bp, listheadp); 1012 } 1013 1014 /* 1015 * Create a vnode for a block device. 1016 * Used for root filesystem and swap areas. 1017 * Also used for memory file system special devices. 1018 */ 1019 int 1020 bdevvp(dev_t dev, struct vnode **vpp) 1021 { 1022 1023 return (getdevvp(dev, vpp, VBLK)); 1024 } 1025 1026 /* 1027 * Create a vnode for a character device. 1028 * Used for kernfs and some console handling. 1029 */ 1030 int 1031 cdevvp(dev_t dev, struct vnode **vpp) 1032 { 1033 1034 return (getdevvp(dev, vpp, VCHR)); 1035 } 1036 1037 /* 1038 * Create a vnode for a device. 1039 * Used by bdevvp (block device) for root file system etc., 1040 * and by cdevvp (character device) for console and kernfs. 1041 */ 1042 static int 1043 getdevvp(dev_t dev, struct vnode **vpp, enum vtype type) 1044 { 1045 struct vnode *vp; 1046 struct vnode *nvp; 1047 int error; 1048 1049 if (dev == NODEV) { 1050 *vpp = NULLVP; 1051 return (0); 1052 } 1053 error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp); 1054 if (error) { 1055 *vpp = NULLVP; 1056 return (error); 1057 } 1058 vp = nvp; 1059 vp->v_type = type; 1060 if ((nvp = checkalias(vp, dev, NULL)) != 0) { 1061 vput(vp); 1062 vp = nvp; 1063 } 1064 *vpp = vp; 1065 return (0); 1066 } 1067 1068 /* 1069 * Check to see if the new vnode represents a special device 1070 * for which we already have a vnode (either because of 1071 * bdevvp() or because of a different vnode representing 1072 * the same block device). If such an alias exists, deallocate 1073 * the existing contents and return the aliased vnode. 
The 1074 * caller is responsible for filling it with its new contents. 1075 */ 1076 struct vnode * 1077 checkalias(struct vnode *nvp, dev_t nvp_rdev, struct mount *mp) 1078 { 1079 struct lwp *l = curlwp; /* XXX */ 1080 struct vnode *vp; 1081 struct vnode **vpp; 1082 1083 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 1084 return (NULLVP); 1085 1086 vpp = &speclisth[SPECHASH(nvp_rdev)]; 1087 loop: 1088 simple_lock(&spechash_slock); 1089 for (vp = *vpp; vp; vp = vp->v_specnext) { 1090 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) 1091 continue; 1092 /* 1093 * Alias, but not in use, so flush it out. 1094 */ 1095 simple_lock(&vp->v_interlock); 1096 simple_unlock(&spechash_slock); 1097 if (vp->v_usecount == 0) { 1098 vgonel(vp, l); 1099 goto loop; 1100 } 1101 /* 1102 * What we're interested to know here is if someone else has 1103 * removed this vnode from the device hash list while we were 1104 * waiting. This can only happen if vclean() did it, and 1105 * this requires the vnode to be locked. 1106 */ 1107 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) 1108 goto loop; 1109 if (vp->v_specinfo == NULL) { 1110 vput(vp); 1111 goto loop; 1112 } 1113 simple_lock(&spechash_slock); 1114 break; 1115 } 1116 if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) { 1117 MALLOC(nvp->v_specinfo, struct specinfo *, 1118 sizeof(struct specinfo), M_VNODE, M_NOWAIT); 1119 /* XXX Erg. */ 1120 if (nvp->v_specinfo == NULL) { 1121 simple_unlock(&spechash_slock); 1122 uvm_wait("checkalias"); 1123 goto loop; 1124 } 1125 1126 nvp->v_rdev = nvp_rdev; 1127 nvp->v_hashchain = vpp; 1128 nvp->v_specnext = *vpp; 1129 nvp->v_specmountpoint = NULL; 1130 simple_unlock(&spechash_slock); 1131 nvp->v_speclockf = NULL; 1132 simple_lock_init(&nvp->v_spec_cow_slock); 1133 SLIST_INIT(&nvp->v_spec_cow_head); 1134 nvp->v_spec_cow_req = 0; 1135 nvp->v_spec_cow_count = 0; 1136 1137 *vpp = nvp; 1138 if (vp != NULLVP) { 1139 nvp->v_flag |= VALIASED; 1140 vp->v_flag |= VALIASED; 1141 vput(vp); 1142 } 1143 return (NULLVP); 1144 } 1145 simple_unlock(&spechash_slock); 1146 VOP_UNLOCK(vp, 0); 1147 simple_lock(&vp->v_interlock); 1148 vclean(vp, 0, l); 1149 vp->v_op = nvp->v_op; 1150 vp->v_tag = nvp->v_tag; 1151 vp->v_vnlock = &vp->v_lock; 1152 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); 1153 nvp->v_type = VNON; 1154 insmntque(vp, mp); 1155 return (vp); 1156 } 1157 1158 /* 1159 * Grab a particular vnode from the free list, increment its 1160 * reference count and lock it. If the vnode lock bit is set the 1161 * vnode is being eliminated in vgone. In that case, we can not 1162 * grab the vnode, so the process is awakened when the transition is 1163 * completed, and an error returned to indicate that the vnode is no 1164 * longer usable (possibly having been changed to a new file system type). 1165 */ 1166 int 1167 vget(struct vnode *vp, int flags) 1168 { 1169 int error; 1170 1171 /* 1172 * If the vnode is in the process of being cleaned out for 1173 * another use, we wait for the cleaning to finish and then 1174 * return failure. Cleaning is determined by checking that 1175 * the VXLOCK flag is set. 
1176 */ 1177 1178 if ((flags & LK_INTERLOCK) == 0) 1179 simple_lock(&vp->v_interlock); 1180 if ((vp->v_flag & (VXLOCK | VFREEING)) != 0) { 1181 if (flags & LK_NOWAIT) { 1182 simple_unlock(&vp->v_interlock); 1183 return EBUSY; 1184 } 1185 vp->v_flag |= VXWANT; 1186 ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock); 1187 return (ENOENT); 1188 } 1189 if (vp->v_usecount == 0) { 1190 simple_lock(&vnode_free_list_slock); 1191 if (vp->v_holdcnt > 0) 1192 TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); 1193 else 1194 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1195 simple_unlock(&vnode_free_list_slock); 1196 } 1197 vp->v_usecount++; 1198 #ifdef DIAGNOSTIC 1199 if (vp->v_usecount == 0) { 1200 vprint("vget", vp); 1201 panic("vget: usecount overflow, vp %p", vp); 1202 } 1203 #endif 1204 if (flags & LK_TYPE_MASK) { 1205 if ((error = vn_lock(vp, flags | LK_INTERLOCK))) { 1206 vrele(vp); 1207 } 1208 return (error); 1209 } 1210 simple_unlock(&vp->v_interlock); 1211 return (0); 1212 } 1213 1214 /* 1215 * vput(), just unlock and vrele() 1216 */ 1217 void 1218 vput(struct vnode *vp) 1219 { 1220 struct lwp *l = curlwp; /* XXX */ 1221 1222 #ifdef DIAGNOSTIC 1223 if (vp == NULL) 1224 panic("vput: null vp"); 1225 #endif 1226 simple_lock(&vp->v_interlock); 1227 vp->v_usecount--; 1228 if (vp->v_usecount > 0) { 1229 simple_unlock(&vp->v_interlock); 1230 VOP_UNLOCK(vp, 0); 1231 return; 1232 } 1233 #ifdef DIAGNOSTIC 1234 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 1235 vprint("vput: bad ref count", vp); 1236 panic("vput: ref cnt"); 1237 } 1238 #endif 1239 /* 1240 * Insert at tail of LRU list. 1241 */ 1242 simple_lock(&vnode_free_list_slock); 1243 if (vp->v_holdcnt > 0) 1244 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); 1245 else 1246 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 1247 simple_unlock(&vnode_free_list_slock); 1248 if (vp->v_flag & VEXECMAP) { 1249 uvmexp.execpages -= vp->v_uobj.uo_npages; 1250 uvmexp.filepages += vp->v_uobj.uo_npages; 1251 } 1252 vp->v_flag &= ~(VTEXT|VEXECMAP|VWRITEMAP); 1253 simple_unlock(&vp->v_interlock); 1254 VOP_INACTIVE(vp, l); 1255 } 1256 1257 /* 1258 * Vnode release. 1259 * If count drops to zero, call inactive routine and return to freelist. 1260 */ 1261 void 1262 vrele(struct vnode *vp) 1263 { 1264 struct lwp *l = curlwp; /* XXX */ 1265 1266 #ifdef DIAGNOSTIC 1267 if (vp == NULL) 1268 panic("vrele: null vp"); 1269 #endif 1270 simple_lock(&vp->v_interlock); 1271 vp->v_usecount--; 1272 if (vp->v_usecount > 0) { 1273 simple_unlock(&vp->v_interlock); 1274 return; 1275 } 1276 #ifdef DIAGNOSTIC 1277 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 1278 vprint("vrele: bad ref count", vp); 1279 panic("vrele: ref cnt vp %p", vp); 1280 } 1281 #endif 1282 /* 1283 * Insert at tail of LRU list. 1284 */ 1285 simple_lock(&vnode_free_list_slock); 1286 if (vp->v_holdcnt > 0) 1287 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); 1288 else 1289 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 1290 simple_unlock(&vnode_free_list_slock); 1291 if (vp->v_flag & VEXECMAP) { 1292 uvmexp.execpages -= vp->v_uobj.uo_npages; 1293 uvmexp.filepages += vp->v_uobj.uo_npages; 1294 } 1295 vp->v_flag &= ~(VTEXT|VEXECMAP|VWRITEMAP); 1296 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0) 1297 VOP_INACTIVE(vp, l); 1298 } 1299 1300 /* 1301 * Page or buffer structure gets a reference. 1302 * Called with v_interlock held. 
1303 */ 1304 void 1305 vholdl(struct vnode *vp) 1306 { 1307 1308 /* 1309 * If it is on the freelist and the hold count is currently 1310 * zero, move it to the hold list. The test of the back 1311 * pointer and the use reference count of zero is because 1312 * it will be removed from a free list by getnewvnode, 1313 * but will not have its reference count incremented until 1314 * after calling vgone. If the reference count were 1315 * incremented first, vgone would (incorrectly) try to 1316 * close the previous instance of the underlying object. 1317 * So, the back pointer is explicitly set to `0xdeadb' in 1318 * getnewvnode after removing it from a freelist to ensure 1319 * that we do not try to move it here. 1320 */ 1321 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) && 1322 vp->v_holdcnt == 0 && vp->v_usecount == 0) { 1323 simple_lock(&vnode_free_list_slock); 1324 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1325 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); 1326 simple_unlock(&vnode_free_list_slock); 1327 } 1328 vp->v_holdcnt++; 1329 } 1330 1331 /* 1332 * Page or buffer structure frees a reference. 1333 * Called with v_interlock held. 1334 */ 1335 void 1336 holdrelel(struct vnode *vp) 1337 { 1338 1339 if (vp->v_holdcnt <= 0) 1340 panic("holdrelel: holdcnt vp %p", vp); 1341 vp->v_holdcnt--; 1342 1343 /* 1344 * If it is on the holdlist and the hold count drops to 1345 * zero, move it to the free list. The test of the back 1346 * pointer and the use reference count of zero is because 1347 * it will be removed from a free list by getnewvnode, 1348 * but will not have its reference count incremented until 1349 * after calling vgone. If the reference count were 1350 * incremented first, vgone would (incorrectly) try to 1351 * close the previous instance of the underlying object. 1352 * So, the back pointer is explicitly set to `0xdeadb' in 1353 * getnewvnode after removing it from a freelist to ensure 1354 * that we do not try to move it here. 1355 */ 1356 1357 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) && 1358 vp->v_holdcnt == 0 && vp->v_usecount == 0) { 1359 simple_lock(&vnode_free_list_slock); 1360 TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); 1361 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 1362 simple_unlock(&vnode_free_list_slock); 1363 } 1364 } 1365 1366 /* 1367 * Vnode reference. 1368 */ 1369 void 1370 vref(struct vnode *vp) 1371 { 1372 1373 simple_lock(&vp->v_interlock); 1374 if (vp->v_usecount <= 0) 1375 panic("vref used where vget required, vp %p", vp); 1376 vp->v_usecount++; 1377 #ifdef DIAGNOSTIC 1378 if (vp->v_usecount == 0) { 1379 vprint("vref", vp); 1380 panic("vref: usecount overflow, vp %p", vp); 1381 } 1382 #endif 1383 simple_unlock(&vp->v_interlock); 1384 } 1385 1386 /* 1387 * Remove any vnodes in the vnode table belonging to mount point mp. 1388 * 1389 * If FORCECLOSE is not specified, there should not be any active ones, 1390 * return error if any are found (nb: this is a user error, not a 1391 * system error). If FORCECLOSE is specified, detach any active vnodes 1392 * that are found. 1393 * 1394 * If WRITECLOSE is set, only flush out regular file vnodes open for 1395 * writing. 1396 * 1397 * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped. 
1398 */ 1399 #ifdef DEBUG 1400 int busyprt = 0; /* print out busy vnodes */ 1401 struct ctldebug debug1 = { "busyprt", &busyprt }; 1402 #endif 1403 1404 int 1405 vflush(struct mount *mp, struct vnode *skipvp, int flags) 1406 { 1407 struct lwp *l = curlwp; /* XXX */ 1408 struct vnode *vp, *nvp; 1409 int busy = 0; 1410 1411 simple_lock(&mntvnode_slock); 1412 loop: 1413 for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { 1414 if (vp->v_mount != mp) 1415 goto loop; 1416 nvp = LIST_NEXT(vp, v_mntvnodes); 1417 /* 1418 * Skip over a selected vnode. 1419 */ 1420 if (vp == skipvp) 1421 continue; 1422 simple_lock(&vp->v_interlock); 1423 /* 1424 * Skip over a vnodes marked VSYSTEM. 1425 */ 1426 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { 1427 simple_unlock(&vp->v_interlock); 1428 continue; 1429 } 1430 /* 1431 * If WRITECLOSE is set, only flush out regular file 1432 * vnodes open for writing. 1433 */ 1434 if ((flags & WRITECLOSE) && 1435 (vp->v_writecount == 0 || vp->v_type != VREG)) { 1436 simple_unlock(&vp->v_interlock); 1437 continue; 1438 } 1439 /* 1440 * With v_usecount == 0, all we need to do is clear 1441 * out the vnode data structures and we are done. 1442 */ 1443 if (vp->v_usecount == 0) { 1444 simple_unlock(&mntvnode_slock); 1445 vgonel(vp, l); 1446 simple_lock(&mntvnode_slock); 1447 continue; 1448 } 1449 /* 1450 * If FORCECLOSE is set, forcibly close the vnode. 1451 * For block or character devices, revert to an 1452 * anonymous device. For all other files, just kill them. 1453 */ 1454 if (flags & FORCECLOSE) { 1455 simple_unlock(&mntvnode_slock); 1456 if (vp->v_type != VBLK && vp->v_type != VCHR) { 1457 vgonel(vp, l); 1458 } else { 1459 vclean(vp, 0, l); 1460 vp->v_op = spec_vnodeop_p; 1461 insmntque(vp, (struct mount *)0); 1462 } 1463 simple_lock(&mntvnode_slock); 1464 continue; 1465 } 1466 #ifdef DEBUG 1467 if (busyprt) 1468 vprint("vflush: busy vnode", vp); 1469 #endif 1470 simple_unlock(&vp->v_interlock); 1471 busy++; 1472 } 1473 simple_unlock(&mntvnode_slock); 1474 if (busy) 1475 return (EBUSY); 1476 return (0); 1477 } 1478 1479 /* 1480 * Disassociate the underlying file system from a vnode. 1481 */ 1482 static void 1483 vclean(struct vnode *vp, int flags, struct lwp *l) 1484 { 1485 struct mount *mp; 1486 int active; 1487 1488 LOCK_ASSERT(simple_lock_held(&vp->v_interlock)); 1489 1490 /* 1491 * Check to see if the vnode is in use. 1492 * If so we have to reference it before we clean it out 1493 * so that its count cannot fall to zero and generate a 1494 * race against ourselves to recycle it. 1495 */ 1496 1497 if ((active = vp->v_usecount) != 0) { 1498 vp->v_usecount++; 1499 #ifdef DIAGNOSTIC 1500 if (vp->v_usecount == 0) { 1501 vprint("vclean", vp); 1502 panic("vclean: usecount overflow"); 1503 } 1504 #endif 1505 } 1506 1507 /* 1508 * Prevent the vnode from being recycled or 1509 * brought into use while we clean it out. 1510 */ 1511 if (vp->v_flag & VXLOCK) 1512 panic("vclean: deadlock, vp %p", vp); 1513 vp->v_flag |= VXLOCK; 1514 if (vp->v_flag & VEXECMAP) { 1515 uvmexp.execpages -= vp->v_uobj.uo_npages; 1516 uvmexp.filepages += vp->v_uobj.uo_npages; 1517 } 1518 vp->v_flag &= ~(VTEXT|VEXECMAP); 1519 1520 /* 1521 * Even if the count is zero, the VOP_INACTIVE routine may still 1522 * have the object locked while it cleans it out. The VOP_LOCK 1523 * ensures that the VOP_INACTIVE routine is done with its work. 1524 * For active vnodes, it ensures that no other activity can 1525 * occur while the underlying object is being cleaned out. 
1526 */ 1527 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK); 1528 1529 /* 1530 * Clean out any cached data associated with the vnode. 1531 * If special device, remove it from special device alias list. 1532 * if it is on one. 1533 */ 1534 if (flags & DOCLOSE) { 1535 int error; 1536 struct vnode *vq, *vx; 1537 1538 vn_start_write(vp, &mp, V_WAIT | V_LOWER); 1539 error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0); 1540 vn_finished_write(mp, V_LOWER); 1541 if (error) 1542 error = vinvalbuf(vp, 0, NOCRED, l, 0, 0); 1543 KASSERT(error == 0); 1544 KASSERT((vp->v_flag & VONWORKLST) == 0); 1545 1546 if (active) 1547 VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL); 1548 1549 if ((vp->v_type == VBLK || vp->v_type == VCHR) && 1550 vp->v_specinfo != 0) { 1551 simple_lock(&spechash_slock); 1552 if (vp->v_hashchain != NULL) { 1553 if (*vp->v_hashchain == vp) { 1554 *vp->v_hashchain = vp->v_specnext; 1555 } else { 1556 for (vq = *vp->v_hashchain; vq; 1557 vq = vq->v_specnext) { 1558 if (vq->v_specnext != vp) 1559 continue; 1560 vq->v_specnext = vp->v_specnext; 1561 break; 1562 } 1563 if (vq == NULL) 1564 panic("missing bdev"); 1565 } 1566 if (vp->v_flag & VALIASED) { 1567 vx = NULL; 1568 for (vq = *vp->v_hashchain; vq; 1569 vq = vq->v_specnext) { 1570 if (vq->v_rdev != vp->v_rdev || 1571 vq->v_type != vp->v_type) 1572 continue; 1573 if (vx) 1574 break; 1575 vx = vq; 1576 } 1577 if (vx == NULL) 1578 panic("missing alias"); 1579 if (vq == NULL) 1580 vx->v_flag &= ~VALIASED; 1581 vp->v_flag &= ~VALIASED; 1582 } 1583 } 1584 simple_unlock(&spechash_slock); 1585 FREE(vp->v_specinfo, M_VNODE); 1586 vp->v_specinfo = NULL; 1587 } 1588 } 1589 LOCK_ASSERT(!simple_lock_held(&vp->v_interlock)); 1590 1591 /* 1592 * If purging an active vnode, it must be closed and 1593 * deactivated before being reclaimed. Note that the 1594 * VOP_INACTIVE will unlock the vnode. 1595 */ 1596 if (active) { 1597 VOP_INACTIVE(vp, l); 1598 } else { 1599 /* 1600 * Any other processes trying to obtain this lock must first 1601 * wait for VXLOCK to clear, then call the new lock operation. 1602 */ 1603 VOP_UNLOCK(vp, 0); 1604 } 1605 /* 1606 * Reclaim the vnode. 1607 */ 1608 if (VOP_RECLAIM(vp, l)) 1609 panic("vclean: cannot reclaim, vp %p", vp); 1610 if (active) { 1611 /* 1612 * Inline copy of vrele() since VOP_INACTIVE 1613 * has already been called. 1614 */ 1615 simple_lock(&vp->v_interlock); 1616 if (--vp->v_usecount <= 0) { 1617 #ifdef DIAGNOSTIC 1618 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 1619 vprint("vclean: bad ref count", vp); 1620 panic("vclean: ref cnt"); 1621 } 1622 #endif 1623 /* 1624 * Insert at tail of LRU list. 1625 */ 1626 1627 simple_unlock(&vp->v_interlock); 1628 simple_lock(&vnode_free_list_slock); 1629 #ifdef DIAGNOSTIC 1630 if (vp->v_holdcnt > 0) 1631 panic("vclean: not clean, vp %p", vp); 1632 #endif 1633 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 1634 simple_unlock(&vnode_free_list_slock); 1635 } else 1636 simple_unlock(&vp->v_interlock); 1637 } 1638 1639 KASSERT(vp->v_uobj.uo_npages == 0); 1640 if (vp->v_type == VREG && vp->v_ractx != NULL) { 1641 uvm_ra_freectx(vp->v_ractx); 1642 vp->v_ractx = NULL; 1643 } 1644 cache_purge(vp); 1645 1646 /* 1647 * Done with purge, notify sleepers of the grim news. 
1648 */ 1649 vp->v_op = dead_vnodeop_p; 1650 vp->v_tag = VT_NON; 1651 simple_lock(&vp->v_interlock); 1652 VN_KNOTE(vp, NOTE_REVOKE); /* FreeBSD has this in vn_pollgone() */ 1653 vp->v_flag &= ~(VXLOCK|VLOCKSWORK); 1654 if (vp->v_flag & VXWANT) { 1655 vp->v_flag &= ~VXWANT; 1656 simple_unlock(&vp->v_interlock); 1657 wakeup((caddr_t)vp); 1658 } else 1659 simple_unlock(&vp->v_interlock); 1660 } 1661 1662 /* 1663 * Recycle an unused vnode to the front of the free list. 1664 * Release the passed interlock if the vnode will be recycled. 1665 */ 1666 int 1667 vrecycle(struct vnode *vp, struct simplelock *inter_lkp, struct lwp *l) 1668 { 1669 1670 simple_lock(&vp->v_interlock); 1671 if (vp->v_usecount == 0) { 1672 if (inter_lkp) 1673 simple_unlock(inter_lkp); 1674 vgonel(vp, l); 1675 return (1); 1676 } 1677 simple_unlock(&vp->v_interlock); 1678 return (0); 1679 } 1680 1681 /* 1682 * Eliminate all activity associated with a vnode 1683 * in preparation for reuse. 1684 */ 1685 void 1686 vgone(struct vnode *vp) 1687 { 1688 struct lwp *l = curlwp; /* XXX */ 1689 1690 simple_lock(&vp->v_interlock); 1691 vgonel(vp, l); 1692 } 1693 1694 /* 1695 * vgone, with the vp interlock held. 1696 */ 1697 void 1698 vgonel(struct vnode *vp, struct lwp *l) 1699 { 1700 1701 LOCK_ASSERT(simple_lock_held(&vp->v_interlock)); 1702 1703 /* 1704 * If a vgone (or vclean) is already in progress, 1705 * wait until it is done and return. 1706 */ 1707 1708 if (vp->v_flag & VXLOCK) { 1709 vp->v_flag |= VXWANT; 1710 ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock); 1711 return; 1712 } 1713 1714 /* 1715 * Clean out the filesystem specific data. 1716 */ 1717 1718 vclean(vp, DOCLOSE, l); 1719 KASSERT((vp->v_flag & VONWORKLST) == 0); 1720 1721 /* 1722 * Delete from old mount point vnode list, if on one. 1723 */ 1724 1725 if (vp->v_mount != NULL) 1726 insmntque(vp, (struct mount *)0); 1727 1728 /* 1729 * The test of the back pointer and the reference count of 1730 * zero is because it will be removed from the free list by 1731 * getcleanvnode, but will not have its reference count 1732 * incremented until after calling vgone. If the reference 1733 * count were incremented first, vgone would (incorrectly) 1734 * try to close the previous instance of the underlying object. 1735 * So, the back pointer is explicitly set to `0xdeadb' in 1736 * getnewvnode after removing it from the freelist to ensure 1737 * that we do not try to move it here. 1738 */ 1739 1740 vp->v_type = VBAD; 1741 if (vp->v_usecount == 0) { 1742 boolean_t dofree; 1743 1744 simple_lock(&vnode_free_list_slock); 1745 if (vp->v_holdcnt > 0) 1746 panic("vgonel: not clean, vp %p", vp); 1747 /* 1748 * if it isn't on the freelist, we're called by getcleanvnode 1749 * and vnode is being re-used. otherwise, we'll free it. 1750 */ 1751 dofree = vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb; 1752 if (dofree) { 1753 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1754 numvnodes--; 1755 } 1756 simple_unlock(&vnode_free_list_slock); 1757 if (dofree) 1758 pool_put(&vnode_pool, vp); 1759 } 1760 } 1761 1762 /* 1763 * Lookup a vnode by device number. 
1764 */ 1765 int 1766 vfinddev(dev_t dev, enum vtype type, struct vnode **vpp) 1767 { 1768 struct vnode *vp; 1769 int rc = 0; 1770 1771 simple_lock(&spechash_slock); 1772 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 1773 if (dev != vp->v_rdev || type != vp->v_type) 1774 continue; 1775 *vpp = vp; 1776 rc = 1; 1777 break; 1778 } 1779 simple_unlock(&spechash_slock); 1780 return (rc); 1781 } 1782 1783 /* 1784 * Revoke all the vnodes corresponding to the specified minor number 1785 * range (endpoints inclusive) of the specified major. 1786 */ 1787 void 1788 vdevgone(int maj, int minl, int minh, enum vtype type) 1789 { 1790 struct vnode *vp; 1791 int mn; 1792 1793 for (mn = minl; mn <= minh; mn++) 1794 if (vfinddev(makedev(maj, mn), type, &vp)) 1795 VOP_REVOKE(vp, REVOKEALL); 1796 } 1797 1798 /* 1799 * Calculate the total number of references to a special device. 1800 */ 1801 int 1802 vcount(struct vnode *vp) 1803 { 1804 struct vnode *vq, *vnext; 1805 int count; 1806 1807 loop: 1808 if ((vp->v_flag & VALIASED) == 0) 1809 return (vp->v_usecount); 1810 simple_lock(&spechash_slock); 1811 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { 1812 vnext = vq->v_specnext; 1813 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) 1814 continue; 1815 /* 1816 * Alias, but not in use, so flush it out. 1817 */ 1818 if (vq->v_usecount == 0 && vq != vp && 1819 (vq->v_flag & VXLOCK) == 0) { 1820 simple_unlock(&spechash_slock); 1821 vgone(vq); 1822 goto loop; 1823 } 1824 count += vq->v_usecount; 1825 } 1826 simple_unlock(&spechash_slock); 1827 return (count); 1828 } 1829 1830 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) 1831 #define ARRAY_PRINT(idx, arr) \ 1832 ((idx) > 0 && (idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN") 1833 1834 const char * const vnode_tags[] = { VNODE_TAGS }; 1835 const char * const vnode_types[] = { VNODE_TYPES }; 1836 const char vnode_flagbits[] = VNODE_FLAGBITS; 1837 1838 /* 1839 * Print out a description of a vnode. 1840 */ 1841 void 1842 vprint(const char *label, struct vnode *vp) 1843 { 1844 char bf[96]; 1845 1846 if (label != NULL) 1847 printf("%s: ", label); 1848 printf("tag %s(%d) type %s(%d), usecount %d, writecount %ld, " 1849 "refcount %ld,", ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag, 1850 ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type, 1851 vp->v_usecount, vp->v_writecount, vp->v_holdcnt); 1852 bitmask_snprintf(vp->v_flag, vnode_flagbits, bf, sizeof(bf)); 1853 if (bf[0] != '\0') 1854 printf(" flags (%s)", &bf[1]); 1855 if (vp->v_data == NULL) { 1856 printf("\n"); 1857 } else { 1858 printf("\n\t"); 1859 VOP_PRINT(vp); 1860 } 1861 } 1862 1863 #ifdef DEBUG 1864 /* 1865 * List all of the locked vnodes in the system. 1866 * Called when debugging the kernel. 
1867 */ 1868 void 1869 printlockedvnodes(void) 1870 { 1871 struct mount *mp, *nmp; 1872 struct vnode *vp; 1873 1874 printf("Locked vnodes\n"); 1875 simple_lock(&mountlist_slock); 1876 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 1877 mp = nmp) { 1878 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) { 1879 nmp = CIRCLEQ_NEXT(mp, mnt_list); 1880 continue; 1881 } 1882 LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 1883 if (VOP_ISLOCKED(vp)) 1884 vprint(NULL, vp); 1885 } 1886 simple_lock(&mountlist_slock); 1887 nmp = CIRCLEQ_NEXT(mp, mnt_list); 1888 vfs_unbusy(mp); 1889 } 1890 simple_unlock(&mountlist_slock); 1891 } 1892 #endif 1893 1894 /* 1895 * sysctl helper routine to return list of supported fstypes 1896 */ 1897 static int 1898 sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS) 1899 { 1900 char bf[MFSNAMELEN]; 1901 char *where = oldp; 1902 struct vfsops *v; 1903 size_t needed, left, slen; 1904 int error, first; 1905 1906 if (newp != NULL) 1907 return (EPERM); 1908 if (namelen != 0) 1909 return (EINVAL); 1910 1911 first = 1; 1912 error = 0; 1913 needed = 0; 1914 left = *oldlenp; 1915 1916 LIST_FOREACH(v, &vfs_list, vfs_list) { 1917 if (where == NULL) 1918 needed += strlen(v->vfs_name) + 1; 1919 else { 1920 memset(bf, 0, sizeof(bf)); 1921 if (first) { 1922 strncpy(bf, v->vfs_name, sizeof(bf)); 1923 first = 0; 1924 } else { 1925 bf[0] = ' '; 1926 strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1); 1927 } 1928 bf[sizeof(bf)-1] = '\0'; 1929 slen = strlen(bf); 1930 if (left < slen + 1) 1931 break; 1932 /* +1 to copy out the trailing NUL byte */ 1933 error = copyout(bf, where, slen + 1); 1934 if (error) 1935 break; 1936 where += slen; 1937 needed += slen; 1938 left -= slen; 1939 } 1940 } 1941 *oldlenp = needed; 1942 return (error); 1943 } 1944 1945 /* 1946 * Top level filesystem related information gathering. 1947 */ 1948 SYSCTL_SETUP(sysctl_vfs_setup, "sysctl vfs subtree setup") 1949 { 1950 sysctl_createv(clog, 0, NULL, NULL, 1951 CTLFLAG_PERMANENT, 1952 CTLTYPE_NODE, "vfs", NULL, 1953 NULL, 0, NULL, 0, 1954 CTL_VFS, CTL_EOL); 1955 sysctl_createv(clog, 0, NULL, NULL, 1956 CTLFLAG_PERMANENT, 1957 CTLTYPE_NODE, "generic", 1958 SYSCTL_DESCR("Non-specific vfs related information"), 1959 NULL, 0, NULL, 0, 1960 CTL_VFS, VFS_GENERIC, CTL_EOL); 1961 sysctl_createv(clog, 0, NULL, NULL, 1962 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1963 CTLTYPE_INT, "usermount", 1964 SYSCTL_DESCR("Whether unprivileged users may mount " 1965 "filesystems"), 1966 NULL, 0, &dovfsusermount, 0, 1967 CTL_VFS, VFS_GENERIC, VFS_USERMOUNT, CTL_EOL); 1968 sysctl_createv(clog, 0, NULL, NULL, 1969 CTLFLAG_PERMANENT, 1970 CTLTYPE_STRING, "fstypes", 1971 SYSCTL_DESCR("List of file systems present"), 1972 sysctl_vfs_generic_fstypes, 0, NULL, 0, 1973 CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL); 1974 sysctl_createv(clog, 0, NULL, NULL, 1975 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1976 CTLTYPE_INT, "magiclinks", 1977 SYSCTL_DESCR("Whether \"magic\" symlinks are expanded"), 1978 NULL, 0, &vfs_magiclinks, 0, 1979 CTL_VFS, VFS_GENERIC, VFS_MAGICLINKS, CTL_EOL); 1980 } 1981 1982 1983 int kinfo_vdebug = 1; 1984 int kinfo_vgetfailed; 1985 #define KINFO_VNODESLOP 10 1986 /* 1987 * Dump vnode list (via sysctl). 1988 * Copyout address of vnode followed by vnode. 
1989 */ 1990 /* ARGSUSED */ 1991 int 1992 sysctl_kern_vnode(SYSCTLFN_ARGS) 1993 { 1994 char *where = oldp; 1995 size_t *sizep = oldlenp; 1996 struct mount *mp, *nmp; 1997 struct vnode *nvp, *vp; 1998 char *bp = where, *savebp; 1999 char *ewhere; 2000 int error; 2001 2002 if (namelen != 0) 2003 return (EOPNOTSUPP); 2004 if (newp != NULL) 2005 return (EPERM); 2006 2007 #define VPTRSZ sizeof(struct vnode *) 2008 #define VNODESZ sizeof(struct vnode) 2009 if (where == NULL) { 2010 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 2011 return (0); 2012 } 2013 ewhere = where + *sizep; 2014 2015 simple_lock(&mountlist_slock); 2016 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 2017 mp = nmp) { 2018 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) { 2019 nmp = CIRCLEQ_NEXT(mp, mnt_list); 2020 continue; 2021 } 2022 savebp = bp; 2023 again: 2024 simple_lock(&mntvnode_slock); 2025 for (vp = LIST_FIRST(&mp->mnt_vnodelist); 2026 vp != NULL; 2027 vp = nvp) { 2028 /* 2029 * Check that the vp is still associated with 2030 * this filesystem. RACE: could have been 2031 * recycled onto the same filesystem. 2032 */ 2033 if (vp->v_mount != mp) { 2034 simple_unlock(&mntvnode_slock); 2035 if (kinfo_vdebug) 2036 printf("kinfo: vp changed\n"); 2037 bp = savebp; 2038 goto again; 2039 } 2040 nvp = LIST_NEXT(vp, v_mntvnodes); 2041 if (bp + VPTRSZ + VNODESZ > ewhere) { 2042 simple_unlock(&mntvnode_slock); 2043 *sizep = bp - where; 2044 return (ENOMEM); 2045 } 2046 simple_unlock(&mntvnode_slock); 2047 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || 2048 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) 2049 return (error); 2050 bp += VPTRSZ + VNODESZ; 2051 simple_lock(&mntvnode_slock); 2052 } 2053 simple_unlock(&mntvnode_slock); 2054 simple_lock(&mountlist_slock); 2055 nmp = CIRCLEQ_NEXT(mp, mnt_list); 2056 vfs_unbusy(mp); 2057 } 2058 simple_unlock(&mountlist_slock); 2059 2060 *sizep = bp - where; 2061 return (0); 2062 } 2063 2064 /* 2065 * Check to see if a filesystem is mounted on a block device. 2066 */ 2067 int 2068 vfs_mountedon(struct vnode *vp) 2069 { 2070 struct vnode *vq; 2071 int error = 0; 2072 2073 if (vp->v_type != VBLK) 2074 return ENOTBLK; 2075 if (vp->v_specmountpoint != NULL) 2076 return (EBUSY); 2077 if (vp->v_flag & VALIASED) { 2078 simple_lock(&spechash_slock); 2079 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 2080 if (vq->v_rdev != vp->v_rdev || 2081 vq->v_type != vp->v_type) 2082 continue; 2083 if (vq->v_specmountpoint != NULL) { 2084 error = EBUSY; 2085 break; 2086 } 2087 } 2088 simple_unlock(&spechash_slock); 2089 } 2090 return (error); 2091 } 2092 2093 /* 2094 * Do the usual access checking. 2095 * file_mode, uid and gid are from the vnode in question, 2096 * while acc_mode and cred are from the VOP_ACCESS parameter list 2097 */ 2098 int 2099 vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid, 2100 mode_t acc_mode, struct ucred *cred) 2101 { 2102 mode_t mask; 2103 2104 /* 2105 * Super-user always gets read/write access, but execute access depends 2106 * on at least one execute bit being set. 2107 */ 2108 if (cred->cr_uid == 0) { 2109 if ((acc_mode & VEXEC) && type != VDIR && 2110 (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0) 2111 return (EACCES); 2112 return (0); 2113 } 2114 2115 mask = 0; 2116 2117 /* Otherwise, check the owner. 

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall(struct lwp *l)
{
	struct mount *mp, *nmp;
	int allerror, error;

	printf("unmounting file systems...");
	for (allerror = 0,
	     mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
		printf("\nunmounting %s (%s)...",
		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
		/*
		 * XXX Freeze syncer.  Must do this before locking the
		 * mount point.  See dounmount() for details.
		 */
		lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
		if (vfs_busy(mp, 0, 0)) {
			lockmgr(&syncer_lock, LK_RELEASE, NULL);
			continue;
		}
		if ((error = dounmount(mp, MNT_FORCE, l)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}
	printf(" done\n");
	if (allerror)
		printf("WARNING: some file systems would not unmount\n");
}

extern struct simplelock bqueue_slock; /* XXX */

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown(void)
{
	struct lwp *l;

	/* XXX we're certainly not running in lwp0's context! */
	l = curlwp;
	if (l == NULL)
		l = &lwp0;

	printf("syncing disks... ");

	/* remove user processes from the run queue */
	suspendsched();
	(void) spl0();

	/* avoid coming back this way again if we panic. */
	doing_shutdown = 1;

	sys_sync(l, NULL, NULL);

	/* Wait for sync to finish. */
	if (buf_syncwait() != 0) {
#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif
		printf("giving up\n");
		return;
	} else
		printf("done\n");

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems. */
	vfs_unmountall(l);
}
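
/*
 * Illustrative sketch (not part of this file): machine-dependent reboot
 * code is expected to run the sync/unmount pass above before halting,
 * roughly along these lines; the exact guard variables differ per port
 * and are assumptions here.
 *
 *	if ((howto & RB_NOSYNC) == 0 && !sync_done) {
 *		sync_done = 1;
 *		vfs_shutdown();
 *	}
 */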

/*
 * Mount the root file system.  If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot(void)
{
	struct vfsops *v;
	int error = ENODEV;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	switch (device_class(root_device)) {
	case DV_IFNET:
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET "
			    "(0x%08x -> %d,%d)", rootdev,
			    major(rootdev), minor(rootdev));
		break;

	case DV_DISK:
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
		if (bdevvp(rootdev, &rootvp))
			panic("vfs_mountroot: can't get vnode for rootdev");
		error = VOP_OPEN(rootvp, FREAD, FSCRED, curlwp);
		if (error) {
			printf("vfs_mountroot: can't open root device\n");
			return (error);
		}
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    root_device->dv_xname);
		return (ENODEV);
	}

	/*
	 * If user specified a file system, use it.
	 */
	if (mountroot != NULL) {
		error = (*mountroot)();
		goto done;
	}

	/*
	 * Try each file system currently configured into the kernel.
	 */
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (v->vfs_mountroot == NULL)
			continue;
#ifdef DEBUG
		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
#endif
		error = (*v->vfs_mountroot)();
		if (!error) {
			aprint_normal("root file system type: %s\n",
			    v->vfs_name);
			break;
		}
	}

	if (v == NULL) {
		printf("no file system for %s", root_device->dv_xname);
		if (device_class(root_device) == DV_DISK)
			printf(" (dev 0x%x)", rootdev);
		printf("\n");
		error = EFTYPE;
	}

done:
	if (error && device_class(root_device) == DV_DISK) {
		VOP_CLOSE(rootvp, FREAD, FSCRED, curlwp);
		vrele(rootvp);
	}
	return (error);
}

/*
 * Given a file system name, look up the vfsops for that
 * file system, or return NULL if file system isn't present
 * in the kernel.
 */
struct vfsops *
vfs_getopsbyname(const char *name)
{
	struct vfsops *v;

	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (strcmp(v->vfs_name, name) == 0)
			break;
	}

	return (v);
}

/*
 * Establish a file system and initialize it.
 */
int
vfs_attach(struct vfsops *vfs)
{
	struct vfsops *v;
	int error = 0;

	/*
	 * Make sure this file system doesn't already exist.
	 */
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
			error = EEXIST;
			goto out;
		}
	}

	/*
	 * Initialize the vnode operations for this file system.
	 */
	vfs_opv_init(vfs->vfs_opv_descs);

	/*
	 * Now initialize the file system itself.
	 */
	(*vfs->vfs_init)();

	/*
	 * ...and link it into the kernel's list.
	 */
	LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);

	/*
	 * Sanity: make sure the reference count is 0.
	 */
	vfs->vfs_refcount = 0;

 out:
	return (error);
}
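
/*
 * Illustrative sketch (not part of this file): a file system compiled
 * into the kernel or brought in by a loadable module registers its
 * vfsops through vfs_attach() above; "examplefs_vfsops" is an assumed
 * name for the example.
 *
 *	error = vfs_attach(&examplefs_vfsops);
 *	if (error && error != EEXIST)
 *		printf("examplefs: unable to register (error %d)\n", error);
 */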
2398 */ 2399 LIST_FOREACH(v, &vfs_list, vfs_list) { 2400 if (v == vfs) { 2401 LIST_REMOVE(v, vfs_list); 2402 break; 2403 } 2404 } 2405 2406 if (v == NULL) 2407 return (ESRCH); 2408 2409 /* 2410 * Now run the file system-specific cleanups. 2411 */ 2412 (*vfs->vfs_done)(); 2413 2414 /* 2415 * Free the vnode operations vector. 2416 */ 2417 vfs_opv_free(vfs->vfs_opv_descs); 2418 return (0); 2419 } 2420 2421 void 2422 vfs_reinit(void) 2423 { 2424 struct vfsops *vfs; 2425 2426 LIST_FOREACH(vfs, &vfs_list, vfs_list) { 2427 if (vfs->vfs_reinit) { 2428 (*vfs->vfs_reinit)(); 2429 } 2430 } 2431 } 2432 2433 /* 2434 * Request a filesystem to suspend write operations. 2435 */ 2436 int 2437 vfs_write_suspend(struct mount *mp, int slpflag, int slptimeo) 2438 { 2439 struct lwp *l = curlwp; /* XXX */ 2440 int error; 2441 2442 while ((mp->mnt_iflag & IMNT_SUSPEND)) { 2443 if (slptimeo < 0) 2444 return EWOULDBLOCK; 2445 error = tsleep(&mp->mnt_flag, slpflag, "suspwt1", slptimeo); 2446 if (error) 2447 return error; 2448 } 2449 mp->mnt_iflag |= IMNT_SUSPEND; 2450 2451 simple_lock(&mp->mnt_slock); 2452 if (mp->mnt_writeopcountupper > 0) 2453 ltsleep(&mp->mnt_writeopcountupper, PUSER - 1, "suspwt", 2454 0, &mp->mnt_slock); 2455 simple_unlock(&mp->mnt_slock); 2456 2457 error = VFS_SYNC(mp, MNT_WAIT, l->l_proc->p_ucred, l); 2458 if (error) { 2459 vfs_write_resume(mp); 2460 return error; 2461 } 2462 mp->mnt_iflag |= IMNT_SUSPENDLOW; 2463 2464 simple_lock(&mp->mnt_slock); 2465 if (mp->mnt_writeopcountlower > 0) 2466 ltsleep(&mp->mnt_writeopcountlower, PUSER - 1, "suspwt", 2467 0, &mp->mnt_slock); 2468 mp->mnt_iflag |= IMNT_SUSPENDED; 2469 simple_unlock(&mp->mnt_slock); 2470 2471 return 0; 2472 } 2473 2474 /* 2475 * Request a filesystem to resume write operations. 2476 */ 2477 void 2478 vfs_write_resume(struct mount *mp) 2479 { 2480 2481 if ((mp->mnt_iflag & IMNT_SUSPEND) == 0) 2482 return; 2483 mp->mnt_iflag &= ~(IMNT_SUSPEND | IMNT_SUSPENDLOW | IMNT_SUSPENDED); 2484 wakeup(&mp->mnt_flag); 2485 } 2486 2487 void 2488 copy_statvfs_info(struct statvfs *sbp, const struct mount *mp) 2489 { 2490 const struct statvfs *mbp; 2491 2492 if (sbp == (mbp = &mp->mnt_stat)) 2493 return; 2494 2495 (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx)); 2496 sbp->f_fsid = mbp->f_fsid; 2497 sbp->f_owner = mbp->f_owner; 2498 sbp->f_flag = mbp->f_flag; 2499 sbp->f_syncwrites = mbp->f_syncwrites; 2500 sbp->f_asyncwrites = mbp->f_asyncwrites; 2501 sbp->f_syncreads = mbp->f_syncreads; 2502 sbp->f_asyncreads = mbp->f_asyncreads; 2503 (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare)); 2504 (void)memcpy(sbp->f_fstypename, mbp->f_fstypename, 2505 sizeof(sbp->f_fstypename)); 2506 (void)memcpy(sbp->f_mntonname, mbp->f_mntonname, 2507 sizeof(sbp->f_mntonname)); 2508 (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname, 2509 sizeof(sbp->f_mntfromname)); 2510 sbp->f_namemax = mbp->f_namemax; 2511 } 2512 2513 int 2514 set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom, 2515 struct mount *mp, struct lwp *l) 2516 { 2517 int error; 2518 size_t size; 2519 struct statvfs *sfs = &mp->mnt_stat; 2520 int (*fun)(const void *, void *, size_t, size_t *); 2521 2522 (void)strncpy(mp->mnt_stat.f_fstypename, mp->mnt_op->vfs_name, 2523 sizeof(mp->mnt_stat.f_fstypename)); 2524 2525 if (onp) { 2526 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 2527 fun = (ukon == UIO_SYSSPACE) ? 

int
set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
    struct mount *mp, struct lwp *l)
{
	int error;
	size_t size;
	struct statvfs *sfs = &mp->mnt_stat;
	int (*fun)(const void *, void *, size_t, size_t *);

	(void)strncpy(mp->mnt_stat.f_fstypename, mp->mnt_op->vfs_name,
	    sizeof(mp->mnt_stat.f_fstypename));

	if (onp) {
		struct cwdinfo *cwdi = l->l_proc->p_cwdi;
		fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
		if (cwdi->cwdi_rdir != NULL) {
			size_t len;
			char *bp;
			char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);

			if (!path) /* XXX can't happen with M_WAITOK */
				return ENOMEM;

			bp = path + MAXPATHLEN;
			*--bp = '\0';
			error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
			    path, MAXPATHLEN / 2, 0, l);
			if (error) {
				free(path, M_TEMP);
				return error;
			}

			len = strlen(bp);
			if (len > sizeof(sfs->f_mntonname) - 1)
				len = sizeof(sfs->f_mntonname) - 1;
			(void)strncpy(sfs->f_mntonname, bp, len);
			free(path, M_TEMP);

			if (len < sizeof(sfs->f_mntonname) - 1) {
				error = (*fun)(onp, &sfs->f_mntonname[len],
				    sizeof(sfs->f_mntonname) - len - 1, &size);
				if (error)
					return error;
				size += len;
			} else {
				size = len;
			}
		} else {
			error = (*fun)(onp, &sfs->f_mntonname,
			    sizeof(sfs->f_mntonname) - 1, &size);
			if (error)
				return error;
		}
		(void)memset(sfs->f_mntonname + size, 0,
		    sizeof(sfs->f_mntonname) - size);
	}

	if (fromp) {
		fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
		error = (*fun)(fromp, sfs->f_mntfromname,
		    sizeof(sfs->f_mntfromname) - 1, &size);
		if (error)
			return error;
		(void)memset(sfs->f_mntfromname + size, 0,
		    sizeof(sfs->f_mntfromname) - size);
	}
	return 0;
}

#ifdef DDB
static const char buf_flagbits[] = BUF_FLAGBITS;

void
vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...))
{
	char bf[1024];

	(*pr)(" vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" rawblkno 0x%"
	    PRIx64 " dev 0x%x\n",
	    bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev);

	bitmask_snprintf(bp->b_flags, buf_flagbits, bf, sizeof(bf));
	(*pr)(" error %d flags 0x%s\n", bp->b_error, bf);

	(*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
	    bp->b_bufsize, bp->b_bcount, bp->b_resid);
	(*pr)(" data %p saveaddr %p dep %p\n",
	    bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
	(*pr)(" iodone %p\n", bp->b_iodone);
}

void
vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...))
{
	char bf[256];

	uvm_object_printit(&vp->v_uobj, full, pr);
	bitmask_snprintf(vp->v_flag, vnode_flagbits, bf, sizeof(bf));
	(*pr)("\nVNODE flags %s\n", bf);
	(*pr)("mp %p numoutput %d size 0x%llx\n",
	    vp->v_mount, vp->v_numoutput, vp->v_size);

	(*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
	    vp->v_data, vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt, vp->v_numoutput);

	(*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n",
	    ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
	    ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
	    vp->v_mount, vp->v_mountedhere);

	if (full) {
		struct buf *bp;

		(*pr)("clean bufs:\n");
		LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}

		(*pr)("dirty bufs:\n");
		LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}
	}
}

void
vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...))
{
	char sbuf[256];

	(*pr)("vnodecovered = %p syncer = %p data = %p\n",
	    mp->mnt_vnodecovered, mp->mnt_syncer, mp->mnt_data);

	(*pr)("fs_bshift %d dev_bshift = %d\n",
	    mp->mnt_fs_bshift, mp->mnt_dev_bshift);

	bitmask_snprintf(mp->mnt_flag, __MNT_FLAG_BITS, sbuf, sizeof(sbuf));
	(*pr)("flag = %s\n", sbuf);

	bitmask_snprintf(mp->mnt_iflag, __IMNT_FLAG_BITS, sbuf, sizeof(sbuf));
	(*pr)("iflag = %s\n", sbuf);

	/* XXX use lockmgr_printinfo */
	if (mp->mnt_lock.lk_sharecount)
		(*pr)(" lock type %s: SHARED (count %d)", mp->mnt_lock.lk_wmesg,
		    mp->mnt_lock.lk_sharecount);
	else if (mp->mnt_lock.lk_flags & LK_HAVE_EXCL) {
		(*pr)(" lock type %s: EXCL (count %d) by ",
		    mp->mnt_lock.lk_wmesg, mp->mnt_lock.lk_exclusivecount);
		if (mp->mnt_lock.lk_flags & LK_SPIN)
			(*pr)("processor %lu", mp->mnt_lock.lk_cpu);
		else
			(*pr)("pid %d.%d", mp->mnt_lock.lk_lockholder,
			    mp->mnt_lock.lk_locklwp);
	} else
		(*pr)(" not locked");
	if ((mp->mnt_lock.lk_flags & LK_SPIN) == 0 && mp->mnt_lock.lk_waitcount > 0)
		(*pr)(" with %d pending", mp->mnt_lock.lk_waitcount);

	(*pr)("\n");

	if (mp->mnt_unmounter) {
		(*pr)("unmounter pid = %d ", mp->mnt_unmounter->l_proc->p_pid);
	}
	(*pr)("wcnt = %d, writeopcountupper = %d, writeopcountlower = %d\n",
	    mp->mnt_wcnt, mp->mnt_writeopcountupper, mp->mnt_writeopcountlower);

	(*pr)("statvfs cache:\n");
	(*pr)("\tbsize = %lu\n", mp->mnt_stat.f_bsize);
	(*pr)("\tfrsize = %lu\n", mp->mnt_stat.f_frsize);
	(*pr)("\tiosize = %lu\n", mp->mnt_stat.f_iosize);

	(*pr)("\tblocks = %"PRIu64"\n", mp->mnt_stat.f_blocks);
	(*pr)("\tbfree = %"PRIu64"\n", mp->mnt_stat.f_bfree);
	(*pr)("\tbavail = %"PRIu64"\n", mp->mnt_stat.f_bavail);
	(*pr)("\tbresvd = %"PRIu64"\n", mp->mnt_stat.f_bresvd);

	(*pr)("\tfiles = %"PRIu64"\n", mp->mnt_stat.f_files);
	(*pr)("\tffree = %"PRIu64"\n", mp->mnt_stat.f_ffree);
	(*pr)("\tfavail = %"PRIu64"\n", mp->mnt_stat.f_favail);
	(*pr)("\tfresvd = %"PRIu64"\n", mp->mnt_stat.f_fresvd);

	(*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n",
	    mp->mnt_stat.f_fsidx.__fsid_val[0],
	    mp->mnt_stat.f_fsidx.__fsid_val[1]);

	(*pr)("\towner = %"PRIu32"\n", mp->mnt_stat.f_owner);
	(*pr)("\tnamemax = %lu\n", mp->mnt_stat.f_namemax);

	bitmask_snprintf(mp->mnt_stat.f_flag, __MNT_FLAG_BITS, sbuf,
	    sizeof(sbuf));
	(*pr)("\tflag = %s\n", sbuf);
	(*pr)("\tsyncwrites = %" PRIu64 "\n", mp->mnt_stat.f_syncwrites);
	(*pr)("\tasyncwrites = %" PRIu64 "\n", mp->mnt_stat.f_asyncwrites);
	(*pr)("\tsyncreads = %" PRIu64 "\n", mp->mnt_stat.f_syncreads);
	(*pr)("\tasyncreads = %" PRIu64 "\n", mp->mnt_stat.f_asyncreads);
	(*pr)("\tfstypename = %s\n", mp->mnt_stat.f_fstypename);
	(*pr)("\tmntonname = %s\n", mp->mnt_stat.f_mntonname);
	(*pr)("\tmntfromname = %s\n", mp->mnt_stat.f_mntfromname);

	{
		int cnt = 0;
		struct vnode *vp;
		(*pr)("locked vnodes =");
		/* XXX would take mountlist lock, except ddb may not have context */
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp)) {
				if ((++cnt % 6) == 0) {
					(*pr)(" %p,\n\t", vp);
				} else {
					(*pr)(" %p,", vp);
				}
			}
		}
		(*pr)("\n");
	}

	if (full) {
		int cnt = 0;
		struct vnode *vp;
		(*pr)("all vnodes =");
		/* XXX would take mountlist lock, except ddb may not have context */
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (!LIST_NEXT(vp, v_mntvnodes)) {
				(*pr)(" %p", vp);
			} else if ((++cnt % 6) == 0) {
				(*pr)(" %p,\n\t", vp);
			} else {
				(*pr)(" %p,", vp);
			}
		}
		(*pr)("\n");
	}
}
#endif /* DDB */