1 /* $NetBSD: vfs_subr.c,v 1.262 2006/02/04 12:01:26 yamt Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998, 2004, 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * This code is derived from software contributed to The NetBSD Foundation 11 * by Charles M. Hannum. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the NetBSD 24 * Foundation, Inc. and its contributors. 25 * 4. Neither the name of The NetBSD Foundation nor the names of its 26 * contributors may be used to endorse or promote products derived 27 * from this software without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 30 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 31 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 32 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 33 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 34 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 35 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 36 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 37 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 38 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 39 * POSSIBILITY OF SUCH DAMAGE. 40 */ 41 42 /* 43 * Copyright (c) 1989, 1993 44 * The Regents of the University of California. All rights reserved. 45 * (c) UNIX System Laboratories, Inc. 46 * All or some portions of this file are derived from material licensed 47 * to the University of California by American Telephone and Telegraph 48 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 49 * the permission of UNIX System Laboratories, Inc. 50 * 51 * Redistribution and use in source and binary forms, with or without 52 * modification, are permitted provided that the following conditions 53 * are met: 54 * 1. Redistributions of source code must retain the above copyright 55 * notice, this list of conditions and the following disclaimer. 56 * 2. Redistributions in binary form must reproduce the above copyright 57 * notice, this list of conditions and the following disclaimer in the 58 * documentation and/or other materials provided with the distribution. 59 * 3. Neither the name of the University nor the names of its contributors 60 * may be used to endorse or promote products derived from this software 61 * without specific prior written permission. 
62 * 63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 73 * SUCH DAMAGE. 74 * 75 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 76 */ 77 78 /* 79 * External virtual filesystem routines 80 */ 81 82 #include <sys/cdefs.h> 83 __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.262 2006/02/04 12:01:26 yamt Exp $"); 84 85 #include "opt_inet.h" 86 #include "opt_ddb.h" 87 #include "opt_compat_netbsd.h" 88 #include "opt_compat_43.h" 89 90 #include <sys/param.h> 91 #include <sys/systm.h> 92 #include <sys/proc.h> 93 #include <sys/kernel.h> 94 #include <sys/mount.h> 95 #include <sys/fcntl.h> 96 #include <sys/vnode.h> 97 #include <sys/stat.h> 98 #include <sys/namei.h> 99 #include <sys/ucred.h> 100 #include <sys/buf.h> 101 #include <sys/errno.h> 102 #include <sys/malloc.h> 103 #include <sys/domain.h> 104 #include <sys/mbuf.h> 105 #include <sys/sa.h> 106 #include <sys/syscallargs.h> 107 #include <sys/device.h> 108 #include <sys/filedesc.h> 109 110 #include <miscfs/specfs/specdev.h> 111 #include <miscfs/genfs/genfs.h> 112 #include <miscfs/syncfs/syncfs.h> 113 114 #include <uvm/uvm.h> 115 #include <uvm/uvm_readahead.h> 116 #include <uvm/uvm_ddb.h> 117 118 #include <sys/sysctl.h> 119 120 const enum vtype iftovt_tab[16] = { 121 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 122 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 123 }; 124 const int vttoif_tab[9] = { 125 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 126 S_IFSOCK, S_IFIFO, S_IFMT, 127 }; 128 129 int doforce = 1; /* 1 => permit forcible unmounting */ 130 int prtactive = 0; /* 1 => print out reclaim of active vnodes */ 131 132 extern int dovfsusermount; /* 1 => permit any user to mount filesystems */ 133 134 /* 135 * Insq/Remq for the vnode usage lists. 
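 *
 * An illustrative sketch of how the routines further down use these
 * macros (hypothetical caller, not part of any interface): a buffer is
 * put on a vnode's clean list with
 *
 *	bufinsvn(bp, &vp->v_cleanblkhd);
 *
 * and, once known to be on some list, taken off again with
 *
 *	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
 *		bufremvn(bp);
 *
 * bufremvn() leaves NOLIST in b_vnbufs.le_next, which is exactly the
 * test brelvp() and reassignbuf() below use to decide whether a buffer
 * is currently on a vnode list.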
136 */ 137 #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) 138 #define bufremvn(bp) { \ 139 LIST_REMOVE(bp, b_vnbufs); \ 140 (bp)->b_vnbufs.le_next = NOLIST; \ 141 } 142 /* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */ 143 struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list); 144 struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list); 145 146 struct mntlist mountlist = /* mounted filesystem list */ 147 CIRCLEQ_HEAD_INITIALIZER(mountlist); 148 struct vfs_list_head vfs_list = /* vfs list */ 149 LIST_HEAD_INITIALIZER(vfs_list); 150 151 struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER; 152 static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER; 153 struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER; 154 struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER; 155 struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER; 156 157 /* XXX - gross; single global lock to protect v_numoutput */ 158 struct simplelock global_v_numoutput_slock = SIMPLELOCK_INITIALIZER; 159 160 /* 161 * These define the root filesystem and device. 162 */ 163 struct mount *rootfs; 164 struct vnode *rootvnode; 165 struct device *root_device; /* root device */ 166 167 POOL_INIT(vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl", 168 &pool_allocator_nointr); 169 170 MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes"); 171 172 /* 173 * Local declarations. 174 */ 175 static void insmntque(struct vnode *, struct mount *); 176 static int getdevvp(dev_t, struct vnode **, enum vtype); 177 static void vclean(struct vnode *, int, struct lwp *); 178 static struct vnode *getcleanvnode(struct lwp *); 179 180 #ifdef DEBUG 181 void printlockedvnodes(void); 182 #endif 183 184 /* 185 * Initialize the vnode management data structures. 186 */ 187 void 188 vntblinit(void) 189 { 190 191 /* 192 * Initialize the filesystem syncer. 193 */ 194 vn_initialize_syncerd(); 195 } 196 197 int 198 vfs_drainvnodes(long target, struct lwp *l) 199 { 200 201 simple_lock(&vnode_free_list_slock); 202 while (numvnodes > target) { 203 struct vnode *vp; 204 205 vp = getcleanvnode(l); 206 if (vp == NULL) 207 return EBUSY; /* give up */ 208 pool_put(&vnode_pool, vp); 209 simple_lock(&vnode_free_list_slock); 210 numvnodes--; 211 } 212 simple_unlock(&vnode_free_list_slock); 213 214 return 0; 215 } 216 217 /* 218 * grab a vnode from freelist and clean it. 219 */ 220 struct vnode * 221 getcleanvnode(struct lwp *l) 222 { 223 struct vnode *vp; 224 struct mount *mp; 225 struct freelst *listhd; 226 227 LOCK_ASSERT(simple_lock_held(&vnode_free_list_slock)); 228 229 listhd = &vnode_free_list; 230 try_nextlist: 231 TAILQ_FOREACH(vp, listhd, v_freelist) { 232 if (!simple_lock_try(&vp->v_interlock)) 233 continue; 234 /* 235 * as our lwp might hold the underlying vnode locked, 236 * don't try to reclaim the VLAYER vnode if it's locked. 
237 */ 238 if ((vp->v_flag & VXLOCK) == 0 && 239 ((vp->v_flag & VLAYER) == 0 || VOP_ISLOCKED(vp) == 0)) { 240 if (vn_start_write(vp, &mp, V_NOWAIT) == 0) 241 break; 242 } 243 mp = NULL; 244 simple_unlock(&vp->v_interlock); 245 } 246 247 if (vp == NULLVP) { 248 if (listhd == &vnode_free_list) { 249 listhd = &vnode_hold_list; 250 goto try_nextlist; 251 } 252 simple_unlock(&vnode_free_list_slock); 253 return NULLVP; 254 } 255 256 if (vp->v_usecount) 257 panic("free vnode isn't, vp %p", vp); 258 TAILQ_REMOVE(listhd, vp, v_freelist); 259 /* see comment on why 0xdeadb is set at end of vgone (below) */ 260 vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb; 261 simple_unlock(&vnode_free_list_slock); 262 vp->v_lease = NULL; 263 264 if (vp->v_type != VBAD) 265 vgonel(vp, l); 266 else 267 simple_unlock(&vp->v_interlock); 268 vn_finished_write(mp, 0); 269 #ifdef DIAGNOSTIC 270 if (vp->v_data || vp->v_uobj.uo_npages || 271 TAILQ_FIRST(&vp->v_uobj.memq)) 272 panic("cleaned vnode isn't, vp %p", vp); 273 if (vp->v_numoutput) 274 panic("clean vnode has pending I/O's, vp %p", vp); 275 #endif 276 KASSERT((vp->v_flag & VONWORKLST) == 0); 277 278 return vp; 279 } 280 281 /* 282 * Mark a mount point as busy. Used to synchronize access and to delay 283 * unmounting. Interlock is not released on failure. 284 */ 285 int 286 vfs_busy(struct mount *mp, int flags, struct simplelock *interlkp) 287 { 288 int lkflags; 289 290 while (mp->mnt_iflag & IMNT_UNMOUNT) { 291 int gone, n; 292 293 if (flags & LK_NOWAIT) 294 return (ENOENT); 295 if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL 296 && mp->mnt_unmounter == curlwp) 297 return (EDEADLK); 298 if (interlkp) 299 simple_unlock(interlkp); 300 /* 301 * Since all busy locks are shared except the exclusive 302 * lock granted when unmounting, the only place that a 303 * wakeup needs to be done is at the release of the 304 * exclusive lock at the end of dounmount. 305 */ 306 simple_lock(&mp->mnt_slock); 307 mp->mnt_wcnt++; 308 ltsleep((caddr_t)mp, PVFS, "vfs_busy", 0, &mp->mnt_slock); 309 n = --mp->mnt_wcnt; 310 simple_unlock(&mp->mnt_slock); 311 gone = mp->mnt_iflag & IMNT_GONE; 312 313 if (n == 0) 314 wakeup(&mp->mnt_wcnt); 315 if (interlkp) 316 simple_lock(interlkp); 317 if (gone) 318 return (ENOENT); 319 } 320 lkflags = LK_SHARED; 321 if (interlkp) 322 lkflags |= LK_INTERLOCK; 323 if (lockmgr(&mp->mnt_lock, lkflags, interlkp)) 324 panic("vfs_busy: unexpected lock failure"); 325 return (0); 326 } 327 328 /* 329 * Free a busy filesystem. 330 */ 331 void 332 vfs_unbusy(struct mount *mp) 333 { 334 335 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL); 336 } 337 338 /* 339 * Lookup a filesystem type, and if found allocate and initialize 340 * a mount structure for it. 341 * 342 * Devname is usually updated by mount(8) after booting. 
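 *
 * A hedged usage sketch (argument values are the caller's business):
 * a file system's mountroot routine would typically do something like
 *
 *	struct mount *mp;
 *	int error;
 *
 *	error = vfs_rootmountalloc("ffs", "root_device", &mp);
 *	if (error)
 *		return error;
 *	-- fill in fs-specific state, then drop the busy lock with
 *	-- vfs_unbusy(mp) once the mount is usable
 *
 * The mount returned here is already marked busy (vfs_busy) and
 * read-only (MNT_RDONLY).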
343 */ 344 int 345 vfs_rootmountalloc(const char *fstypename, const char *devname, 346 struct mount **mpp) 347 { 348 struct vfsops *vfsp = NULL; 349 struct mount *mp; 350 351 LIST_FOREACH(vfsp, &vfs_list, vfs_list) 352 if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN)) 353 break; 354 355 if (vfsp == NULL) 356 return (ENODEV); 357 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 358 memset((char *)mp, 0, (u_long)sizeof(struct mount)); 359 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); 360 simple_lock_init(&mp->mnt_slock); 361 (void)vfs_busy(mp, LK_NOWAIT, 0); 362 LIST_INIT(&mp->mnt_vnodelist); 363 mp->mnt_op = vfsp; 364 mp->mnt_flag = MNT_RDONLY; 365 mp->mnt_vnodecovered = NULLVP; 366 mp->mnt_leaf = mp; 367 vfsp->vfs_refcount++; 368 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN); 369 mp->mnt_stat.f_mntonname[0] = '/'; 370 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 371 *mpp = mp; 372 return (0); 373 } 374 375 /* 376 * Lookup a mount point by filesystem identifier. 377 */ 378 struct mount * 379 vfs_getvfs(fsid_t *fsid) 380 { 381 struct mount *mp; 382 383 simple_lock(&mountlist_slock); 384 CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) { 385 if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] && 386 mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) { 387 simple_unlock(&mountlist_slock); 388 return (mp); 389 } 390 } 391 simple_unlock(&mountlist_slock); 392 return ((struct mount *)0); 393 } 394 395 /* 396 * Get a new unique fsid 397 */ 398 void 399 vfs_getnewfsid(struct mount *mp) 400 { 401 static u_short xxxfs_mntid; 402 fsid_t tfsid; 403 int mtype; 404 405 simple_lock(&mntid_slock); 406 mtype = makefstype(mp->mnt_op->vfs_name); 407 mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0); 408 mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype; 409 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 410 if (xxxfs_mntid == 0) 411 ++xxxfs_mntid; 412 tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid); 413 tfsid.__fsid_val[1] = mtype; 414 if (!CIRCLEQ_EMPTY(&mountlist)) { 415 while (vfs_getvfs(&tfsid)) { 416 tfsid.__fsid_val[0]++; 417 xxxfs_mntid++; 418 } 419 } 420 mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0]; 421 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 422 simple_unlock(&mntid_slock); 423 } 424 425 /* 426 * Make a 'unique' number from a mount type name. 427 */ 428 long 429 makefstype(const char *type) 430 { 431 long rv; 432 433 for (rv = 0; *type; type++) { 434 rv <<= 2; 435 rv ^= *type; 436 } 437 return rv; 438 } 439 440 441 /* 442 * Set vnode attributes to VNOVAL 443 */ 444 void 445 vattr_null(struct vattr *vap) 446 { 447 448 vap->va_type = VNON; 449 450 /* 451 * Assign individually so that it is safe even if size and 452 * sign of each member are varied. 453 */ 454 vap->va_mode = VNOVAL; 455 vap->va_nlink = VNOVAL; 456 vap->va_uid = VNOVAL; 457 vap->va_gid = VNOVAL; 458 vap->va_fsid = VNOVAL; 459 vap->va_fileid = VNOVAL; 460 vap->va_size = VNOVAL; 461 vap->va_blocksize = VNOVAL; 462 vap->va_atime.tv_sec = 463 vap->va_mtime.tv_sec = 464 vap->va_ctime.tv_sec = 465 vap->va_birthtime.tv_sec = VNOVAL; 466 vap->va_atime.tv_nsec = 467 vap->va_mtime.tv_nsec = 468 vap->va_ctime.tv_nsec = 469 vap->va_birthtime.tv_nsec = VNOVAL; 470 vap->va_gen = VNOVAL; 471 vap->va_flags = VNOVAL; 472 vap->va_rdev = VNOVAL; 473 vap->va_bytes = VNOVAL; 474 vap->va_vaflags = 0; 475 } 476 477 /* 478 * Routines having to do with the management of the vnode table. 
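 *
 * In rough outline (a summary of the code below, not a contract):
 * getnewvnode() hands out a vnode either fresh from vnode_pool or
 * recycled via getcleanvnode(); while v_usecount is non-zero the vnode
 * stays off the free lists; vrele()/vput() drop the last reference and
 * append it to vnode_free_list (or vnode_hold_list if buffers still
 * hold it), from where it is later recycled by getcleanvnode() or
 * freed outright by vfs_drainvnodes().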
479 */ 480 extern int (**dead_vnodeop_p)(void *); 481 long numvnodes; 482 483 /* 484 * Return the next vnode from the free list. 485 */ 486 int 487 getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *), 488 struct vnode **vpp) 489 { 490 extern struct uvm_pagerops uvm_vnodeops; 491 struct uvm_object *uobj; 492 struct lwp *l = curlwp; /* XXX */ 493 static int toggle; 494 struct vnode *vp; 495 int error = 0, tryalloc; 496 497 try_again: 498 if (mp) { 499 /* 500 * Mark filesystem busy while we're creating a vnode. 501 * If unmount is in progress, this will wait; if the 502 * unmount succeeds (only if umount -f), this will 503 * return an error. If the unmount fails, we'll keep 504 * going afterwards. 505 * (This puts the per-mount vnode list logically under 506 * the protection of the vfs_busy lock). 507 */ 508 error = vfs_busy(mp, LK_RECURSEFAIL, 0); 509 if (error && error != EDEADLK) 510 return error; 511 } 512 513 /* 514 * We must choose whether to allocate a new vnode or recycle an 515 * existing one. The criterion for allocating a new one is that 516 * the total number of vnodes is less than the number desired or 517 * there are no vnodes on either free list. Generally we only 518 * want to recycle vnodes that have no buffers associated with 519 * them, so we look first on the vnode_free_list. If it is empty, 520 * we next consider vnodes with referencing buffers on the 521 * vnode_hold_list. The toggle ensures that half the time we 522 * will use a buffer from the vnode_hold_list, and half the time 523 * we will allocate a new one unless the list has grown to twice 524 * the desired size. We are reticent to recycle vnodes from the 525 * vnode_hold_list because we will lose the identity of all its 526 * referencing buffers. 527 */ 528 529 vp = NULL; 530 531 simple_lock(&vnode_free_list_slock); 532 533 toggle ^= 1; 534 if (numvnodes > 2 * desiredvnodes) 535 toggle = 0; 536 537 tryalloc = numvnodes < desiredvnodes || 538 (TAILQ_FIRST(&vnode_free_list) == NULL && 539 (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle)); 540 541 if (tryalloc && 542 (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) { 543 numvnodes++; 544 simple_unlock(&vnode_free_list_slock); 545 memset(vp, 0, sizeof(*vp)); 546 UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 1); 547 /* 548 * done by memset() above. 549 * LIST_INIT(&vp->v_nclist); 550 * LIST_INIT(&vp->v_dnclist); 551 */ 552 } else { 553 vp = getcleanvnode(l); 554 /* 555 * Unless this is a bad time of the month, at most 556 * the first NCPUS items on the free list are 557 * locked, so this is close enough to being empty. 558 */ 559 if (vp == NULLVP) { 560 if (mp && error != EDEADLK) 561 vfs_unbusy(mp); 562 if (tryalloc) { 563 printf("WARNING: unable to allocate new " 564 "vnode, retrying...\n"); 565 (void) tsleep(&lbolt, PRIBIO, "newvn", hz); 566 goto try_again; 567 } 568 tablefull("vnode", "increase kern.maxvnodes or NVNODE"); 569 *vpp = 0; 570 return (ENFILE); 571 } 572 vp->v_usecount = 1; 573 vp->v_flag = 0; 574 vp->v_socket = NULL; 575 } 576 vp->v_type = VNON; 577 vp->v_vnlock = &vp->v_lock; 578 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); 579 KASSERT(LIST_EMPTY(&vp->v_nclist)); 580 KASSERT(LIST_EMPTY(&vp->v_dnclist)); 581 vp->v_tag = tag; 582 vp->v_op = vops; 583 insmntque(vp, mp); 584 *vpp = vp; 585 vp->v_data = 0; 586 simple_lock_init(&vp->v_interlock); 587 588 /* 589 * initialize uvm_object within vnode. 
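 * Whether the vnode came fresh from the pool (UVM_OBJ_INIT above) or
 * was recycled by getcleanvnode(), the object is expected to be empty
 * here; v_size stays VSIZENOTSET until the file system establishes the
 * real size (typically via uvm_vnp_setsize()).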
590 */ 591 592 uobj = &vp->v_uobj; 593 KASSERT(uobj->pgops == &uvm_vnodeops); 594 KASSERT(uobj->uo_npages == 0); 595 KASSERT(TAILQ_FIRST(&uobj->memq) == NULL); 596 vp->v_size = VSIZENOTSET; 597 598 if (mp && error != EDEADLK) 599 vfs_unbusy(mp); 600 return (0); 601 } 602 603 /* 604 * This is really just the reverse of getnewvnode(). Needed for 605 * VFS_VGET functions who may need to push back a vnode in case 606 * of a locking race. 607 */ 608 void 609 ungetnewvnode(struct vnode *vp) 610 { 611 #ifdef DIAGNOSTIC 612 if (vp->v_usecount != 1) 613 panic("ungetnewvnode: busy vnode"); 614 #endif 615 vp->v_usecount--; 616 insmntque(vp, NULL); 617 vp->v_type = VBAD; 618 619 simple_lock(&vp->v_interlock); 620 /* 621 * Insert at head of LRU list 622 */ 623 simple_lock(&vnode_free_list_slock); 624 if (vp->v_holdcnt > 0) 625 TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist); 626 else 627 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 628 simple_unlock(&vnode_free_list_slock); 629 simple_unlock(&vp->v_interlock); 630 } 631 632 /* 633 * Move a vnode from one mount queue to another. 634 */ 635 static void 636 insmntque(struct vnode *vp, struct mount *mp) 637 { 638 639 #ifdef DIAGNOSTIC 640 if ((mp != NULL) && 641 (mp->mnt_iflag & IMNT_UNMOUNT) && 642 !(mp->mnt_flag & MNT_SOFTDEP) && 643 vp->v_tag != VT_VFS) { 644 panic("insmntque into dying filesystem"); 645 } 646 #endif 647 648 simple_lock(&mntvnode_slock); 649 /* 650 * Delete from old mount point vnode list, if on one. 651 */ 652 if (vp->v_mount != NULL) 653 LIST_REMOVE(vp, v_mntvnodes); 654 /* 655 * Insert into list of vnodes for the new mount point, if available. 656 */ 657 if ((vp->v_mount = mp) != NULL) 658 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 659 simple_unlock(&mntvnode_slock); 660 } 661 662 /* 663 * Update outstanding I/O count and do wakeup if requested. 664 */ 665 void 666 vwakeup(struct buf *bp) 667 { 668 struct vnode *vp; 669 670 if ((vp = bp->b_vp) != NULL) { 671 /* XXX global lock hack 672 * can't use v_interlock here since this is called 673 * in interrupt context from biodone(). 674 */ 675 simple_lock(&global_v_numoutput_slock); 676 if (--vp->v_numoutput < 0) 677 panic("vwakeup: neg numoutput, vp %p", vp); 678 if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { 679 vp->v_flag &= ~VBWAIT; 680 wakeup((caddr_t)&vp->v_numoutput); 681 } 682 simple_unlock(&global_v_numoutput_slock); 683 } 684 } 685 686 /* 687 * Flush out and invalidate all buffers associated with a vnode. 688 * Called with the underlying vnode locked, which should prevent new dirty 689 * buffers from being queued. 690 */ 691 int 692 vinvalbuf(struct vnode *vp, int flags, struct ucred *cred, struct lwp *l, 693 int slpflag, int slptimeo) 694 { 695 struct buf *bp, *nbp; 696 int s, error; 697 int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO | 698 (flags & V_SAVE ? 
PGO_CLEANIT : 0); 699 700 /* XXXUBC this doesn't look at flags or slp* */ 701 simple_lock(&vp->v_interlock); 702 error = VOP_PUTPAGES(vp, 0, 0, flushflags); 703 if (error) { 704 return error; 705 } 706 707 if (flags & V_SAVE) { 708 error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, l); 709 if (error) 710 return (error); 711 #ifdef DIAGNOSTIC 712 s = splbio(); 713 if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd)) 714 panic("vinvalbuf: dirty bufs, vp %p", vp); 715 splx(s); 716 #endif 717 } 718 719 s = splbio(); 720 721 restart: 722 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 723 nbp = LIST_NEXT(bp, b_vnbufs); 724 simple_lock(&bp->b_interlock); 725 if (bp->b_flags & B_BUSY) { 726 bp->b_flags |= B_WANTED; 727 error = ltsleep((caddr_t)bp, 728 slpflag | (PRIBIO + 1) | PNORELOCK, 729 "vinvalbuf", slptimeo, &bp->b_interlock); 730 if (error) { 731 splx(s); 732 return (error); 733 } 734 goto restart; 735 } 736 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; 737 simple_unlock(&bp->b_interlock); 738 brelse(bp); 739 } 740 741 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 742 nbp = LIST_NEXT(bp, b_vnbufs); 743 simple_lock(&bp->b_interlock); 744 if (bp->b_flags & B_BUSY) { 745 bp->b_flags |= B_WANTED; 746 error = ltsleep((caddr_t)bp, 747 slpflag | (PRIBIO + 1) | PNORELOCK, 748 "vinvalbuf", slptimeo, &bp->b_interlock); 749 if (error) { 750 splx(s); 751 return (error); 752 } 753 goto restart; 754 } 755 /* 756 * XXX Since there are no node locks for NFS, I believe 757 * there is a slight chance that a delayed write will 758 * occur while sleeping just above, so check for it. 759 */ 760 if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { 761 #ifdef DEBUG 762 printf("buffer still DELWRI\n"); 763 #endif 764 bp->b_flags |= B_BUSY | B_VFLUSH; 765 simple_unlock(&bp->b_interlock); 766 VOP_BWRITE(bp); 767 goto restart; 768 } 769 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; 770 simple_unlock(&bp->b_interlock); 771 brelse(bp); 772 } 773 774 #ifdef DIAGNOSTIC 775 if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd)) 776 panic("vinvalbuf: flush failed, vp %p", vp); 777 #endif 778 779 splx(s); 780 781 return (0); 782 } 783 784 /* 785 * Destroy any in core blocks past the truncation length. 786 * Called with the underlying vnode locked, which should prevent new dirty 787 * buffers from being queued. 
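 *
 * A hedged example of the intended use (hypothetical caller and block
 * number): after shrinking the on-disk inode, a file system's truncate
 * code would discard the now-stale cached blocks with something like
 *
 *	error = vtruncbuf(vp, new_last_block + 1, 0, 0);
 *
 * Every buffer whose logical block number is at or beyond the passed
 * lbn is invalidated; blocks below it are left alone.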
788 */ 789 int 790 vtruncbuf(struct vnode *vp, daddr_t lbn, int slpflag, int slptimeo) 791 { 792 struct buf *bp, *nbp; 793 int s, error; 794 voff_t off; 795 796 off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift); 797 simple_lock(&vp->v_interlock); 798 error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO); 799 if (error) { 800 return error; 801 } 802 803 s = splbio(); 804 805 restart: 806 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 807 nbp = LIST_NEXT(bp, b_vnbufs); 808 if (bp->b_lblkno < lbn) 809 continue; 810 simple_lock(&bp->b_interlock); 811 if (bp->b_flags & B_BUSY) { 812 bp->b_flags |= B_WANTED; 813 error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK, 814 "vtruncbuf", slptimeo, &bp->b_interlock); 815 if (error) { 816 splx(s); 817 return (error); 818 } 819 goto restart; 820 } 821 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; 822 simple_unlock(&bp->b_interlock); 823 brelse(bp); 824 } 825 826 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 827 nbp = LIST_NEXT(bp, b_vnbufs); 828 if (bp->b_lblkno < lbn) 829 continue; 830 simple_lock(&bp->b_interlock); 831 if (bp->b_flags & B_BUSY) { 832 bp->b_flags |= B_WANTED; 833 error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK, 834 "vtruncbuf", slptimeo, &bp->b_interlock); 835 if (error) { 836 splx(s); 837 return (error); 838 } 839 goto restart; 840 } 841 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; 842 simple_unlock(&bp->b_interlock); 843 brelse(bp); 844 } 845 846 splx(s); 847 848 return (0); 849 } 850 851 void 852 vflushbuf(struct vnode *vp, int sync) 853 { 854 struct buf *bp, *nbp; 855 int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0); 856 int s; 857 858 simple_lock(&vp->v_interlock); 859 (void) VOP_PUTPAGES(vp, 0, 0, flags); 860 861 loop: 862 s = splbio(); 863 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 864 nbp = LIST_NEXT(bp, b_vnbufs); 865 simple_lock(&bp->b_interlock); 866 if ((bp->b_flags & B_BUSY)) { 867 simple_unlock(&bp->b_interlock); 868 continue; 869 } 870 if ((bp->b_flags & B_DELWRI) == 0) 871 panic("vflushbuf: not dirty, bp %p", bp); 872 bp->b_flags |= B_BUSY | B_VFLUSH; 873 simple_unlock(&bp->b_interlock); 874 splx(s); 875 /* 876 * Wait for I/O associated with indirect blocks to complete, 877 * since there is no way to quickly wait for them below. 878 */ 879 if (bp->b_vp == vp || sync == 0) 880 (void) bawrite(bp); 881 else 882 (void) bwrite(bp); 883 goto loop; 884 } 885 if (sync == 0) { 886 splx(s); 887 return; 888 } 889 simple_lock(&global_v_numoutput_slock); 890 while (vp->v_numoutput) { 891 vp->v_flag |= VBWAIT; 892 ltsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0, 893 &global_v_numoutput_slock); 894 } 895 simple_unlock(&global_v_numoutput_slock); 896 splx(s); 897 if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { 898 vprint("vflushbuf: dirty", vp); 899 goto loop; 900 } 901 } 902 903 /* 904 * Associate a buffer with a vnode. 905 */ 906 void 907 bgetvp(struct vnode *vp, struct buf *bp) 908 { 909 int s; 910 911 if (bp->b_vp) 912 panic("bgetvp: not free, bp %p", bp); 913 VHOLD(vp); 914 s = splbio(); 915 bp->b_vp = vp; 916 if (vp->v_type == VBLK || vp->v_type == VCHR) 917 bp->b_dev = vp->v_rdev; 918 else 919 bp->b_dev = NODEV; 920 /* 921 * Insert onto list for new vnode. 922 */ 923 bufinsvn(bp, &vp->v_cleanblkhd); 924 splx(s); 925 } 926 927 /* 928 * Disassociate a buffer from a vnode. 
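 *
 * bgetvp() above and brelvp() below are used as a pair: the association
 * takes a hold reference on the vnode (VHOLD) which the disassociation
 * releases (HOLDRELE), so a vnode that still owns cached buffers ends
 * up on vnode_hold_list rather than vnode_free_list when its last use
 * reference goes away.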
929 */ 930 void 931 brelvp(struct buf *bp) 932 { 933 struct vnode *vp; 934 int s; 935 936 if (bp->b_vp == NULL) 937 panic("brelvp: vp NULL, bp %p", bp); 938 939 s = splbio(); 940 vp = bp->b_vp; 941 /* 942 * Delete from old vnode list, if on one. 943 */ 944 if (LIST_NEXT(bp, b_vnbufs) != NOLIST) 945 bufremvn(bp); 946 947 if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) && 948 LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { 949 vp->v_flag &= ~(VWRITEMAPDIRTY|VONWORKLST); 950 LIST_REMOVE(vp, v_synclist); 951 } 952 953 bp->b_vp = NULL; 954 HOLDRELE(vp); 955 splx(s); 956 } 957 958 /* 959 * Reassign a buffer from one vnode to another. 960 * Used to assign file specific control information 961 * (indirect blocks) to the vnode to which they belong. 962 * 963 * This function must be called at splbio(). 964 */ 965 void 966 reassignbuf(struct buf *bp, struct vnode *newvp) 967 { 968 struct buflists *listheadp; 969 int delayx; 970 971 /* 972 * Delete from old vnode list, if on one. 973 */ 974 if (LIST_NEXT(bp, b_vnbufs) != NOLIST) 975 bufremvn(bp); 976 /* 977 * If dirty, put on list of dirty buffers; 978 * otherwise insert onto list of clean buffers. 979 */ 980 if ((bp->b_flags & B_DELWRI) == 0) { 981 listheadp = &newvp->v_cleanblkhd; 982 if (TAILQ_EMPTY(&newvp->v_uobj.memq) && 983 (newvp->v_flag & VONWORKLST) && 984 LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) { 985 newvp->v_flag &= ~(VWRITEMAPDIRTY|VONWORKLST); 986 LIST_REMOVE(newvp, v_synclist); 987 } 988 } else { 989 listheadp = &newvp->v_dirtyblkhd; 990 if ((newvp->v_flag & VONWORKLST) == 0) { 991 switch (newvp->v_type) { 992 case VDIR: 993 delayx = dirdelay; 994 break; 995 case VBLK: 996 if (newvp->v_specmountpoint != NULL) { 997 delayx = metadelay; 998 break; 999 } 1000 /* fall through */ 1001 default: 1002 delayx = filedelay; 1003 break; 1004 } 1005 if (!newvp->v_mount || 1006 (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0) 1007 vn_syncer_add_to_worklist(newvp, delayx); 1008 } 1009 } 1010 bufinsvn(bp, listheadp); 1011 } 1012 1013 /* 1014 * Create a vnode for a block device. 1015 * Used for root filesystem and swap areas. 1016 * Also used for memory file system special devices. 1017 */ 1018 int 1019 bdevvp(dev_t dev, struct vnode **vpp) 1020 { 1021 1022 return (getdevvp(dev, vpp, VBLK)); 1023 } 1024 1025 /* 1026 * Create a vnode for a character device. 1027 * Used for kernfs and some console handling. 1028 */ 1029 int 1030 cdevvp(dev_t dev, struct vnode **vpp) 1031 { 1032 1033 return (getdevvp(dev, vpp, VCHR)); 1034 } 1035 1036 /* 1037 * Create a vnode for a device. 1038 * Used by bdevvp (block device) for root file system etc., 1039 * and by cdevvp (character device) for console and kernfs. 1040 */ 1041 static int 1042 getdevvp(dev_t dev, struct vnode **vpp, enum vtype type) 1043 { 1044 struct vnode *vp; 1045 struct vnode *nvp; 1046 int error; 1047 1048 if (dev == NODEV) { 1049 *vpp = NULLVP; 1050 return (0); 1051 } 1052 error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp); 1053 if (error) { 1054 *vpp = NULLVP; 1055 return (error); 1056 } 1057 vp = nvp; 1058 vp->v_type = type; 1059 if ((nvp = checkalias(vp, dev, NULL)) != 0) { 1060 vput(vp); 1061 vp = nvp; 1062 } 1063 *vpp = vp; 1064 return (0); 1065 } 1066 1067 /* 1068 * Check to see if the new vnode represents a special device 1069 * for which we already have a vnode (either because of 1070 * bdevvp() or because of a different vnode representing 1071 * the same block device). If such an alias exists, deallocate 1072 * the existing contents and return the aliased vnode. 
The 1073 * caller is responsible for filling it with its new contents. 1074 */ 1075 struct vnode * 1076 checkalias(struct vnode *nvp, dev_t nvp_rdev, struct mount *mp) 1077 { 1078 struct lwp *l = curlwp; /* XXX */ 1079 struct vnode *vp; 1080 struct vnode **vpp; 1081 1082 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 1083 return (NULLVP); 1084 1085 vpp = &speclisth[SPECHASH(nvp_rdev)]; 1086 loop: 1087 simple_lock(&spechash_slock); 1088 for (vp = *vpp; vp; vp = vp->v_specnext) { 1089 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) 1090 continue; 1091 /* 1092 * Alias, but not in use, so flush it out. 1093 */ 1094 simple_lock(&vp->v_interlock); 1095 simple_unlock(&spechash_slock); 1096 if (vp->v_usecount == 0) { 1097 vgonel(vp, l); 1098 goto loop; 1099 } 1100 /* 1101 * What we're interested to know here is if someone else has 1102 * removed this vnode from the device hash list while we were 1103 * waiting. This can only happen if vclean() did it, and 1104 * this requires the vnode to be locked. 1105 */ 1106 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) 1107 goto loop; 1108 if (vp->v_specinfo == NULL) { 1109 vput(vp); 1110 goto loop; 1111 } 1112 simple_lock(&spechash_slock); 1113 break; 1114 } 1115 if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) { 1116 MALLOC(nvp->v_specinfo, struct specinfo *, 1117 sizeof(struct specinfo), M_VNODE, M_NOWAIT); 1118 /* XXX Erg. */ 1119 if (nvp->v_specinfo == NULL) { 1120 simple_unlock(&spechash_slock); 1121 uvm_wait("checkalias"); 1122 goto loop; 1123 } 1124 1125 nvp->v_rdev = nvp_rdev; 1126 nvp->v_hashchain = vpp; 1127 nvp->v_specnext = *vpp; 1128 nvp->v_specmountpoint = NULL; 1129 simple_unlock(&spechash_slock); 1130 nvp->v_speclockf = NULL; 1131 simple_lock_init(&nvp->v_spec_cow_slock); 1132 SLIST_INIT(&nvp->v_spec_cow_head); 1133 nvp->v_spec_cow_req = 0; 1134 nvp->v_spec_cow_count = 0; 1135 1136 *vpp = nvp; 1137 if (vp != NULLVP) { 1138 nvp->v_flag |= VALIASED; 1139 vp->v_flag |= VALIASED; 1140 vput(vp); 1141 } 1142 return (NULLVP); 1143 } 1144 simple_unlock(&spechash_slock); 1145 VOP_UNLOCK(vp, 0); 1146 simple_lock(&vp->v_interlock); 1147 vclean(vp, 0, l); 1148 vp->v_op = nvp->v_op; 1149 vp->v_tag = nvp->v_tag; 1150 vp->v_vnlock = &vp->v_lock; 1151 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); 1152 nvp->v_type = VNON; 1153 insmntque(vp, mp); 1154 return (vp); 1155 } 1156 1157 /* 1158 * Grab a particular vnode from the free list, increment its 1159 * reference count and lock it. If the vnode lock bit is set the 1160 * vnode is being eliminated in vgone. In that case, we can not 1161 * grab the vnode, so the process is awakened when the transition is 1162 * completed, and an error returned to indicate that the vnode is no 1163 * longer usable (possibly having been changed to a new file system type). 1164 */ 1165 int 1166 vget(struct vnode *vp, int flags) 1167 { 1168 int error; 1169 1170 /* 1171 * If the vnode is in the process of being cleaned out for 1172 * another use, we wait for the cleaning to finish and then 1173 * return failure. Cleaning is determined by checking that 1174 * the VXLOCK flag is set. 
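 *
 * The usual calling pattern looks roughly like this (illustrative only;
 * "hash_slock" and the table search are the caller's own):
 *
 *	loop:
 *		simple_lock(&hash_slock);
 *		-- find vp in some per-filesystem hash table --
 *		simple_lock(&vp->v_interlock);
 *		simple_unlock(&hash_slock);
 *		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK))
 *			goto loop;	-- vnode was being cleaned; retry
 *		-- vp is now referenced and locked; use it, then vput(vp) --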
1175 */ 1176 1177 if ((flags & LK_INTERLOCK) == 0) 1178 simple_lock(&vp->v_interlock); 1179 if ((vp->v_flag & (VXLOCK | VFREEING)) != 0) { 1180 if (flags & LK_NOWAIT) { 1181 simple_unlock(&vp->v_interlock); 1182 return EBUSY; 1183 } 1184 vp->v_flag |= VXWANT; 1185 ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock); 1186 return (ENOENT); 1187 } 1188 if (vp->v_usecount == 0) { 1189 simple_lock(&vnode_free_list_slock); 1190 if (vp->v_holdcnt > 0) 1191 TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); 1192 else 1193 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1194 simple_unlock(&vnode_free_list_slock); 1195 } 1196 vp->v_usecount++; 1197 #ifdef DIAGNOSTIC 1198 if (vp->v_usecount == 0) { 1199 vprint("vget", vp); 1200 panic("vget: usecount overflow, vp %p", vp); 1201 } 1202 #endif 1203 if (flags & LK_TYPE_MASK) { 1204 if ((error = vn_lock(vp, flags | LK_INTERLOCK))) { 1205 vrele(vp); 1206 } 1207 return (error); 1208 } 1209 simple_unlock(&vp->v_interlock); 1210 return (0); 1211 } 1212 1213 /* 1214 * vput(), just unlock and vrele() 1215 */ 1216 void 1217 vput(struct vnode *vp) 1218 { 1219 struct lwp *l = curlwp; /* XXX */ 1220 1221 #ifdef DIAGNOSTIC 1222 if (vp == NULL) 1223 panic("vput: null vp"); 1224 #endif 1225 simple_lock(&vp->v_interlock); 1226 vp->v_usecount--; 1227 if (vp->v_usecount > 0) { 1228 simple_unlock(&vp->v_interlock); 1229 VOP_UNLOCK(vp, 0); 1230 return; 1231 } 1232 #ifdef DIAGNOSTIC 1233 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 1234 vprint("vput: bad ref count", vp); 1235 panic("vput: ref cnt"); 1236 } 1237 #endif 1238 /* 1239 * Insert at tail of LRU list. 1240 */ 1241 simple_lock(&vnode_free_list_slock); 1242 if (vp->v_holdcnt > 0) 1243 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); 1244 else 1245 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 1246 simple_unlock(&vnode_free_list_slock); 1247 if (vp->v_flag & VEXECMAP) { 1248 uvmexp.execpages -= vp->v_uobj.uo_npages; 1249 uvmexp.filepages += vp->v_uobj.uo_npages; 1250 } 1251 vp->v_flag &= ~(VTEXT|VEXECMAP|VWRITEMAP); 1252 simple_unlock(&vp->v_interlock); 1253 VOP_INACTIVE(vp, l); 1254 } 1255 1256 /* 1257 * Vnode release. 1258 * If count drops to zero, call inactive routine and return to freelist. 1259 */ 1260 void 1261 vrele(struct vnode *vp) 1262 { 1263 struct lwp *l = curlwp; /* XXX */ 1264 1265 #ifdef DIAGNOSTIC 1266 if (vp == NULL) 1267 panic("vrele: null vp"); 1268 #endif 1269 simple_lock(&vp->v_interlock); 1270 vp->v_usecount--; 1271 if (vp->v_usecount > 0) { 1272 simple_unlock(&vp->v_interlock); 1273 return; 1274 } 1275 #ifdef DIAGNOSTIC 1276 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 1277 vprint("vrele: bad ref count", vp); 1278 panic("vrele: ref cnt vp %p", vp); 1279 } 1280 #endif 1281 /* 1282 * Insert at tail of LRU list. 1283 */ 1284 simple_lock(&vnode_free_list_slock); 1285 if (vp->v_holdcnt > 0) 1286 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); 1287 else 1288 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 1289 simple_unlock(&vnode_free_list_slock); 1290 if (vp->v_flag & VEXECMAP) { 1291 uvmexp.execpages -= vp->v_uobj.uo_npages; 1292 uvmexp.filepages += vp->v_uobj.uo_npages; 1293 } 1294 vp->v_flag &= ~(VTEXT|VEXECMAP|VWRITEMAP); 1295 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0) 1296 VOP_INACTIVE(vp, l); 1297 } 1298 1299 /* 1300 * Page or buffer structure gets a reference. 1301 * Called with v_interlock held. 
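 *
 * Callers normally go through the VHOLD()/HOLDRELE() macros, as
 * bgetvp() and brelvp() above do, rather than calling vholdl() and
 * holdrelel() directly; the wrappers take care of acquiring and
 * releasing v_interlock.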
1302 */ 1303 void 1304 vholdl(struct vnode *vp) 1305 { 1306 1307 /* 1308 * If it is on the freelist and the hold count is currently 1309 * zero, move it to the hold list. The test of the back 1310 * pointer and the use reference count of zero is because 1311 * it will be removed from a free list by getnewvnode, 1312 * but will not have its reference count incremented until 1313 * after calling vgone. If the reference count were 1314 * incremented first, vgone would (incorrectly) try to 1315 * close the previous instance of the underlying object. 1316 * So, the back pointer is explicitly set to `0xdeadb' in 1317 * getnewvnode after removing it from a freelist to ensure 1318 * that we do not try to move it here. 1319 */ 1320 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) && 1321 vp->v_holdcnt == 0 && vp->v_usecount == 0) { 1322 simple_lock(&vnode_free_list_slock); 1323 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1324 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); 1325 simple_unlock(&vnode_free_list_slock); 1326 } 1327 vp->v_holdcnt++; 1328 } 1329 1330 /* 1331 * Page or buffer structure frees a reference. 1332 * Called with v_interlock held. 1333 */ 1334 void 1335 holdrelel(struct vnode *vp) 1336 { 1337 1338 if (vp->v_holdcnt <= 0) 1339 panic("holdrelel: holdcnt vp %p", vp); 1340 vp->v_holdcnt--; 1341 1342 /* 1343 * If it is on the holdlist and the hold count drops to 1344 * zero, move it to the free list. The test of the back 1345 * pointer and the use reference count of zero is because 1346 * it will be removed from a free list by getnewvnode, 1347 * but will not have its reference count incremented until 1348 * after calling vgone. If the reference count were 1349 * incremented first, vgone would (incorrectly) try to 1350 * close the previous instance of the underlying object. 1351 * So, the back pointer is explicitly set to `0xdeadb' in 1352 * getnewvnode after removing it from a freelist to ensure 1353 * that we do not try to move it here. 1354 */ 1355 1356 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) && 1357 vp->v_holdcnt == 0 && vp->v_usecount == 0) { 1358 simple_lock(&vnode_free_list_slock); 1359 TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); 1360 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 1361 simple_unlock(&vnode_free_list_slock); 1362 } 1363 } 1364 1365 /* 1366 * Vnode reference. 1367 */ 1368 void 1369 vref(struct vnode *vp) 1370 { 1371 1372 simple_lock(&vp->v_interlock); 1373 if (vp->v_usecount <= 0) 1374 panic("vref used where vget required, vp %p", vp); 1375 vp->v_usecount++; 1376 #ifdef DIAGNOSTIC 1377 if (vp->v_usecount == 0) { 1378 vprint("vref", vp); 1379 panic("vref: usecount overflow, vp %p", vp); 1380 } 1381 #endif 1382 simple_unlock(&vp->v_interlock); 1383 } 1384 1385 /* 1386 * Remove any vnodes in the vnode table belonging to mount point mp. 1387 * 1388 * If FORCECLOSE is not specified, there should not be any active ones, 1389 * return error if any are found (nb: this is a user error, not a 1390 * system error). If FORCECLOSE is specified, detach any active vnodes 1391 * that are found. 1392 * 1393 * If WRITECLOSE is set, only flush out regular file vnodes open for 1394 * writing. 1395 * 1396 * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped. 
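 *
 * A sketch of the typical caller, a file system's unmount path
 * (names here are illustrative, not prescriptive):
 *
 *	int error, flags = 0;
 *
 *	if (mntflags & MNT_FORCE)
 *		flags |= FORCECLOSE;
 *	error = vflush(mp, NULLVP, SKIPSYSTEM | flags);
 *	if (error)
 *		return error;
 *
 * skipvp is normally NULLVP; it lets a caller that must keep one vnode
 * alive (a device or quota vnode, say) exempt it from the flush.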
1397 */ 1398 #ifdef DEBUG 1399 int busyprt = 0; /* print out busy vnodes */ 1400 struct ctldebug debug1 = { "busyprt", &busyprt }; 1401 #endif 1402 1403 int 1404 vflush(struct mount *mp, struct vnode *skipvp, int flags) 1405 { 1406 struct lwp *l = curlwp; /* XXX */ 1407 struct vnode *vp, *nvp; 1408 int busy = 0; 1409 1410 simple_lock(&mntvnode_slock); 1411 loop: 1412 for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { 1413 if (vp->v_mount != mp) 1414 goto loop; 1415 nvp = LIST_NEXT(vp, v_mntvnodes); 1416 /* 1417 * Skip over a selected vnode. 1418 */ 1419 if (vp == skipvp) 1420 continue; 1421 simple_lock(&vp->v_interlock); 1422 /* 1423 * Skip over a vnodes marked VSYSTEM. 1424 */ 1425 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { 1426 simple_unlock(&vp->v_interlock); 1427 continue; 1428 } 1429 /* 1430 * If WRITECLOSE is set, only flush out regular file 1431 * vnodes open for writing. 1432 */ 1433 if ((flags & WRITECLOSE) && 1434 (vp->v_writecount == 0 || vp->v_type != VREG)) { 1435 simple_unlock(&vp->v_interlock); 1436 continue; 1437 } 1438 /* 1439 * With v_usecount == 0, all we need to do is clear 1440 * out the vnode data structures and we are done. 1441 */ 1442 if (vp->v_usecount == 0) { 1443 simple_unlock(&mntvnode_slock); 1444 vgonel(vp, l); 1445 simple_lock(&mntvnode_slock); 1446 continue; 1447 } 1448 /* 1449 * If FORCECLOSE is set, forcibly close the vnode. 1450 * For block or character devices, revert to an 1451 * anonymous device. For all other files, just kill them. 1452 */ 1453 if (flags & FORCECLOSE) { 1454 simple_unlock(&mntvnode_slock); 1455 if (vp->v_type != VBLK && vp->v_type != VCHR) { 1456 vgonel(vp, l); 1457 } else { 1458 vclean(vp, 0, l); 1459 vp->v_op = spec_vnodeop_p; 1460 insmntque(vp, (struct mount *)0); 1461 } 1462 simple_lock(&mntvnode_slock); 1463 continue; 1464 } 1465 #ifdef DEBUG 1466 if (busyprt) 1467 vprint("vflush: busy vnode", vp); 1468 #endif 1469 simple_unlock(&vp->v_interlock); 1470 busy++; 1471 } 1472 simple_unlock(&mntvnode_slock); 1473 if (busy) 1474 return (EBUSY); 1475 return (0); 1476 } 1477 1478 /* 1479 * Disassociate the underlying file system from a vnode. 1480 */ 1481 static void 1482 vclean(struct vnode *vp, int flags, struct lwp *l) 1483 { 1484 struct mount *mp; 1485 int active; 1486 1487 LOCK_ASSERT(simple_lock_held(&vp->v_interlock)); 1488 1489 /* 1490 * Check to see if the vnode is in use. 1491 * If so we have to reference it before we clean it out 1492 * so that its count cannot fall to zero and generate a 1493 * race against ourselves to recycle it. 1494 */ 1495 1496 if ((active = vp->v_usecount) != 0) { 1497 vp->v_usecount++; 1498 #ifdef DIAGNOSTIC 1499 if (vp->v_usecount == 0) { 1500 vprint("vclean", vp); 1501 panic("vclean: usecount overflow"); 1502 } 1503 #endif 1504 } 1505 1506 /* 1507 * Prevent the vnode from being recycled or 1508 * brought into use while we clean it out. 1509 */ 1510 if (vp->v_flag & VXLOCK) 1511 panic("vclean: deadlock, vp %p", vp); 1512 vp->v_flag |= VXLOCK; 1513 if (vp->v_flag & VEXECMAP) { 1514 uvmexp.execpages -= vp->v_uobj.uo_npages; 1515 uvmexp.filepages += vp->v_uobj.uo_npages; 1516 } 1517 vp->v_flag &= ~(VTEXT|VEXECMAP); 1518 1519 /* 1520 * Even if the count is zero, the VOP_INACTIVE routine may still 1521 * have the object locked while it cleans it out. The VOP_LOCK 1522 * ensures that the VOP_INACTIVE routine is done with its work. 1523 * For active vnodes, it ensures that no other activity can 1524 * occur while the underlying object is being cleaned out. 
1525 */ 1526 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK); 1527 1528 /* 1529 * Clean out any cached data associated with the vnode. 1530 * If special device, remove it from special device alias list. 1531 * if it is on one. 1532 */ 1533 if (flags & DOCLOSE) { 1534 int error; 1535 struct vnode *vq, *vx; 1536 1537 vn_start_write(vp, &mp, V_WAIT | V_LOWER); 1538 error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0); 1539 vn_finished_write(mp, V_LOWER); 1540 if (error) 1541 error = vinvalbuf(vp, 0, NOCRED, l, 0, 0); 1542 KASSERT(error == 0); 1543 KASSERT((vp->v_flag & VONWORKLST) == 0); 1544 1545 if (active) 1546 VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL); 1547 1548 if ((vp->v_type == VBLK || vp->v_type == VCHR) && 1549 vp->v_specinfo != 0) { 1550 simple_lock(&spechash_slock); 1551 if (vp->v_hashchain != NULL) { 1552 if (*vp->v_hashchain == vp) { 1553 *vp->v_hashchain = vp->v_specnext; 1554 } else { 1555 for (vq = *vp->v_hashchain; vq; 1556 vq = vq->v_specnext) { 1557 if (vq->v_specnext != vp) 1558 continue; 1559 vq->v_specnext = vp->v_specnext; 1560 break; 1561 } 1562 if (vq == NULL) 1563 panic("missing bdev"); 1564 } 1565 if (vp->v_flag & VALIASED) { 1566 vx = NULL; 1567 for (vq = *vp->v_hashchain; vq; 1568 vq = vq->v_specnext) { 1569 if (vq->v_rdev != vp->v_rdev || 1570 vq->v_type != vp->v_type) 1571 continue; 1572 if (vx) 1573 break; 1574 vx = vq; 1575 } 1576 if (vx == NULL) 1577 panic("missing alias"); 1578 if (vq == NULL) 1579 vx->v_flag &= ~VALIASED; 1580 vp->v_flag &= ~VALIASED; 1581 } 1582 } 1583 simple_unlock(&spechash_slock); 1584 FREE(vp->v_specinfo, M_VNODE); 1585 vp->v_specinfo = NULL; 1586 } 1587 } 1588 LOCK_ASSERT(!simple_lock_held(&vp->v_interlock)); 1589 1590 /* 1591 * If purging an active vnode, it must be closed and 1592 * deactivated before being reclaimed. Note that the 1593 * VOP_INACTIVE will unlock the vnode. 1594 */ 1595 if (active) { 1596 VOP_INACTIVE(vp, l); 1597 } else { 1598 /* 1599 * Any other processes trying to obtain this lock must first 1600 * wait for VXLOCK to clear, then call the new lock operation. 1601 */ 1602 VOP_UNLOCK(vp, 0); 1603 } 1604 /* 1605 * Reclaim the vnode. 1606 */ 1607 if (VOP_RECLAIM(vp, l)) 1608 panic("vclean: cannot reclaim, vp %p", vp); 1609 if (active) { 1610 /* 1611 * Inline copy of vrele() since VOP_INACTIVE 1612 * has already been called. 1613 */ 1614 simple_lock(&vp->v_interlock); 1615 if (--vp->v_usecount <= 0) { 1616 #ifdef DIAGNOSTIC 1617 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 1618 vprint("vclean: bad ref count", vp); 1619 panic("vclean: ref cnt"); 1620 } 1621 #endif 1622 /* 1623 * Insert at tail of LRU list. 1624 */ 1625 1626 simple_unlock(&vp->v_interlock); 1627 simple_lock(&vnode_free_list_slock); 1628 #ifdef DIAGNOSTIC 1629 if (vp->v_holdcnt > 0) 1630 panic("vclean: not clean, vp %p", vp); 1631 #endif 1632 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 1633 simple_unlock(&vnode_free_list_slock); 1634 } else 1635 simple_unlock(&vp->v_interlock); 1636 } 1637 1638 KASSERT(vp->v_uobj.uo_npages == 0); 1639 if (vp->v_type == VREG && vp->v_ractx != NULL) { 1640 uvm_ra_freectx(vp->v_ractx); 1641 vp->v_ractx = NULL; 1642 } 1643 cache_purge(vp); 1644 1645 /* 1646 * Done with purge, notify sleepers of the grim news. 
1647 */ 1648 vp->v_op = dead_vnodeop_p; 1649 vp->v_tag = VT_NON; 1650 simple_lock(&vp->v_interlock); 1651 VN_KNOTE(vp, NOTE_REVOKE); /* FreeBSD has this in vn_pollgone() */ 1652 vp->v_flag &= ~(VXLOCK|VLOCKSWORK); 1653 if (vp->v_flag & VXWANT) { 1654 vp->v_flag &= ~VXWANT; 1655 simple_unlock(&vp->v_interlock); 1656 wakeup((caddr_t)vp); 1657 } else 1658 simple_unlock(&vp->v_interlock); 1659 } 1660 1661 /* 1662 * Recycle an unused vnode to the front of the free list. 1663 * Release the passed interlock if the vnode will be recycled. 1664 */ 1665 int 1666 vrecycle(struct vnode *vp, struct simplelock *inter_lkp, struct lwp *l) 1667 { 1668 1669 simple_lock(&vp->v_interlock); 1670 if (vp->v_usecount == 0) { 1671 if (inter_lkp) 1672 simple_unlock(inter_lkp); 1673 vgonel(vp, l); 1674 return (1); 1675 } 1676 simple_unlock(&vp->v_interlock); 1677 return (0); 1678 } 1679 1680 /* 1681 * Eliminate all activity associated with a vnode 1682 * in preparation for reuse. 1683 */ 1684 void 1685 vgone(struct vnode *vp) 1686 { 1687 struct lwp *l = curlwp; /* XXX */ 1688 1689 simple_lock(&vp->v_interlock); 1690 vgonel(vp, l); 1691 } 1692 1693 /* 1694 * vgone, with the vp interlock held. 1695 */ 1696 void 1697 vgonel(struct vnode *vp, struct lwp *l) 1698 { 1699 1700 LOCK_ASSERT(simple_lock_held(&vp->v_interlock)); 1701 1702 /* 1703 * If a vgone (or vclean) is already in progress, 1704 * wait until it is done and return. 1705 */ 1706 1707 if (vp->v_flag & VXLOCK) { 1708 vp->v_flag |= VXWANT; 1709 ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock); 1710 return; 1711 } 1712 1713 /* 1714 * Clean out the filesystem specific data. 1715 */ 1716 1717 vclean(vp, DOCLOSE, l); 1718 KASSERT((vp->v_flag & VONWORKLST) == 0); 1719 1720 /* 1721 * Delete from old mount point vnode list, if on one. 1722 */ 1723 1724 if (vp->v_mount != NULL) 1725 insmntque(vp, (struct mount *)0); 1726 1727 /* 1728 * The test of the back pointer and the reference count of 1729 * zero is because it will be removed from the free list by 1730 * getcleanvnode, but will not have its reference count 1731 * incremented until after calling vgone. If the reference 1732 * count were incremented first, vgone would (incorrectly) 1733 * try to close the previous instance of the underlying object. 1734 * So, the back pointer is explicitly set to `0xdeadb' in 1735 * getnewvnode after removing it from the freelist to ensure 1736 * that we do not try to move it here. 1737 */ 1738 1739 vp->v_type = VBAD; 1740 if (vp->v_usecount == 0) { 1741 boolean_t dofree; 1742 1743 simple_lock(&vnode_free_list_slock); 1744 if (vp->v_holdcnt > 0) 1745 panic("vgonel: not clean, vp %p", vp); 1746 /* 1747 * if it isn't on the freelist, we're called by getcleanvnode 1748 * and vnode is being re-used. otherwise, we'll free it. 1749 */ 1750 dofree = vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb; 1751 if (dofree) { 1752 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1753 numvnodes--; 1754 } 1755 simple_unlock(&vnode_free_list_slock); 1756 if (dofree) 1757 pool_put(&vnode_pool, vp); 1758 } 1759 } 1760 1761 /* 1762 * Lookup a vnode by device number. 
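 *
 * vdevgone() below is built on this lookup: a device driver's detach
 * path can call, say, vdevgone(cmaj, 0, last_minor, VCHR) (illustrative
 * arguments) to revoke every vnode still naming the departing device.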
1763 */ 1764 int 1765 vfinddev(dev_t dev, enum vtype type, struct vnode **vpp) 1766 { 1767 struct vnode *vp; 1768 int rc = 0; 1769 1770 simple_lock(&spechash_slock); 1771 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 1772 if (dev != vp->v_rdev || type != vp->v_type) 1773 continue; 1774 *vpp = vp; 1775 rc = 1; 1776 break; 1777 } 1778 simple_unlock(&spechash_slock); 1779 return (rc); 1780 } 1781 1782 /* 1783 * Revoke all the vnodes corresponding to the specified minor number 1784 * range (endpoints inclusive) of the specified major. 1785 */ 1786 void 1787 vdevgone(int maj, int minl, int minh, enum vtype type) 1788 { 1789 struct vnode *vp; 1790 int mn; 1791 1792 for (mn = minl; mn <= minh; mn++) 1793 if (vfinddev(makedev(maj, mn), type, &vp)) 1794 VOP_REVOKE(vp, REVOKEALL); 1795 } 1796 1797 /* 1798 * Calculate the total number of references to a special device. 1799 */ 1800 int 1801 vcount(struct vnode *vp) 1802 { 1803 struct vnode *vq, *vnext; 1804 int count; 1805 1806 loop: 1807 if ((vp->v_flag & VALIASED) == 0) 1808 return (vp->v_usecount); 1809 simple_lock(&spechash_slock); 1810 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { 1811 vnext = vq->v_specnext; 1812 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) 1813 continue; 1814 /* 1815 * Alias, but not in use, so flush it out. 1816 */ 1817 if (vq->v_usecount == 0 && vq != vp && 1818 (vq->v_flag & VXLOCK) == 0) { 1819 simple_unlock(&spechash_slock); 1820 vgone(vq); 1821 goto loop; 1822 } 1823 count += vq->v_usecount; 1824 } 1825 simple_unlock(&spechash_slock); 1826 return (count); 1827 } 1828 1829 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) 1830 #define ARRAY_PRINT(idx, arr) \ 1831 ((idx) > 0 && (idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN") 1832 1833 const char * const vnode_tags[] = { VNODE_TAGS }; 1834 const char * const vnode_types[] = { VNODE_TYPES }; 1835 const char vnode_flagbits[] = VNODE_FLAGBITS; 1836 1837 /* 1838 * Print out a description of a vnode. 1839 */ 1840 void 1841 vprint(const char *label, struct vnode *vp) 1842 { 1843 char bf[96]; 1844 1845 if (label != NULL) 1846 printf("%s: ", label); 1847 printf("tag %s(%d) type %s(%d), usecount %d, writecount %ld, " 1848 "refcount %ld,", ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag, 1849 ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type, 1850 vp->v_usecount, vp->v_writecount, vp->v_holdcnt); 1851 bitmask_snprintf(vp->v_flag, vnode_flagbits, bf, sizeof(bf)); 1852 if (bf[0] != '\0') 1853 printf(" flags (%s)", &bf[1]); 1854 if (vp->v_data == NULL) { 1855 printf("\n"); 1856 } else { 1857 printf("\n\t"); 1858 VOP_PRINT(vp); 1859 } 1860 } 1861 1862 #ifdef DEBUG 1863 /* 1864 * List all of the locked vnodes in the system. 1865 * Called when debugging the kernel. 
1866 */ 1867 void 1868 printlockedvnodes(void) 1869 { 1870 struct mount *mp, *nmp; 1871 struct vnode *vp; 1872 1873 printf("Locked vnodes\n"); 1874 simple_lock(&mountlist_slock); 1875 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 1876 mp = nmp) { 1877 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) { 1878 nmp = CIRCLEQ_NEXT(mp, mnt_list); 1879 continue; 1880 } 1881 LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 1882 if (VOP_ISLOCKED(vp)) 1883 vprint(NULL, vp); 1884 } 1885 simple_lock(&mountlist_slock); 1886 nmp = CIRCLEQ_NEXT(mp, mnt_list); 1887 vfs_unbusy(mp); 1888 } 1889 simple_unlock(&mountlist_slock); 1890 } 1891 #endif 1892 1893 /* 1894 * sysctl helper routine to return list of supported fstypes 1895 */ 1896 static int 1897 sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS) 1898 { 1899 char bf[MFSNAMELEN]; 1900 char *where = oldp; 1901 struct vfsops *v; 1902 size_t needed, left, slen; 1903 int error, first; 1904 1905 if (newp != NULL) 1906 return (EPERM); 1907 if (namelen != 0) 1908 return (EINVAL); 1909 1910 first = 1; 1911 error = 0; 1912 needed = 0; 1913 left = *oldlenp; 1914 1915 LIST_FOREACH(v, &vfs_list, vfs_list) { 1916 if (where == NULL) 1917 needed += strlen(v->vfs_name) + 1; 1918 else { 1919 memset(bf, 0, sizeof(bf)); 1920 if (first) { 1921 strncpy(bf, v->vfs_name, sizeof(bf)); 1922 first = 0; 1923 } else { 1924 bf[0] = ' '; 1925 strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1); 1926 } 1927 bf[sizeof(bf)-1] = '\0'; 1928 slen = strlen(bf); 1929 if (left < slen + 1) 1930 break; 1931 /* +1 to copy out the trailing NUL byte */ 1932 error = copyout(bf, where, slen + 1); 1933 if (error) 1934 break; 1935 where += slen; 1936 needed += slen; 1937 left -= slen; 1938 } 1939 } 1940 *oldlenp = needed; 1941 return (error); 1942 } 1943 1944 /* 1945 * Top level filesystem related information gathering. 1946 */ 1947 SYSCTL_SETUP(sysctl_vfs_setup, "sysctl vfs subtree setup") 1948 { 1949 sysctl_createv(clog, 0, NULL, NULL, 1950 CTLFLAG_PERMANENT, 1951 CTLTYPE_NODE, "vfs", NULL, 1952 NULL, 0, NULL, 0, 1953 CTL_VFS, CTL_EOL); 1954 sysctl_createv(clog, 0, NULL, NULL, 1955 CTLFLAG_PERMANENT, 1956 CTLTYPE_NODE, "generic", 1957 SYSCTL_DESCR("Non-specific vfs related information"), 1958 NULL, 0, NULL, 0, 1959 CTL_VFS, VFS_GENERIC, CTL_EOL); 1960 sysctl_createv(clog, 0, NULL, NULL, 1961 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 1962 CTLTYPE_INT, "usermount", 1963 SYSCTL_DESCR("Whether unprivileged users may mount " 1964 "filesystems"), 1965 NULL, 0, &dovfsusermount, 0, 1966 CTL_VFS, VFS_GENERIC, VFS_USERMOUNT, CTL_EOL); 1967 sysctl_createv(clog, 0, NULL, NULL, 1968 CTLFLAG_PERMANENT, 1969 CTLTYPE_STRING, "fstypes", 1970 SYSCTL_DESCR("List of file systems present"), 1971 sysctl_vfs_generic_fstypes, 0, NULL, 0, 1972 CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL); 1973 } 1974 1975 1976 int kinfo_vdebug = 1; 1977 int kinfo_vgetfailed; 1978 #define KINFO_VNODESLOP 10 1979 /* 1980 * Dump vnode list (via sysctl). 1981 * Copyout address of vnode followed by vnode. 
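 *
 * The export format, as implemented below: for each vnode the kernel
 * copies out the vnode's kernel address (VPTRSZ bytes) immediately
 * followed by an image of the vnode itself (VNODESZ bytes), records
 * packed back to back.  A size-only probe (oldp == NULL) is answered
 * with room for KINFO_VNODESLOP extra records, since vnodes can be
 * created while the copy is in progress.  Historically this is what
 * pstat(8)-style tools consume.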
1982 */ 1983 /* ARGSUSED */ 1984 int 1985 sysctl_kern_vnode(SYSCTLFN_ARGS) 1986 { 1987 char *where = oldp; 1988 size_t *sizep = oldlenp; 1989 struct mount *mp, *nmp; 1990 struct vnode *nvp, *vp; 1991 char *bp = where, *savebp; 1992 char *ewhere; 1993 int error; 1994 1995 if (namelen != 0) 1996 return (EOPNOTSUPP); 1997 if (newp != NULL) 1998 return (EPERM); 1999 2000 #define VPTRSZ sizeof(struct vnode *) 2001 #define VNODESZ sizeof(struct vnode) 2002 if (where == NULL) { 2003 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 2004 return (0); 2005 } 2006 ewhere = where + *sizep; 2007 2008 simple_lock(&mountlist_slock); 2009 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 2010 mp = nmp) { 2011 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) { 2012 nmp = CIRCLEQ_NEXT(mp, mnt_list); 2013 continue; 2014 } 2015 savebp = bp; 2016 again: 2017 simple_lock(&mntvnode_slock); 2018 for (vp = LIST_FIRST(&mp->mnt_vnodelist); 2019 vp != NULL; 2020 vp = nvp) { 2021 /* 2022 * Check that the vp is still associated with 2023 * this filesystem. RACE: could have been 2024 * recycled onto the same filesystem. 2025 */ 2026 if (vp->v_mount != mp) { 2027 simple_unlock(&mntvnode_slock); 2028 if (kinfo_vdebug) 2029 printf("kinfo: vp changed\n"); 2030 bp = savebp; 2031 goto again; 2032 } 2033 nvp = LIST_NEXT(vp, v_mntvnodes); 2034 if (bp + VPTRSZ + VNODESZ > ewhere) { 2035 simple_unlock(&mntvnode_slock); 2036 *sizep = bp - where; 2037 return (ENOMEM); 2038 } 2039 simple_unlock(&mntvnode_slock); 2040 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || 2041 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) 2042 return (error); 2043 bp += VPTRSZ + VNODESZ; 2044 simple_lock(&mntvnode_slock); 2045 } 2046 simple_unlock(&mntvnode_slock); 2047 simple_lock(&mountlist_slock); 2048 nmp = CIRCLEQ_NEXT(mp, mnt_list); 2049 vfs_unbusy(mp); 2050 } 2051 simple_unlock(&mountlist_slock); 2052 2053 *sizep = bp - where; 2054 return (0); 2055 } 2056 2057 /* 2058 * Check to see if a filesystem is mounted on a block device. 2059 */ 2060 int 2061 vfs_mountedon(struct vnode *vp) 2062 { 2063 struct vnode *vq; 2064 int error = 0; 2065 2066 if (vp->v_type != VBLK) 2067 return ENOTBLK; 2068 if (vp->v_specmountpoint != NULL) 2069 return (EBUSY); 2070 if (vp->v_flag & VALIASED) { 2071 simple_lock(&spechash_slock); 2072 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 2073 if (vq->v_rdev != vp->v_rdev || 2074 vq->v_type != vp->v_type) 2075 continue; 2076 if (vq->v_specmountpoint != NULL) { 2077 error = EBUSY; 2078 break; 2079 } 2080 } 2081 simple_unlock(&spechash_slock); 2082 } 2083 return (error); 2084 } 2085 2086 /* 2087 * Do the usual access checking. 2088 * file_mode, uid and gid are from the vnode in question, 2089 * while acc_mode and cred are from the VOP_ACCESS parameter list 2090 */ 2091 int 2092 vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid, 2093 mode_t acc_mode, struct ucred *cred) 2094 { 2095 mode_t mask; 2096 2097 /* 2098 * Super-user always gets read/write access, but execute access depends 2099 * on at least one execute bit being set. 2100 */ 2101 if (cred->cr_uid == 0) { 2102 if ((acc_mode & VEXEC) && type != VDIR && 2103 (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0) 2104 return (EACCES); 2105 return (0); 2106 } 2107 2108 mask = 0; 2109 2110 /* Otherwise, check the owner. 
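 * A worked example of the mask logic that follows: on a 0644 file, the
 * owner asking for VREAD|VWRITE builds mask = S_IRUSR|S_IWUSR (0600),
 * and (0644 & 0600) == 0600, so access is granted; a group member
 * asking for VWRITE builds mask = S_IWGRP (0020), and since
 * (0644 & 0020) != 0020 the result is EACCES.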
/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall(struct lwp *l)
{
	struct mount *mp, *nmp;
	int allerror, error;

	printf("unmounting file systems...");
	for (allerror = 0, mp = mountlist.cqh_last;
	    mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
		printf("\nunmounting %s (%s)...",
		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
		/*
		 * XXX Freeze syncer.  Must do this before locking the
		 * mount point.  See dounmount() for details.
		 */
		lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
		if (vfs_busy(mp, 0, 0)) {
			lockmgr(&syncer_lock, LK_RELEASE, NULL);
			continue;
		}
		if ((error = dounmount(mp, MNT_FORCE, l)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}
	printf(" done\n");
	if (allerror)
		printf("WARNING: some file systems would not unmount\n");
}

extern struct simplelock bqueue_slock; /* XXX */

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown(void)
{
	struct lwp *l = curlwp;
	struct proc *p;

	/* XXX we're certainly not running in proc0's context! */
	if (l == NULL || (p = l->l_proc) == NULL)
		p = &proc0;

	printf("syncing disks... ");

	/* remove user processes from the run queue */
	suspendsched();
	(void) spl0();

	/* avoid coming back this way again if we panic. */
	doing_shutdown = 1;

	sys_sync(l, NULL, NULL);

	/* Wait for sync to finish. */
	if (buf_syncwait() != 0) {
#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif
		printf("giving up\n");
		return;
	} else
		printf("done\n");

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems. */
	vfs_unmountall(l);
}
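
/*
 * Illustrative sketch, not part of the original source: vfs_shutdown() is
 * intended to be called from the machine-dependent reboot path before the
 * system halts, roughly along these lines (details vary per port):
 *
 *	void
 *	cpu_reboot(int howto, char *bootstr)
 *	{
 *		if ((howto & RB_NOSYNC) == 0)
 *			vfs_shutdown();
 *		...
 *	}
 */
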
/*
 * Mount the root file system.  If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot(void)
{
	struct vfsops *v;
	int error = ENODEV;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	switch (root_device->dv_class) {
	case DV_IFNET:
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET "
			    "(0x%08x -> %d,%d)", rootdev,
			    major(rootdev), minor(rootdev));
		break;

	case DV_DISK:
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
		if (bdevvp(rootdev, &rootvp))
			panic("vfs_mountroot: can't get vnode for rootdev");
		error = VOP_OPEN(rootvp, FREAD, FSCRED, curlwp);
		if (error) {
			printf("vfs_mountroot: can't open root device\n");
			return (error);
		}
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    root_device->dv_xname);
		return (ENODEV);
	}

	/*
	 * If user specified a file system, use it.
	 */
	if (mountroot != NULL) {
		error = (*mountroot)();
		goto done;
	}

	/*
	 * Try each file system currently configured into the kernel.
	 */
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (v->vfs_mountroot == NULL)
			continue;
#ifdef DEBUG
		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
#endif
		error = (*v->vfs_mountroot)();
		if (!error) {
			aprint_normal("root file system type: %s\n",
			    v->vfs_name);
			break;
		}
	}

	if (v == NULL) {
		printf("no file system for %s", root_device->dv_xname);
		if (root_device->dv_class == DV_DISK)
			printf(" (dev 0x%x)", rootdev);
		printf("\n");
		error = EFTYPE;
	}

done:
	if (error && root_device->dv_class == DV_DISK) {
		VOP_CLOSE(rootvp, FREAD, FSCRED, curlwp);
		vrele(rootvp);
	}
	return (error);
}

/*
 * Given a file system name, look up the vfsops for that
 * file system, or return NULL if the file system isn't present
 * in the kernel.
 */
struct vfsops *
vfs_getopsbyname(const char *name)
{
	struct vfsops *v;

	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (strcmp(v->vfs_name, name) == 0)
			break;
	}

	return (v);
}

/*
 * Establish a file system and initialize it.
 */
int
vfs_attach(struct vfsops *vfs)
{
	struct vfsops *v;
	int error = 0;

	/*
	 * Make sure this file system doesn't already exist.
	 */
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
			error = EEXIST;
			goto out;
		}
	}

	/*
	 * Initialize the vnode operations for this file system.
	 */
	vfs_opv_init(vfs->vfs_opv_descs);

	/*
	 * Now initialize the file system itself.
	 */
	(*vfs->vfs_init)();

	/*
	 * ...and link it into the kernel's list.
	 */
	LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);

	/*
	 * Sanity: make sure the reference count is 0.
	 */
	vfs->vfs_refcount = 0;

 out:
	return (error);
}
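
/*
 * Illustrative sketch, not part of the original source: a file system
 * built into the kernel registers its vfsops with vfs_attach() during
 * initialization and withdraws them with vfs_detach() when it is no
 * longer needed.  The xxxfs name below is hypothetical.
 *
 *	extern struct vfsops xxxfs_vfsops;
 *
 *	error = vfs_attach(&xxxfs_vfsops);
 *	if (error == EEXIST)
 *		printf("xxxfs: already attached\n");
 */
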
/*
 * Remove a file system from the kernel.
 */
int
vfs_detach(struct vfsops *vfs)
{
	struct vfsops *v;

	/*
	 * Make sure no one is using the filesystem.
	 */
	if (vfs->vfs_refcount != 0)
		return (EBUSY);

	/*
	 * ...and remove it from the kernel's list.
	 */
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (v == vfs) {
			LIST_REMOVE(v, vfs_list);
			break;
		}
	}

	if (v == NULL)
		return (ESRCH);

	/*
	 * Now run the file system-specific cleanups.
	 */
	(*vfs->vfs_done)();

	/*
	 * Free the vnode operations vector.
	 */
	vfs_opv_free(vfs->vfs_opv_descs);
	return (0);
}

void
vfs_reinit(void)
{
	struct vfsops *vfs;

	LIST_FOREACH(vfs, &vfs_list, vfs_list) {
		if (vfs->vfs_reinit) {
			(*vfs->vfs_reinit)();
		}
	}
}

/*
 * Request a filesystem to suspend write operations.
 */
int
vfs_write_suspend(struct mount *mp, int slpflag, int slptimeo)
{
	struct lwp *l = curlwp;	/* XXX */
	int error;

	while ((mp->mnt_iflag & IMNT_SUSPEND)) {
		if (slptimeo < 0)
			return EWOULDBLOCK;
		error = tsleep(&mp->mnt_flag, slpflag, "suspwt1", slptimeo);
		if (error)
			return error;
	}
	mp->mnt_iflag |= IMNT_SUSPEND;

	simple_lock(&mp->mnt_slock);
	if (mp->mnt_writeopcountupper > 0)
		ltsleep(&mp->mnt_writeopcountupper, PUSER - 1, "suspwt",
		    0, &mp->mnt_slock);
	simple_unlock(&mp->mnt_slock);

	error = VFS_SYNC(mp, MNT_WAIT, l->l_proc->p_ucred, l);
	if (error) {
		vfs_write_resume(mp);
		return error;
	}
	mp->mnt_iflag |= IMNT_SUSPENDLOW;

	simple_lock(&mp->mnt_slock);
	if (mp->mnt_writeopcountlower > 0)
		ltsleep(&mp->mnt_writeopcountlower, PUSER - 1, "suspwt",
		    0, &mp->mnt_slock);
	mp->mnt_iflag |= IMNT_SUSPENDED;
	simple_unlock(&mp->mnt_slock);

	return 0;
}

/*
 * Request a filesystem to resume write operations.
 */
void
vfs_write_resume(struct mount *mp)
{

	if ((mp->mnt_iflag & IMNT_SUSPEND) == 0)
		return;
	mp->mnt_iflag &= ~(IMNT_SUSPEND | IMNT_SUSPENDLOW | IMNT_SUSPENDED);
	wakeup(&mp->mnt_flag);
}
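
/*
 * Illustrative sketch, not part of the original source: a snapshot-style
 * consumer brackets its work with the write-suspension helpers above so
 * that no new write operations enter the file system while it runs.  The
 * flag and timeout arguments shown are assumptions.
 *
 *	error = vfs_write_suspend(mp, 0, 0);
 *	if (error)
 *		return error;
 *	... capture a consistent image of the file system ...
 *	vfs_write_resume(mp);
 */
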
void
copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
{
	const struct statvfs *mbp;

	if (sbp == (mbp = &mp->mnt_stat))
		return;

	(void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
	sbp->f_fsid = mbp->f_fsid;
	sbp->f_owner = mbp->f_owner;
	sbp->f_flag = mbp->f_flag;
	sbp->f_syncwrites = mbp->f_syncwrites;
	sbp->f_asyncwrites = mbp->f_asyncwrites;
	sbp->f_syncreads = mbp->f_syncreads;
	sbp->f_asyncreads = mbp->f_asyncreads;
	(void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
	(void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
	    sizeof(sbp->f_fstypename));
	(void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
	    sizeof(sbp->f_mntonname));
	(void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
	    sizeof(sbp->f_mntfromname));
	sbp->f_namemax = mbp->f_namemax;
}

int
set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
    struct mount *mp, struct lwp *l)
{
	int error;
	size_t size;
	struct statvfs *sfs = &mp->mnt_stat;
	int (*fun)(const void *, void *, size_t, size_t *);

	(void)strncpy(mp->mnt_stat.f_fstypename, mp->mnt_op->vfs_name,
	    sizeof(mp->mnt_stat.f_fstypename));

	if (onp) {
		struct cwdinfo *cwdi = l->l_proc->p_cwdi;
		fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
		if (cwdi->cwdi_rdir != NULL) {
			size_t len;
			char *bp;
			char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);

			if (!path) /* XXX can't happen with M_WAITOK */
				return ENOMEM;

			bp = path + MAXPATHLEN;
			*--bp = '\0';
			error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
			    path, MAXPATHLEN / 2, 0, l);
			if (error) {
				free(path, M_TEMP);
				return error;
			}

			len = strlen(bp);
			if (len > sizeof(sfs->f_mntonname) - 1)
				len = sizeof(sfs->f_mntonname) - 1;
			(void)strncpy(sfs->f_mntonname, bp, len);
			free(path, M_TEMP);

			if (len < sizeof(sfs->f_mntonname) - 1) {
				error = (*fun)(onp, &sfs->f_mntonname[len],
				    sizeof(sfs->f_mntonname) - len - 1, &size);
				if (error)
					return error;
				size += len;
			} else {
				size = len;
			}
		} else {
			error = (*fun)(onp, &sfs->f_mntonname,
			    sizeof(sfs->f_mntonname) - 1, &size);
			if (error)
				return error;
		}
		(void)memset(sfs->f_mntonname + size, 0,
		    sizeof(sfs->f_mntonname) - size);
	}

	if (fromp) {
		fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
		error = (*fun)(fromp, sfs->f_mntfromname,
		    sizeof(sfs->f_mntfromname) - 1, &size);
		if (error)
			return error;
		(void)memset(sfs->f_mntfromname + size, 0,
		    sizeof(sfs->f_mntfromname) - size);
	}
	return 0;
}
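
/*
 * Illustrative sketch, not part of the original source: a file system's
 * VFS_STATVFS implementation typically fills in the size fields it knows
 * about and then lets copy_statvfs_info() supply the generic fields cached
 * in the mount structure.  The xxxfs name below is hypothetical.
 *
 *	int
 *	xxxfs_statvfs(struct mount *mp, struct statvfs *sbp, struct lwp *l)
 *	{
 *		sbp->f_bsize = ...;
 *		sbp->f_blocks = ...;
 *		sbp->f_bfree = ...;
 *		copy_statvfs_info(sbp, mp);
 *		return 0;
 *	}
 */
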
2643 (*pr)("fs_bshift %d dev_bshift = %d\n", 2644 mp->mnt_fs_bshift,mp->mnt_dev_bshift); 2645 2646 bitmask_snprintf(mp->mnt_flag, __MNT_FLAG_BITS, sbuf, sizeof(sbuf)); 2647 (*pr)("flag = %s\n", sbuf); 2648 2649 bitmask_snprintf(mp->mnt_iflag, __IMNT_FLAG_BITS, sbuf, sizeof(sbuf)); 2650 (*pr)("iflag = %s\n", sbuf); 2651 2652 /* XXX use lockmgr_printinfo */ 2653 if (mp->mnt_lock.lk_sharecount) 2654 (*pr)(" lock type %s: SHARED (count %d)", mp->mnt_lock.lk_wmesg, 2655 mp->mnt_lock.lk_sharecount); 2656 else if (mp->mnt_lock.lk_flags & LK_HAVE_EXCL) { 2657 (*pr)(" lock type %s: EXCL (count %d) by ", 2658 mp->mnt_lock.lk_wmesg, mp->mnt_lock.lk_exclusivecount); 2659 if (mp->mnt_lock.lk_flags & LK_SPIN) 2660 (*pr)("processor %lu", mp->mnt_lock.lk_cpu); 2661 else 2662 (*pr)("pid %d.%d", mp->mnt_lock.lk_lockholder, 2663 mp->mnt_lock.lk_locklwp); 2664 } else 2665 (*pr)(" not locked"); 2666 if ((mp->mnt_lock.lk_flags & LK_SPIN) == 0 && mp->mnt_lock.lk_waitcount > 0) 2667 (*pr)(" with %d pending", mp->mnt_lock.lk_waitcount); 2668 2669 (*pr)("\n"); 2670 2671 if (mp->mnt_unmounter) { 2672 (*pr)("unmounter pid = %d ",mp->mnt_unmounter->l_proc); 2673 } 2674 (*pr)("wcnt = %d, writeopcountupper = %d, writeopcountupper = %d\n", 2675 mp->mnt_wcnt,mp->mnt_writeopcountupper,mp->mnt_writeopcountlower); 2676 2677 (*pr)("statvfs cache:\n"); 2678 (*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize); 2679 (*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize); 2680 (*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize); 2681 2682 (*pr)("\tblocks = "PRIu64"\n",mp->mnt_stat.f_blocks); 2683 (*pr)("\tbfree = "PRIu64"\n",mp->mnt_stat.f_bfree); 2684 (*pr)("\tbavail = "PRIu64"\n",mp->mnt_stat.f_bavail); 2685 (*pr)("\tbresvd = "PRIu64"\n",mp->mnt_stat.f_bresvd); 2686 2687 (*pr)("\tfiles = "PRIu64"\n",mp->mnt_stat.f_files); 2688 (*pr)("\tffree = "PRIu64"\n",mp->mnt_stat.f_ffree); 2689 (*pr)("\tfavail = "PRIu64"\n",mp->mnt_stat.f_favail); 2690 (*pr)("\tfresvd = "PRIu64"\n",mp->mnt_stat.f_fresvd); 2691 2692 (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n", 2693 mp->mnt_stat.f_fsidx.__fsid_val[0], 2694 mp->mnt_stat.f_fsidx.__fsid_val[1]); 2695 2696 (*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner); 2697 (*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax); 2698 2699 bitmask_snprintf(mp->mnt_stat.f_flag, __MNT_FLAG_BITS, sbuf, 2700 sizeof(sbuf)); 2701 (*pr)("\tflag = %s\n",sbuf); 2702 (*pr)("\tsyncwrites = " PRIu64 "\n",mp->mnt_stat.f_syncwrites); 2703 (*pr)("\tasyncwrites = " PRIu64 "\n",mp->mnt_stat.f_asyncwrites); 2704 (*pr)("\tsyncreads = " PRIu64 "\n",mp->mnt_stat.f_syncreads); 2705 (*pr)("\tasyncreads = " PRIu64 "\n",mp->mnt_stat.f_asyncreads); 2706 (*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename); 2707 (*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname); 2708 (*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname); 2709 2710 { 2711 int cnt = 0; 2712 struct vnode *vp; 2713 (*pr)("locked vnodes ="); 2714 /* XXX would take mountlist lock, except ddb may not have context */ 2715 LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 2716 if (VOP_ISLOCKED(vp)) { 2717 if ((++cnt % 6) == 0) { 2718 (*pr)(" %p,\n\t", vp); 2719 } else { 2720 (*pr)(" %p,", vp); 2721 } 2722 } 2723 } 2724 (*pr)("\n"); 2725 } 2726 2727 if (full) { 2728 int cnt = 0; 2729 struct vnode *vp; 2730 (*pr)("all vnodes ="); 2731 /* XXX would take mountlist lock, except ddb may not have context */ 2732 LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 2733 if (!LIST_NEXT(vp, v_mntvnodes)) { 2734 (*pr)(" %p", vp); 2735 } else if ((++cnt % 6) == 0) { 2736 (*pr)(" %p,\n\t", vp); 
			} else {
				(*pr)(" %p,", vp);
			}
		}
		(*pr)("\n");
	}
}
#endif /* DDB */
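
/*
 * Illustrative note, not part of the original source: in kernels with DDB
 * configured, the vfs_buf_print(), vfs_vnode_print() and vfs_mount_print()
 * helpers above are meant to be driven from the debugger prompt, e.g.
 * (command names assumed here, consult ddb(4) for the exact syntax):
 *
 *	db> show vnode 0xc12345e0
 *	db> show mount 0xc0e1d400
 */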