/*	$NetBSD: vfs_subr.c,v 1.276 2006/11/17 17:05:18 hannken Exp $	*/

/*-
 * Copyright (c) 1997, 1998, 2004, 2005 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 * This code is derived from software contributed to The NetBSD Foundation
 * by Charles M. Hannum.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.276 2006/11/17 17:05:18 hannken Exp $");

#include "opt_inet.h"
#include "opt_ddb.h"
#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/filedesc.h>
#include <sys/kauth.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/syncfs/syncfs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_readahead.h>
#include <uvm/uvm_ddb.h>

#include <sys/sysctl.h>

const enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
const int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */
extern int vfs_magiclinks;	/* 1 => expand "magic" symlinks */
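
/*
 * Example: the tables above back the IFTOVT()/VTTOIF() conversions
 * (sys/vnode.h) between on-disk mode bits and vnode types.
 * Illustrative sketch only, not part of this file; "mode" is a
 * hypothetical st_mode value:
 *
 *	mode_t mode = S_IFDIR | 0755;
 *	enum vtype vt = IFTOVT(mode);	iftovt_tab[(mode & S_IFMT) >> 12],
 *					i.e. VDIR
 *	int ifmt = VTTOIF(vt);		vttoif_tab[vt], i.e. S_IFDIR
 */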
/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
/* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */
struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;

/* XXX - gross; single global lock to protect v_numoutput */
struct simplelock global_v_numoutput_slock = SIMPLELOCK_INITIALIZER;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;			/* root device */

POOL_INIT(vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
    &pool_allocator_nointr);

MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");

/*
 * Local declarations.
 */

static specificdata_domain_t mount_specificdata_domain;

static void insmntque(struct vnode *, struct mount *);
static int getdevvp(dev_t, struct vnode **, enum vtype);
static void vclean(struct vnode *, int, struct lwp *);
static struct vnode *getcleanvnode(struct lwp *);

#ifdef DEBUG
void printlockedvnodes(void);
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit(void)
{

	mount_specificdata_domain = specificdata_domain_create();

	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

/*
 * Reduce the number of allocated vnodes to the given target by
 * recycling vnodes from the free lists.
 */
int
vfs_drainvnodes(long target, struct lwp *l)
{

	simple_lock(&vnode_free_list_slock);
	while (numvnodes > target) {
		struct vnode *vp;

		vp = getcleanvnode(l);
		if (vp == NULL)
			return EBUSY; /* give up */
		pool_put(&vnode_pool, vp);
		simple_lock(&vnode_free_list_slock);
		numvnodes--;
	}
	simple_unlock(&vnode_free_list_slock);

	return 0;
}

/*
 * Grab a vnode from the freelist and clean it.
 */
struct vnode *
getcleanvnode(struct lwp *l)
{
	struct vnode *vp;
	struct mount *mp;
	struct freelst *listhd;

	LOCK_ASSERT(simple_lock_held(&vnode_free_list_slock));

	listhd = &vnode_free_list;
try_nextlist:
	TAILQ_FOREACH(vp, listhd, v_freelist) {
		if (!simple_lock_try(&vp->v_interlock))
			continue;
		/*
		 * As our lwp might hold the underlying vnode locked,
		 * don't try to reclaim the VLAYER vnode if it's locked.
		 */
		if ((vp->v_flag & VXLOCK) == 0 &&
		    ((vp->v_flag & VLAYER) == 0 || VOP_ISLOCKED(vp) == 0)) {
			if (vn_start_write(vp, &mp, V_NOWAIT) == 0)
				break;
		}
		mp = NULL;
		simple_unlock(&vp->v_interlock);
	}

	if (vp == NULLVP) {
		if (listhd == &vnode_free_list) {
			listhd = &vnode_hold_list;
			goto try_nextlist;
		}
		simple_unlock(&vnode_free_list_slock);
		return NULLVP;
	}

	if (vp->v_usecount)
		panic("free vnode isn't, vp %p", vp);
	TAILQ_REMOVE(listhd, vp, v_freelist);
	/* see comment on why 0xdeadb is set at end of vgone (below) */
	vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
	simple_unlock(&vnode_free_list_slock);
	vp->v_lease = NULL;

	if (vp->v_type != VBAD)
		vgonel(vp, l);
	else
		simple_unlock(&vp->v_interlock);
	vn_finished_write(mp, 0);
#ifdef DIAGNOSTIC
	if (vp->v_data || vp->v_uobj.uo_npages ||
	    TAILQ_FIRST(&vp->v_uobj.memq))
		panic("cleaned vnode isn't, vp %p", vp);
	if (vp->v_numoutput)
		panic("clean vnode has pending I/O's, vp %p", vp);
#endif
	KASSERT((vp->v_flag & VONWORKLST) == 0);

	return vp;
}

/*
 * Mark a mount point as busy.  Used to synchronize access and to delay
 * unmounting.  Interlock is not released on failure.
 */
int
vfs_busy(struct mount *mp, int flags, struct simplelock *interlkp)
{
	int lkflags;

	while (mp->mnt_iflag & IMNT_UNMOUNT) {
		int gone, n;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
		    && mp->mnt_unmounter == curlwp)
			return (EDEADLK);
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		simple_lock(&mp->mnt_slock);
		mp->mnt_wcnt++;
		ltsleep((caddr_t)mp, PVFS, "vfs_busy", 0, &mp->mnt_slock);
		n = --mp->mnt_wcnt;
		simple_unlock(&mp->mnt_slock);
		gone = mp->mnt_iflag & IMNT_GONE;

		if (n == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(struct mount *mp)
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}
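
/*
 * Example: the usual vfs_busy()/vfs_unbusy() pattern when walking the
 * mount list, as used by printlockedvnodes() and sysctl_kern_vnode()
 * below.  Illustrative sketch only, not compiled:
 *
 *	struct mount *mp, *nmp;
 *
 *	simple_lock(&mountlist_slock);
 *	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
 *	     mp = nmp) {
 *		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
 *			nmp = CIRCLEQ_NEXT(mp, mnt_list);
 *			continue;
 *		}
 *		... use mp ...
 *		simple_lock(&mountlist_slock);
 *		nmp = CIRCLEQ_NEXT(mp, mnt_list);
 *		vfs_unbusy(mp);
 *	}
 *	simple_unlock(&mountlist_slock);
 */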
/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(const char *fstypename, const char *devname,
	struct mount **mpp)
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	simple_lock_init(&mp->mnt_slock);
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	TAILQ_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	mp->mnt_leaf = mp;
	vfsp->vfs_refcount++;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	mount_initspecific(mp);
	*mpp = mp;
	return (0);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid_t *fsid)
{
	struct mount *mp;

	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
		    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(struct mount *mp)
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
	mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
	tfsid.__fsid_val[1] = mtype;
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.__fsid_val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(const char *type)
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(struct vattr *vap)
{

	vap->va_type = VNON;

	/*
	 * Assign individually so that it is safe even if the size and
	 * sign of each member vary.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec =
	    vap->va_birthtime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec =
	    vap->va_birthtime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_vaflags = 0;
}
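
/*
 * Example: callers initialize a struct vattr with vattr_null() (or the
 * VATTR_NULL() wrapper) and then set only the fields they mean to
 * change, so that VOP_SETATTR() ignores everything left at VNOVAL.
 * Illustrative sketch only, assuming a locked vnode "vp":
 *
 *	struct vattr va;
 *
 *	vattr_null(&va);
 *	va.va_size = 0;				truncate to zero length
 *	error = VOP_SETATTR(vp, &va, cred, l);
 */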
/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)(void *);
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(enum vtagtype tag, struct mount *mp, int (**vops)(void *),
	struct vnode **vpp)
{
	extern struct uvm_pagerops uvm_vnodeops;
	struct uvm_object *uobj;
	struct lwp *l = curlwp;		/* XXX */
	static int toggle;
	struct vnode *vp;
	int error = 0, tryalloc;

try_again:
	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error.  If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock).
		 */
		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
		if (error && error != EDEADLK)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one.  The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list.  Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list.  If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list.  The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size.  We are reluctant to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */

	vp = NULL;

	simple_lock(&vnode_free_list_slock);

	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	tryalloc = numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));

	if (tryalloc &&
	    (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
		numvnodes++;
		simple_unlock(&vnode_free_list_slock);
		memset(vp, 0, sizeof(*vp));
		UVM_OBJ_INIT(&vp->v_uobj, &uvm_vnodeops, 1);
		/*
		 * done by memset() above.
		 *	LIST_INIT(&vp->v_nclist);
		 *	LIST_INIT(&vp->v_dnclist);
		 */
	} else {
		vp = getcleanvnode(l);
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			if (mp && error != EDEADLK)
				vfs_unbusy(mp);
			if (tryalloc) {
				printf("WARNING: unable to allocate new "
				    "vnode, retrying...\n");
				(void) tsleep(&lbolt, PRIBIO, "newvn", hz);
				goto try_again;
			}
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		vp->v_usecount = 1;
		vp->v_flag = 0;
		vp->v_socket = NULL;
	}
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	KASSERT(LIST_EMPTY(&vp->v_nclist));
	KASSERT(LIST_EMPTY(&vp->v_dnclist));
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_data = 0;
	simple_lock_init(&vp->v_interlock);

	/*
	 * initialize uvm_object within vnode.
	 */

	uobj = &vp->v_uobj;
	KASSERT(uobj->pgops == &uvm_vnodeops);
	KASSERT(uobj->uo_npages == 0);
	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
	vp->v_size = VSIZENOTSET;

	if (mp && error != EDEADLK)
		vfs_unbusy(mp);
	return (0);
}

/*
 * This is really just the reverse of getnewvnode().  Needed for
 * VFS_VGET functions that may need to push back a vnode in case
 * of a locking race.
 */
void
ungetnewvnode(struct vnode *vp)
{
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 1)
		panic("ungetnewvnode: busy vnode");
#endif
	vp->v_usecount--;
	insmntque(vp, NULL);
	vp->v_type = VBAD;

	simple_lock(&vp->v_interlock);
	/*
	 * Insert at head of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}
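
/*
 * Example: the getnewvnode()/ungetnewvnode() pattern in a filesystem's
 * VFS_VGET implementation.  Illustrative sketch only; "VT_XXX",
 * "xxx_vnodeop_p" and the hash lookup are hypothetical stand-ins for a
 * real filesystem:
 *
 *	error = getnewvnode(VT_XXX, mp, xxx_vnodeop_p, &vp);
 *	if (error)
 *		return error;
 *	... if another thread won the race and entered the same inode
 *	    in the hash while we slept ...
 *		ungetnewvnode(vp);
 *		goto retry;
 */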
/*
 * Move a vnode from one mount queue to another.
 */
static void
insmntque(struct vnode *vp, struct mount *mp)
{

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_iflag & IMNT_UNMOUNT) &&
	    !(mp->mnt_flag & MNT_SOFTDEP) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL) {
		if (TAILQ_EMPTY(&mp->mnt_vnodelist)) {
			TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
		} else {
			TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
		}
	}
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(struct buf *bp)
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		/* XXX global lock hack
		 * can't use v_interlock here since this is called
		 * in interrupt context from biodone().
		 */
		simple_lock(&global_v_numoutput_slock);
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput, vp %p", vp);
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
		simple_unlock(&global_v_numoutput_slock);
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l,
	int slpflag, int slptimeo)
{
	struct buf *bp, *nbp;
	int s, error;
	int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
	    (flags & V_SAVE ? PGO_CLEANIT : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, 0, 0, flushflags);
	if (error) {
		return error;
	}

	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, l);
		if (error)
			return (error);
#ifdef DIAGNOSTIC
		s = splbio();
		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
			panic("vinvalbuf: dirty bufs, vp %p", vp);
		splx(s);
#endif
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep((caddr_t)bp,
			    slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vinvalbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep((caddr_t)bp,
			    slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vinvalbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			bp->b_flags |= B_BUSY | B_VFLUSH;
			simple_unlock(&bp->b_interlock);
			VOP_BWRITE(bp);
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	splx(s);

	return (0);
}
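
/*
 * Example: vinvalbuf() is typically called with V_SAVE to write back
 * and then discard a vnode's buffers, as vclean() does below when
 * closing out a vnode.  Illustrative sketch only, vnode locked:
 *
 *	error = vinvalbuf(vp, V_SAVE, cred, l, 0, 0);
 *	if (error)
 *		error = vinvalbuf(vp, 0, cred, l, 0, 0);
 */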
/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vtruncbuf(struct vnode *vp, daddr_t lbn, int slpflag, int slptimeo)
{
	struct buf *bp, *nbp;
	int s, error;
	voff_t off;

	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
	if (error) {
		return error;
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vtruncbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vtruncbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	splx(s);

	return (0);
}

/*
 * Flush all dirty buffers associated with a vnode, waiting for
 * completion if "sync" is set.
 */
void
vflushbuf(struct vnode *vp, int sync)
{
	struct buf *bp, *nbp;
	int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
	int s;

	simple_lock(&vp->v_interlock);
	(void) VOP_PUTPAGES(vp, 0, 0, flags);

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if ((bp->b_flags & B_BUSY)) {
			simple_unlock(&bp->b_interlock);
			continue;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_flags |= B_BUSY | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	simple_lock(&global_v_numoutput_slock);
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		ltsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0,
		    &global_v_numoutput_slock);
	}
	simple_unlock(&global_v_numoutput_slock);
	splx(s);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(struct vnode *vp, struct buf *bp)
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free, bp %p", bp);
	VHOLD(vp);
	s = splbio();
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}
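
/*
 * Example: bgetvp() and brelvp() (below) bracket the life of a
 * buffer-to-vnode association; the buffer cache does this when a
 * buffer is assigned to and released from a vnode.  Illustrative
 * sketch only:
 *
 *	bgetvp(vp, bp);		buffer now on vp->v_cleanblkhd, vp held
 *	... I/O, possibly reassignbuf() onto v_dirtyblkhd ...
 *	brelvp(bp);		association broken, hold count dropped
 */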
/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(struct buf *bp)
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == NULL)
		panic("brelvp: vp NULL, bp %p", bp);

	s = splbio();
	vp = bp->b_vp;
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);

	if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_flag &= ~VWRITEMAPDIRTY;
		vn_syncer_remove_from_worklist(vp);
	}

	bp->b_vp = NULL;
	HOLDRELE(vp);
	splx(s);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 *
 * This function must be called at splbio().
 */
void
reassignbuf(struct buf *bp, struct vnode *newvp)
{
	struct buflists *listheadp;
	int delayx;

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &newvp->v_cleanblkhd;
		if (TAILQ_EMPTY(&newvp->v_uobj.memq) &&
		    (newvp->v_flag & VONWORKLST) &&
		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
			newvp->v_flag &= ~VWRITEMAPDIRTY;
			vn_syncer_remove_from_worklist(newvp);
		}
	} else {
		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			switch (newvp->v_type) {
			case VDIR:
				delayx = dirdelay;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delayx = metadelay;
					break;
				}
				/* fall through */
			default:
				delayx = filedelay;
				break;
			}
			if (!newvp->v_mount ||
			    (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
				vn_syncer_add_to_worklist(newvp, delayx);
		}
	}
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev_t dev, struct vnode **vpp)
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev_t dev, struct vnode **vpp)
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
static int
getdevvp(dev_t dev, struct vnode **vpp, enum vtype type)
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device).  If such an alias exists, deallocate
 * the existing contents and return the aliased vnode.  The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(struct vnode *nvp, dev_t nvp_rdev, struct mount *mp)
{
	struct lwp *l = curlwp;		/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		simple_unlock(&spechash_slock);
		if (vp->v_usecount == 0) {
			vgonel(vp, l);
			goto loop;
		}
		/*
		 * What we're interested in here is whether someone else has
		 * removed this vnode from the device hash list while we were
		 * waiting.  This can only happen if vclean() did it, and
		 * that requires the vnode to be locked.
		 */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK))
			goto loop;
		if (vp->v_specinfo == NULL) {
			vput(vp);
			goto loop;
		}
		simple_lock(&spechash_slock);
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_NOWAIT);
		/* XXX Erg. */
		if (nvp->v_specinfo == NULL) {
			simple_unlock(&spechash_slock);
			uvm_wait("checkalias");
			goto loop;
		}

		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		simple_lock_init(&nvp->v_spec_cow_slock);
		SLIST_INIT(&nvp->v_spec_cow_head);
		nvp->v_spec_cow_req = 0;
		nvp->v_spec_cow_count = 0;

		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, l);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}
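
/*
 * Example: bdevvp()/cdevvp() above obtain a special vnode for a device,
 * as vfs_mountroot() does below for the root disk.  Illustrative sketch
 * only:
 *
 *	struct vnode *devvp;
 *
 *	if (bdevvp(rootdev, &devvp))
 *		panic("can't get vnode for root device");
 *	... use devvp, e.g. VOP_OPEN(devvp, FREAD, FSCRED, l) ...
 *	vrele(devvp);
 */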
/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it.  If the vnode lock bit is set the
 * vnode is being eliminated in vgone.  In that case, we can not
 * grab the vnode, so the process is awakened when the transition is
 * completed, and an error returned to indicate that the vnode is no
 * longer usable (possibly having been changed to a new file system type).
 */
int
vget(struct vnode *vp, int flags)
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */

	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if ((vp->v_flag & (VXLOCK | VFREEING)) != 0) {
		if (flags & LK_NOWAIT) {
			simple_unlock(&vp->v_interlock);
			return EBUSY;
		}
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vget", vp);
		panic("vget: usecount overflow, vp %p", vp);
	}
#endif
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
			vrele(vp);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(struct vnode *vp)
{
	struct lwp *l = curlwp;		/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP|VWRITEMAP|VMAPPED);
	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, l);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(struct vnode *vp)
{
	struct lwp *l = curlwp;		/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt vp %p", vp);
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP|VWRITEMAP|VMAPPED);
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
		VOP_INACTIVE(vp, l);
}
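
/*
 * Example: the common reference/lock lifecycle around these routines.
 * Illustrative sketch only:
 *
 *	error = vget(vp, LK_EXCLUSIVE);		reference + lock
 *	if (error == 0) {
 *		... operate on vp ...
 *		vput(vp);			unlock + release
 *	}
 *
 * A caller holding an unlocked reference drops it with vrele(vp)
 * instead.
 */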
/*
 * Page or buffer structure gets a reference.
 * Called with v_interlock held.
 */
void
vholdl(struct vnode *vp)
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 * Called with v_interlock held.
 */
void
holdrelel(struct vnode *vp)
{

	if (vp->v_holdcnt <= 0)
		panic("holdrelel: holdcnt vp %p", vp);
	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */

	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
}

/*
 * Vnode reference.
 */
void
vref(struct vnode *vp)
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required, vp %p", vp);
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vref", vp);
		panic("vref: usecount overflow, vp %p", vp);
	}
#endif
	simple_unlock(&vp->v_interlock);
}
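
/*
 * Example: vholdl()/holdrelel() are normally reached through the
 * VHOLD()/HOLDRELE() macros (sys/vnode.h), which take the interlock
 * first, as bgetvp()/brelvp() above do.  Illustrative sketch only:
 *
 *	VHOLD(vp);		take v_interlock, vholdl(vp), release
 *	...
 *	HOLDRELE(vp);		take v_interlock, holdrelel(vp), release
 */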
/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If FORCECLOSE is not specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error).  If FORCECLOSE is specified, detach any active vnodes
 * that are found.
 *
 * If WRITECLOSE is set, only flush out regular file vnodes open for
 * writing.
 *
 * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(struct mount *mp, struct vnode *skipvp, int flags)
{
	struct lwp *l = curlwp;		/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	/*
	 * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
	 * and vclean() are called
	 */
	for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = TAILQ_NEXT(vp, v_mntvnodes);
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, l);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device.  For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, l);
			} else {
				vclean(vp, 0, l);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
static void
vclean(struct vnode *vp, int flags, struct lwp *l)
{
	struct mount *mp;
	int active;

	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */

	if ((active = vp->v_usecount) != 0) {
		vp->v_usecount++;
#ifdef DIAGNOSTIC
		if (vp->v_usecount == 0) {
			vprint("vclean", vp);
			panic("vclean: usecount overflow");
		}
#endif
	}

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock, vp %p", vp);
	vp->v_flag |= VXLOCK;
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);

	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out.  The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);

	/*
	 * Clean out any cached data associated with the vnode.
	 * If special device, remove it from the special device alias
	 * list, if it is on one.
	 */
	if (flags & DOCLOSE) {
		int error;
		struct vnode *vq, *vx;

		vn_start_write(vp, &mp, V_WAIT | V_LOWER);
		error = vinvalbuf(vp, V_SAVE, NOCRED, l, 0, 0);
		vn_finished_write(mp, V_LOWER);
		if (error)
			error = vinvalbuf(vp, 0, NOCRED, l, 0, 0);
		KASSERT(error == 0);
		KASSERT((vp->v_flag & VONWORKLST) == 0);

		if (active)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);

		if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
		    vp->v_specinfo != 0) {
			simple_lock(&spechash_slock);
			if (vp->v_hashchain != NULL) {
				if (*vp->v_hashchain == vp) {
					*vp->v_hashchain = vp->v_specnext;
				} else {
					for (vq = *vp->v_hashchain; vq;
					     vq = vq->v_specnext) {
						if (vq->v_specnext != vp)
							continue;
						vq->v_specnext = vp->v_specnext;
						break;
					}
					if (vq == NULL)
						panic("missing bdev");
				}
				if (vp->v_flag & VALIASED) {
					vx = NULL;
					for (vq = *vp->v_hashchain; vq;
					     vq = vq->v_specnext) {
						if (vq->v_rdev != vp->v_rdev ||
						    vq->v_type != vp->v_type)
							continue;
						if (vx)
							break;
						vx = vq;
					}
					if (vx == NULL)
						panic("missing alias");
					if (vq == NULL)
						vx->v_flag &= ~VALIASED;
					vp->v_flag &= ~VALIASED;
				}
			}
			simple_unlock(&spechash_slock);
			FREE(vp->v_specinfo, M_VNODE);
			vp->v_specinfo = NULL;
		}
	}
	LOCK_ASSERT(!simple_lock_held(&vp->v_interlock));

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.  Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		VOP_INACTIVE(vp, l);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, l))
		panic("vclean: cannot reclaim, vp %p", vp);
	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			/*
			 * Insert at tail of LRU list.
			 */

			simple_unlock(&vp->v_interlock);
			simple_lock(&vnode_free_list_slock);
#ifdef DIAGNOSTIC
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean, vp %p", vp);
#endif
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			simple_unlock(&vnode_free_list_slock);
		} else
			simple_unlock(&vp->v_interlock);
	}

	KASSERT(vp->v_uobj.uo_npages == 0);
	if (vp->v_type == VREG && vp->v_ractx != NULL) {
		uvm_ra_freectx(vp->v_ractx);
		vp->v_ractx = NULL;
	}
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	simple_lock(&vp->v_interlock);
	VN_KNOTE(vp, NOTE_REVOKE);	/* FreeBSD has this in vn_pollgone() */
	vp->v_flag &= ~(VXLOCK|VLOCKSWORK);
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		simple_unlock(&vp->v_interlock);
		wakeup((caddr_t)vp);
	} else
		simple_unlock(&vp->v_interlock);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(struct vnode *vp, struct simplelock *inter_lkp, struct lwp *l)
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, l);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(struct vnode *vp)
{
	struct lwp *l = curlwp;		/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, l);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(struct vnode *vp, struct lwp *l)
{

	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */

	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */

	vclean(vp, DOCLOSE, l);
	KASSERT((vp->v_flag & VONWORKLST) == 0);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */

	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);

	/*
	 * The test of the back pointer and the reference count of
	 * zero is because it will be removed from the free list by
	 * getcleanvnode, but will not have its reference count
	 * incremented until after calling vgone.  If the reference
	 * count were incremented first, vgone would (incorrectly)
	 * try to close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */

	vp->v_type = VBAD;
	if (vp->v_usecount == 0) {
		boolean_t dofree;

		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean, vp %p", vp);
		/*
		 * If it isn't on the freelist, we're called by getcleanvnode
		 * and the vnode is being re-used.  Otherwise, we'll free it.
		 */
		dofree = vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb;
		if (dofree) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			numvnodes--;
		}
		simple_unlock(&vnode_free_list_slock);
		if (dofree)
			pool_put(&vnode_pool, vp);
	}
}
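
/*
 * Example: filesystems use vrecycle() from their VOP_INACTIVE to throw
 * away a vnode whose underlying file is gone.  Illustrative sketch
 * only; the "file is dead" test stands in for a filesystem's own check:
 *
 *	if (... file is dead ...)
 *		vrecycle(vp, NULL, l);	vgone()s it if unused
 */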
/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev_t dev, enum vtype type, struct vnode **vpp)
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(int maj, int minl, int minh, enum vtype type)
{
	struct vnode *vp;
	int mn;

	vp = NULL;	/* XXX gcc */

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(struct vnode *vp)
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp &&
		    (vq->v_flag & VXLOCK) == 0) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
#define ARRAY_PRINT(idx, arr) \
    ((idx) > 0 && (idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN")

const char * const vnode_tags[] = { VNODE_TAGS };
const char * const vnode_types[] = { VNODE_TYPES };
const char vnode_flagbits[] = VNODE_FLAGBITS;

/*
 * Print out a description of a vnode.
 */
void
vprint(const char *label, struct vnode *vp)
{
	char bf[96];

	if (label != NULL)
		printf("%s: ", label);
	printf("tag %s(%d) type %s(%d), usecount %d, writecount %ld, "
	    "refcount %ld,", ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag,
	    ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type,
	    vp->v_usecount, vp->v_writecount, vp->v_holdcnt);
	bitmask_snprintf(vp->v_flag, vnode_flagbits, bf, sizeof(bf));
	if (bf[0] != '\0')
		printf(" flags (%s)", &bf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes(void)
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	     mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp))
				vprint(NULL, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * sysctl helper routine to return list of supported fstypes
 */
static int
sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS)
{
	char bf[MFSNAMELEN];
	char *where = oldp;
	struct vfsops *v;
	size_t needed, left, slen;
	int error, first;

	if (newp != NULL)
		return (EPERM);
	if (namelen != 0)
		return (EINVAL);

	first = 1;
	error = 0;
	needed = 0;
	left = *oldlenp;

	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (where == NULL)
			needed += strlen(v->vfs_name) + 1;
		else {
			memset(bf, 0, sizeof(bf));
			if (first) {
				strncpy(bf, v->vfs_name, sizeof(bf));
				first = 0;
			} else {
				bf[0] = ' ';
				strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1);
			}
			bf[sizeof(bf)-1] = '\0';
			slen = strlen(bf);
			if (left < slen + 1)
				break;
			/* +1 to copy out the trailing NUL byte */
			error = copyout(bf, where, slen + 1);
			if (error)
				break;
			where += slen;
			needed += slen;
			left -= slen;
		}
	}
	*oldlenp = needed;
	return (error);
}

/*
 * Top level filesystem related information gathering.
 */
SYSCTL_SETUP(sysctl_vfs_setup, "sysctl vfs subtree setup")
{
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "vfs", NULL,
		       NULL, 0, NULL, 0,
		       CTL_VFS, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "generic",
		       SYSCTL_DESCR("Non-specific vfs related information"),
		       NULL, 0, NULL, 0,
		       CTL_VFS, VFS_GENERIC, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "usermount",
		       SYSCTL_DESCR("Whether unprivileged users may mount "
				    "filesystems"),
		       NULL, 0, &dovfsusermount, 0,
		       CTL_VFS, VFS_GENERIC, VFS_USERMOUNT, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRING, "fstypes",
		       SYSCTL_DESCR("List of file systems present"),
		       sysctl_vfs_generic_fstypes, 0, NULL, 0,
		       CTL_VFS, VFS_GENERIC, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "magiclinks",
		       SYSCTL_DESCR("Whether \"magic\" symlinks are expanded"),
		       NULL, 0, &vfs_magiclinks, 0,
		       CTL_VFS, VFS_GENERIC, VFS_MAGICLINKS, CTL_EOL);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
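
/*
 * Example: the "vfs.generic.fstypes" node created above can be read
 * from userland.  Illustrative sketch only (userland code, not part of
 * this file):
 *
 *	char buf[1024];
 *	size_t len = sizeof(buf);
 *
 *	if (sysctlbyname("vfs.generic.fstypes", buf, &len, NULL, 0) == 0)
 *		printf("%s\n", buf);	space-separated fs names
 */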
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_kern_vnode(SYSCTLFN_ARGS)
{
	char *where = oldp;
	size_t *sizep = oldlenp;
	struct mount *mp, *nmp;
	struct vnode *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

	if (namelen != 0)
		return (EOPNOTSUPP);
	if (newp != NULL)
		return (EPERM);

#define VPTRSZ	sizeof(struct vnode *)
#define VNODESZ	sizeof(struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	     mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(struct vnode *vp)
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_type != VBLK)
		return ENOTBLK;
	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Do the usual access checking.
 * file_mode, uid and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list.
 */
int
vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
	mode_t acc_mode, kauth_cred_t cred)
{
	mode_t mask;
	int error, ismember;

	/*
	 * Super-user always gets read/write access, but execute access depends
	 * on at least one execute bit being set.
	 */
	if (kauth_cred_geteuid(cred) == 0) {
		if ((acc_mode & VEXEC) && type != VDIR &&
		    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
			return (EACCES);
		return (0);
	}

	mask = 0;

	/* Otherwise, check the owner. */
	if (kauth_cred_geteuid(cred) == uid) {
		if (acc_mode & VEXEC)
			mask |= S_IXUSR;
		if (acc_mode & VREAD)
			mask |= S_IRUSR;
		if (acc_mode & VWRITE)
			mask |= S_IWUSR;
		return ((file_mode & mask) == mask ? 0 : EACCES);
	}

	/* Otherwise, check the groups. */
	error = kauth_cred_ismember_gid(cred, gid, &ismember);
	if (error)
		return (error);
	if (kauth_cred_getegid(cred) == gid || ismember) {
		if (acc_mode & VEXEC)
			mask |= S_IXGRP;
		if (acc_mode & VREAD)
			mask |= S_IRGRP;
		if (acc_mode & VWRITE)
			mask |= S_IWGRP;
		return ((file_mode & mask) == mask ? 0 : EACCES);
	}

	/* Otherwise, check everyone else. */
	if (acc_mode & VEXEC)
		mask |= S_IXOTH;
	if (acc_mode & VREAD)
		mask |= S_IROTH;
	if (acc_mode & VWRITE)
		mask |= S_IWOTH;
	return ((file_mode & mask) == mask ? 0 : EACCES);
}
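
/*
 * Example: a filesystem's VOP_ACCESS implementation typically ends by
 * delegating to vaccess() with the attributes of the file.
 * Illustrative sketch only; "ip" is a hypothetical in-core inode:
 *
 *	return vaccess(vp->v_type, ip->i_mode & ALLPERMS, ip->i_uid,
 *	    ip->i_gid, mode, cred);
 */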

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(struct vnode *vp)
{
        struct vnode *vq;
        int error = 0;

        if (vp->v_type != VBLK)
                return ENOTBLK;
        if (vp->v_specmountpoint != NULL)
                return (EBUSY);
        if (vp->v_flag & VALIASED) {
                simple_lock(&spechash_slock);
                for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                        if (vq->v_rdev != vp->v_rdev ||
                            vq->v_type != vp->v_type)
                                continue;
                        if (vq->v_specmountpoint != NULL) {
                                error = EBUSY;
                                break;
                        }
                }
                simple_unlock(&spechash_slock);
        }
        return (error);
}

/*
 * Do the usual access checking.
 * file_mode, uid and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list.
 */
int
vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
    mode_t acc_mode, kauth_cred_t cred)
{
        mode_t mask;
        int error, ismember;

        /*
         * Super-user always gets read/write access, but execute access
         * depends on at least one execute bit being set.
         */
        if (kauth_cred_geteuid(cred) == 0) {
                if ((acc_mode & VEXEC) && type != VDIR &&
                    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
                        return (EACCES);
                return (0);
        }

        mask = 0;

        /* Otherwise, check the owner. */
        if (kauth_cred_geteuid(cred) == uid) {
                if (acc_mode & VEXEC)
                        mask |= S_IXUSR;
                if (acc_mode & VREAD)
                        mask |= S_IRUSR;
                if (acc_mode & VWRITE)
                        mask |= S_IWUSR;
                return ((file_mode & mask) == mask ? 0 : EACCES);
        }

        /* Otherwise, check the groups. */
        error = kauth_cred_ismember_gid(cred, gid, &ismember);
        if (error)
                return (error);
        if (kauth_cred_getegid(cred) == gid || ismember) {
                if (acc_mode & VEXEC)
                        mask |= S_IXGRP;
                if (acc_mode & VREAD)
                        mask |= S_IRGRP;
                if (acc_mode & VWRITE)
                        mask |= S_IWGRP;
                return ((file_mode & mask) == mask ? 0 : EACCES);
        }

        /* Otherwise, check everyone else. */
        if (acc_mode & VEXEC)
                mask |= S_IXOTH;
        if (acc_mode & VREAD)
                mask |= S_IROTH;
        if (acc_mode & VWRITE)
                mask |= S_IWOTH;
        return ((file_mode & mask) == mask ? 0 : EACCES);
}
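
/*
 * Worked example (illustrative, not part of the original source):
 * for a regular file with mode 0754, owner uid 100 and group gid 10,
 * a caller whose effective gid is 10 (with euid != 100 and != 0)
 * requesting VREAD|VWRITE builds mask = S_IRGRP|S_IWGRP = 0060;
 * since (0754 & 0060) == 0040 != 0060, vaccess() returns EACCES.
 * The same caller requesting VREAD|VEXEC builds mask = 0050, and
 * (0754 & 0050) == 0050, so access is granted.
 */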

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall(struct lwp *l)
{
        struct mount *mp, *nmp;
        int allerror, error;

        printf("unmounting file systems...");
        for (allerror = 0,
            mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
                nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
                printf("\nunmounting %s (%s)...",
                    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
                /*
                 * XXX Freeze syncer.  Must do this before locking the
                 * mount point.  See dounmount() for details.
                 */
                lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
                if (vfs_busy(mp, 0, 0)) {
                        lockmgr(&syncer_lock, LK_RELEASE, NULL);
                        continue;
                }
                if ((error = dounmount(mp, MNT_FORCE, l)) != 0) {
                        printf("unmount of %s failed with error %d\n",
                            mp->mnt_stat.f_mntonname, error);
                        allerror = 1;
                }
        }
        printf(" done\n");
        if (allerror)
                printf("WARNING: some file systems would not unmount\n");
}

extern struct simplelock bqueue_slock; /* XXX */

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown(void)
{
        struct lwp *l;

        /* XXX we're certainly not running in lwp0's context! */
        l = curlwp;
        if (l == NULL)
                l = &lwp0;

        printf("syncing disks... ");

        /* remove user processes from run queue */
        suspendsched();
        (void) spl0();

        /* avoid coming back this way again if we panic. */
        doing_shutdown = 1;

        sys_sync(l, NULL, NULL);

        /* Wait for sync to finish. */
        if (buf_syncwait() != 0) {
#if defined(DDB) && defined(DEBUG_HALT_BUSY)
                Debugger();
#endif
                printf("giving up\n");
                return;
        } else
                printf("done\n");

        /*
         * If we've panic'd, don't make the situation potentially
         * worse by unmounting the file systems.
         */
        if (panicstr != NULL)
                return;

        /* Release inodes held by texts before update. */
#ifdef notdef
        vnshutdown();
#endif
        /* Unmount file systems. */
        vfs_unmountall(l);
}

/*
 * Mount the root file system.  If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot(void)
{
        struct vfsops *v;
        int error = ENODEV;

        if (root_device == NULL)
                panic("vfs_mountroot: root device unknown");

        switch (device_class(root_device)) {
        case DV_IFNET:
                if (rootdev != NODEV)
                        panic("vfs_mountroot: rootdev set for DV_IFNET "
                            "(0x%08x -> %d,%d)", rootdev,
                            major(rootdev), minor(rootdev));
                break;

        case DV_DISK:
                if (rootdev == NODEV)
                        panic("vfs_mountroot: rootdev not set for DV_DISK");
                if (bdevvp(rootdev, &rootvp))
                        panic("vfs_mountroot: can't get vnode for rootdev");
                error = VOP_OPEN(rootvp, FREAD, FSCRED, curlwp);
                if (error) {
                        printf("vfs_mountroot: can't open root device\n");
                        return (error);
                }
                break;

        default:
                printf("%s: inappropriate for root file system\n",
                    root_device->dv_xname);
                return (ENODEV);
        }

        /*
         * If the user specified a file system, use it.
         */
        if (mountroot != NULL) {
                error = (*mountroot)();
                goto done;
        }

        /*
         * Try each file system currently configured into the kernel.
         */
        LIST_FOREACH(v, &vfs_list, vfs_list) {
                if (v->vfs_mountroot == NULL)
                        continue;
#ifdef DEBUG
                aprint_normal("mountroot: trying %s...\n", v->vfs_name);
#endif
                error = (*v->vfs_mountroot)();
                if (!error) {
                        aprint_normal("root file system type: %s\n",
                            v->vfs_name);
                        break;
                }
        }

        if (v == NULL) {
                printf("no file system for %s", root_device->dv_xname);
                if (device_class(root_device) == DV_DISK)
                        printf(" (dev 0x%x)", rootdev);
                printf("\n");
                error = EFTYPE;
        }

done:
        if (error && device_class(root_device) == DV_DISK) {
                VOP_CLOSE(rootvp, FREAD, FSCRED, curlwp);
                vrele(rootvp);
        }
        return (error);
}

/*
 * Given a file system name, look up the vfsops for that
 * file system, or return NULL if the file system isn't present
 * in the kernel.
 */
struct vfsops *
vfs_getopsbyname(const char *name)
{
        struct vfsops *v;

        LIST_FOREACH(v, &vfs_list, vfs_list) {
                if (strcmp(v->vfs_name, name) == 0)
                        break;
        }

        return (v);
}

/*
 * Establish a file system and initialize it.
 */
int
vfs_attach(struct vfsops *vfs)
{
        struct vfsops *v;
        int error = 0;

        /*
         * Make sure this file system doesn't already exist.
         */
        LIST_FOREACH(v, &vfs_list, vfs_list) {
                if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
                        error = EEXIST;
                        goto out;
                }
        }

        /*
         * Initialize the vnode operations for this file system.
         */
        vfs_opv_init(vfs->vfs_opv_descs);

        /*
         * Now initialize the file system itself.
         */
        (*vfs->vfs_init)();

        /*
         * ...and link it into the kernel's list.
         */
        LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);

        /*
         * Sanity: make sure the reference count is 0.
         */
        vfs->vfs_refcount = 0;

out:
        return (error);
}
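
/*
 * Illustrative sketch (not part of the original source): a file system
 * typically registers itself from its module or attach glue.  The
 * names below (myfs_vfsops, myfs_modinit) are hypothetical:
 *
 *      extern struct vfsops myfs_vfsops;
 *
 *      void
 *      myfs_modinit(void)
 *      {
 *              int error;
 *
 *              error = vfs_attach(&myfs_vfsops);
 *              if (error)
 *                      printf("myfs: vfs_attach failed (%d)\n", error);
 *      }
 *
 * A matching vfs_detach(&myfs_vfsops) undoes the registration, but
 * only once vfs_refcount has dropped back to zero.
 */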

/*
 * Remove a file system from the kernel.
 */
int
vfs_detach(struct vfsops *vfs)
{
        struct vfsops *v;

        /*
         * Make sure no one is using the filesystem.
         */
        if (vfs->vfs_refcount != 0)
                return (EBUSY);

        /*
         * ...and remove it from the kernel's list.
         */
        LIST_FOREACH(v, &vfs_list, vfs_list) {
                if (v == vfs) {
                        LIST_REMOVE(v, vfs_list);
                        break;
                }
        }

        if (v == NULL)
                return (ESRCH);

        /*
         * Now run the file system-specific cleanups.
         */
        (*vfs->vfs_done)();

        /*
         * Free the vnode operations vector.
         */
        vfs_opv_free(vfs->vfs_opv_descs);
        return (0);
}

void
vfs_reinit(void)
{
        struct vfsops *vfs;

        LIST_FOREACH(vfs, &vfs_list, vfs_list) {
                if (vfs->vfs_reinit) {
                        (*vfs->vfs_reinit)();
                }
        }
}

/*
 * Request a filesystem to suspend write operations.
 */
int
vfs_write_suspend(struct mount *mp, int slpflag, int slptimeo)
{
        struct lwp *l = curlwp; /* XXX */
        int error;

        while ((mp->mnt_iflag & IMNT_SUSPEND)) {
                if (slptimeo < 0)
                        return EWOULDBLOCK;
                error = tsleep(&mp->mnt_flag, slpflag, "suspwt1", slptimeo);
                if (error)
                        return error;
        }
        mp->mnt_iflag |= IMNT_SUSPEND;

        simple_lock(&mp->mnt_slock);
        if (mp->mnt_writeopcountupper > 0)
                ltsleep(&mp->mnt_writeopcountupper, PUSER - 1, "suspwt",
                    0, &mp->mnt_slock);
        simple_unlock(&mp->mnt_slock);

        error = VFS_SYNC(mp, MNT_WAIT, l->l_cred, l);
        if (error) {
                vfs_write_resume(mp);
                return error;
        }
        mp->mnt_iflag |= IMNT_SUSPENDLOW;

        simple_lock(&mp->mnt_slock);
        if (mp->mnt_writeopcountlower > 0)
                ltsleep(&mp->mnt_writeopcountlower, PUSER - 1, "suspwt",
                    0, &mp->mnt_slock);
        mp->mnt_iflag |= IMNT_SUSPENDED;
        simple_unlock(&mp->mnt_slock);

        return 0;
}

/*
 * Request a filesystem to resume write operations.
 */
void
vfs_write_resume(struct mount *mp)
{

        if ((mp->mnt_iflag & IMNT_SUSPEND) == 0)
                return;
        mp->mnt_iflag &= ~(IMNT_SUSPEND | IMNT_SUSPENDLOW | IMNT_SUSPENDED);
        wakeup(&mp->mnt_flag);
}
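
/*
 * Illustrative sketch (not part of the original source): a consumer
 * such as a snapshot facility would bracket its work with the pair
 * above, so that on-disk state is quiescent while it runs.  The flag
 * and timeout values here are only plausible choices:
 *
 *      error = vfs_write_suspend(mp, PUSER | PCATCH, 0);
 *      if (error == 0) {
 *              ... operate on the file system while writes are idle ...
 *              vfs_write_resume(mp);
 *      }
 *
 * Note that vfs_write_suspend() itself calls vfs_write_resume() if
 * the intervening VFS_SYNC() fails, so the caller resumes only on
 * success.
 */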

void
copy_statvfs_info(struct statvfs *sbp, const struct mount *mp)
{
        const struct statvfs *mbp;

        if (sbp == (mbp = &mp->mnt_stat))
                return;

        (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx));
        sbp->f_fsid = mbp->f_fsid;
        sbp->f_owner = mbp->f_owner;
        sbp->f_flag = mbp->f_flag;
        sbp->f_syncwrites = mbp->f_syncwrites;
        sbp->f_asyncwrites = mbp->f_asyncwrites;
        sbp->f_syncreads = mbp->f_syncreads;
        sbp->f_asyncreads = mbp->f_asyncreads;
        (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare));
        (void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
            sizeof(sbp->f_fstypename));
        (void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
            sizeof(sbp->f_mntonname));
        (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname,
            sizeof(sbp->f_mntfromname));
        sbp->f_namemax = mbp->f_namemax;
}

int
set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
    struct mount *mp, struct lwp *l)
{
        int error;
        size_t size;
        struct statvfs *sfs = &mp->mnt_stat;
        int (*fun)(const void *, void *, size_t, size_t *);

        (void)strncpy(mp->mnt_stat.f_fstypename, mp->mnt_op->vfs_name,
            sizeof(mp->mnt_stat.f_fstypename));

        if (onp) {
                struct cwdinfo *cwdi = l->l_proc->p_cwdi;
                fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
                if (cwdi->cwdi_rdir != NULL) {
                        size_t len;
                        char *bp;
                        char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);

                        if (!path) /* XXX can't happen with M_WAITOK */
                                return ENOMEM;

                        bp = path + MAXPATHLEN;
                        *--bp = '\0';
                        error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
                            path, MAXPATHLEN / 2, 0, l);
                        if (error) {
                                free(path, M_TEMP);
                                return error;
                        }

                        len = strlen(bp);
                        if (len > sizeof(sfs->f_mntonname) - 1)
                                len = sizeof(sfs->f_mntonname) - 1;
                        (void)strncpy(sfs->f_mntonname, bp, len);
                        free(path, M_TEMP);

                        if (len < sizeof(sfs->f_mntonname) - 1) {
                                error = (*fun)(onp, &sfs->f_mntonname[len],
                                    sizeof(sfs->f_mntonname) - len - 1, &size);
                                if (error)
                                        return error;
                                size += len;
                        } else {
                                size = len;
                        }
                } else {
                        error = (*fun)(onp, &sfs->f_mntonname,
                            sizeof(sfs->f_mntonname) - 1, &size);
                        if (error)
                                return error;
                }
                (void)memset(sfs->f_mntonname + size, 0,
                    sizeof(sfs->f_mntonname) - size);
        }

        if (fromp) {
                fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
                error = (*fun)(fromp, sfs->f_mntfromname,
                    sizeof(sfs->f_mntfromname) - 1, &size);
                if (error)
                        return error;
                (void)memset(sfs->f_mntfromname + size, 0,
                    sizeof(sfs->f_mntfromname) - size);
        }
        return 0;
}
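
/*
 * Illustrative sketch (not part of the original source): a file
 * system's mount entry point would typically finish by recording the
 * mount-point and special-device names via the helper above; the
 * argument names (path, args.fspec) are hypothetical:
 *
 *      error = set_statvfs_info(path, UIO_USERSPACE, args.fspec,
 *          UIO_USERSPACE, mp, l);
 *
 * copy_statvfs_info() is then the usual way for the file system's
 * statvfs routine to fill in the generic fields cached in
 * mp->mnt_stat.
 */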

void
vfs_timestamp(struct timespec *ts)
{

        nanotime(ts);
}

/*
 * mount_specific_key_create --
 *      Create a key for subsystem mount-specific data.
 */
int
mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
{

        return (specificdata_key_create(mount_specificdata_domain, keyp,
            dtor));
}

/*
 * mount_specific_key_delete --
 *      Delete a key for subsystem mount-specific data.
 */
void
mount_specific_key_delete(specificdata_key_t key)
{

        specificdata_key_delete(mount_specificdata_domain, key);
}

/*
 * mount_initspecific --
 *      Initialize a mount's specificdata container.
 */
void
mount_initspecific(struct mount *mp)
{
        int error;

        error = specificdata_init(mount_specificdata_domain,
            &mp->mnt_specdataref);
        KASSERT(error == 0);
}

/*
 * mount_finispecific --
 *      Finalize a mount's specificdata container.
 */
void
mount_finispecific(struct mount *mp)
{

        specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
}

/*
 * mount_getspecific --
 *      Return mount-specific data corresponding to the specified key.
 */
void *
mount_getspecific(struct mount *mp, specificdata_key_t key)
{

        return (specificdata_getspecific(mount_specificdata_domain,
            &mp->mnt_specdataref, key));
}

/*
 * mount_setspecific --
 *      Set mount-specific data corresponding to the specified key.
 */
void
mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
{

        specificdata_setspecific(mount_specificdata_domain,
            &mp->mnt_specdataref, key, data);
}
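
/*
 * Illustrative sketch (not part of the original source): a subsystem
 * creates a key once at initialization time and can then hang private
 * per-mount data off any mount.  All names here (myfs_mount_key,
 * myfs_mount_dtor, struct myfs_data) are hypothetical:
 *
 *      static specificdata_key_t myfs_mount_key;
 *
 *      void
 *      myfs_subsys_init(void)
 *      {
 *              (void)mount_specific_key_create(&myfs_mount_key,
 *                  myfs_mount_dtor);
 *      }
 *
 *      -- later, against a particular mount:
 *      struct myfs_data *md;
 *
 *      mount_setspecific(mp, myfs_mount_key, md);
 *      md = mount_getspecific(mp, myfs_mount_key);
 *
 * The destructor registered with the key runs on the stored datum
 * when the mount's container is finalized by mount_finispecific().
 */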
%d\n", 2779 mp->mnt_wcnt,mp->mnt_writeopcountupper,mp->mnt_writeopcountlower); 2780 2781 (*pr)("statvfs cache:\n"); 2782 (*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize); 2783 (*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize); 2784 (*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize); 2785 2786 (*pr)("\tblocks = %"PRIu64"\n",mp->mnt_stat.f_blocks); 2787 (*pr)("\tbfree = %"PRIu64"\n",mp->mnt_stat.f_bfree); 2788 (*pr)("\tbavail = %"PRIu64"\n",mp->mnt_stat.f_bavail); 2789 (*pr)("\tbresvd = %"PRIu64"\n",mp->mnt_stat.f_bresvd); 2790 2791 (*pr)("\tfiles = %"PRIu64"\n",mp->mnt_stat.f_files); 2792 (*pr)("\tffree = %"PRIu64"\n",mp->mnt_stat.f_ffree); 2793 (*pr)("\tfavail = %"PRIu64"\n",mp->mnt_stat.f_favail); 2794 (*pr)("\tfresvd = %"PRIu64"\n",mp->mnt_stat.f_fresvd); 2795 2796 (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n", 2797 mp->mnt_stat.f_fsidx.__fsid_val[0], 2798 mp->mnt_stat.f_fsidx.__fsid_val[1]); 2799 2800 (*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner); 2801 (*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax); 2802 2803 bitmask_snprintf(mp->mnt_stat.f_flag, __MNT_FLAG_BITS, sbuf, 2804 sizeof(sbuf)); 2805 (*pr)("\tflag = %s\n",sbuf); 2806 (*pr)("\tsyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_syncwrites); 2807 (*pr)("\tasyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_asyncwrites); 2808 (*pr)("\tsyncreads = %" PRIu64 "\n",mp->mnt_stat.f_syncreads); 2809 (*pr)("\tasyncreads = %" PRIu64 "\n",mp->mnt_stat.f_asyncreads); 2810 (*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename); 2811 (*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname); 2812 (*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname); 2813 2814 { 2815 int cnt = 0; 2816 struct vnode *vp; 2817 (*pr)("locked vnodes ="); 2818 /* XXX would take mountlist lock, except ddb may not have context */ 2819 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 2820 if (VOP_ISLOCKED(vp)) { 2821 if ((++cnt % 6) == 0) { 2822 (*pr)(" %p,\n\t", vp); 2823 } else { 2824 (*pr)(" %p,", vp); 2825 } 2826 } 2827 } 2828 (*pr)("\n"); 2829 } 2830 2831 if (full) { 2832 int cnt = 0; 2833 struct vnode *vp; 2834 (*pr)("all vnodes ="); 2835 /* XXX would take mountlist lock, except ddb may not have context */ 2836 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 2837 if (!TAILQ_NEXT(vp, v_mntvnodes)) { 2838 (*pr)(" %p", vp); 2839 } else if ((++cnt % 6) == 0) { 2840 (*pr)(" %p,\n\t", vp); 2841 } else { 2842 (*pr)(" %p,", vp); 2843 } 2844 } 2845 (*pr)("\n", vp); 2846 } 2847 } 2848 #endif /* DDB */ 2849