/*	$NetBSD: vfs_subr.c,v 1.186 2003/02/01 06:23:45 thorpej Exp $	*/

/*-
 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.186 2003/02/01 06:23:45 thorpej Exp $");

#include "opt_ddb.h"
#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/event.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/dirent.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/syncfs/syncfs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_ddb.h>

#include <sys/sysctl.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
const int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {						\
	LIST_REMOVE(bp, b_vnbufs);				\
	(bp)->b_vnbufs.le_next = NOLIST;			\
}
/* TAILQ_HEAD(freelst, vnode) vnode_free_list =	vnode free list (in vnode.h) */
struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =		/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =		/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct nfs_public nfs_pub;		/* publicly exported FS */

struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;

/*
 * These define the root filesystem and device.
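 * rootfs and rootvnode name the mount structure and the vnode for the
 * root ("/") of the root filesystem; they are filled in when the root
 * filesystem is mounted during boot, and root_device records the device
 * the system was booted from.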
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;		/* root device */

struct pool vnode_pool;			/* memory pool for vnodes */

MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");

/*
 * Local declarations.
 */
void insmntque __P((struct vnode *, struct mount *));
int getdevvp __P((dev_t, struct vnode **, enum vtype));
void vgoneall __P((struct vnode *));

void vclean(struct vnode *, int, struct proc *);

static int vfs_hang_addrlist __P((struct mount *, struct netexport *,
				  struct export_args *));
static int vfs_free_netcred __P((struct radix_node *, void *));
static void vfs_free_addrlist __P((struct netexport *));

#ifdef DEBUG
void printlockedvnodes __P((void));
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
	    &pool_allocator_nointr);

	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
{
	int lkflags;

	while (mp->mnt_flag & MNT_UNMOUNT) {
		int gone;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
		    && mp->mnt_unmounter == curproc)
			return (EDEADLK);
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 *
		 * XXX MP: add spinlock protecting mnt_wcnt here once you
		 * can atomically unlock-and-sleep.
		 */
		mp->mnt_wcnt++;
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		mp->mnt_wcnt--;
		gone = mp->mnt_flag & MNT_GONE;

		if (mp->mnt_wcnt == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp)
	struct mount *mp;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
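 *
 * Illustrative caller sketch (hypothetical, for exposition only): the
 * root-mount code would do roughly
 *
 *	error = vfs_rootmountalloc("ffs", "root_device", &mp);
 *	if (error == 0)
 *		error = (*mp->mnt_op->vfs_mountroot)();
 *
 * and vfs_unbusy() and free the mount again if mounting the root fails.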
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfs_refcount++;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	struct mount *mp;

	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	mp->mnt_stat.f_fsid.val[0] = makedev(mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(mtype & 0xff, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(type)
	const char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}


/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	struct vattr *vap;
{

	vap->va_type = VNON;

	/*
	 * Assign individually so that it is safe even if size and
	 * sign of each member are varied.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;

/*
 * Return the next vnode from the free list.
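 *
 * Illustrative caller sketch (hypothetical): a filesystem's vget routine
 * would typically do something like
 *
 *	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
 *
 * and, on success, fill in vp->v_type and vp->v_data before using the
 * vnode; getnewvnode() hands it back with v_usecount already set to 1.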
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) __P((void *));
	struct vnode **vpp;
{
	extern struct uvm_pagerops uvm_vnodeops;
	struct uvm_object *uobj;
	struct proc *p = curproc;	/* XXX */
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int error = 0, tryalloc;

 try_again:
	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error.  If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock).
		 */
		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
		if (error && error != EDEADLK)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reticent to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */

	vp = NULL;

	simple_lock(&vnode_free_list_slock);

	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	tryalloc = numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));

	if (tryalloc &&
	    (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
		simple_unlock(&vnode_free_list_slock);
		memset(vp, 0, sizeof(*vp));
		simple_lock_init(&vp->v_interlock);
		uobj = &vp->v_uobj;
		uobj->pgops = &uvm_vnodeops;
		uobj->uo_npages = 0;
		TAILQ_INIT(&uobj->memq);
		numvnodes++;
	} else {
		if ((vp = TAILQ_FIRST(listhd = &vnode_free_list)) == NULL)
			vp = TAILQ_FIRST(listhd = &vnode_hold_list);
		for (; vp != NULL; vp = TAILQ_NEXT(vp, v_freelist)) {
			if (simple_lock_try(&vp->v_interlock)) {
				if ((vp->v_flag & VLAYER) == 0) {
					break;
				}
				if (VOP_ISLOCKED(vp) == 0)
					break;
				else
					simple_unlock(&vp->v_interlock);
			}
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
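		 *
		 * If no suitable vnode was found here: when a fresh
		 * allocation is still permitted (tryalloc), sleep briefly
		 * and retry from the top; otherwise fail with ENFILE.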
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			if (mp && error != EDEADLK)
				vfs_unbusy(mp);
			if (tryalloc) {
				printf("WARNING: unable to allocate new "
				    "vnode, retrying...\n");
				(void) tsleep(&lbolt, PRIBIO, "newvn", hz);
				goto try_again;
			}
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't, vp %p", vp);
		TAILQ_REMOVE(listhd, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;

		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data || vp->v_uobj.uo_npages ||
		    TAILQ_FIRST(&vp->v_uobj.memq))
			panic("cleaned vnode isn't, vp %p", vp);
		if (vp->v_numoutput)
			panic("clean vnode has pending I/O's, vp %p", vp);
#endif
		KASSERT((vp->v_flag & VONWORKLST) == 0);
		vp->v_flag = 0;
		vp->v_socket = NULL;
#ifdef VERIFIED_EXEC
		vp->fp_status = FINGERPRINT_INVALID;
#endif
	}
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uobj.vmobjlock);

	/*
	 * initialize uvm_object within vnode.
	 */

	uobj = &vp->v_uobj;
	KASSERT(uobj->pgops == &uvm_vnodeops);
	KASSERT(uobj->uo_npages == 0);
	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
	vp->v_size = VSIZENOTSET;

	if (mp && error != EDEADLK)
		vfs_unbusy(mp);
	return (0);
}

/*
 * This is really just the reverse of getnewvnode(). Needed for
 * VFS_VGET functions who may need to push back a vnode in case
 * of a locking race.
 */
void
ungetnewvnode(vp)
	struct vnode *vp;
{
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 1)
		panic("ungetnewvnode: busy vnode");
#endif
	vp->v_usecount--;
	insmntque(vp, NULL);
	vp->v_type = VBAD;

	simple_lock(&vp->v_interlock);
	/*
	 * Insert at head of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_flag & MNT_UNMOUNT) &&
	    !(mp->mnt_flag & MNT_SOFTDEP) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
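 * Typically invoked from the I/O completion path (e.g. biodone()) when a
 * write against the vnode finishes, so that anyone sleeping on
 * v_numoutput with VBWAIT set is woken once the count drains to zero.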
 */
void
vwakeup(bp)
	struct buf *bp;
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput, vp %p", vp);
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	struct buf *bp, *nbp;
	int s, error;
	int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
	    (flags & V_SAVE ? PGO_CLEANIT : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, 0, 0, flushflags);
	if (error) {
		return error;
	}

	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
		if (error)
			return (error);
#ifdef DIAGNOSTIC
		s = splbio();
		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
			panic("vinvalbuf: dirty bufs, vp %p", vp);
		splx(s);
#endif
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vinvalbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vinvalbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			bp->b_flags |= B_BUSY | B_VFLUSH;
			VOP_BWRITE(bp);
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	splx(s);

	return (0);
}

/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
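 *
 * Illustrative use (hypothetical caller): a filesystem's truncate path
 * might call
 *
 *	vtruncbuf(vp, lastblock + 1, 0, 0);
 *
 * to discard cached buffers whose logical block number is at or beyond
 * the given block, while leaving earlier buffers untouched.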
751 */ 752 int 753 vtruncbuf(vp, lbn, slpflag, slptimeo) 754 struct vnode *vp; 755 daddr_t lbn; 756 int slpflag, slptimeo; 757 { 758 struct buf *bp, *nbp; 759 int s, error; 760 voff_t off; 761 762 off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift); 763 simple_lock(&vp->v_interlock); 764 error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO); 765 if (error) { 766 return error; 767 } 768 769 s = splbio(); 770 771 restart: 772 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 773 nbp = LIST_NEXT(bp, b_vnbufs); 774 if (bp->b_lblkno < lbn) 775 continue; 776 if (bp->b_flags & B_BUSY) { 777 bp->b_flags |= B_WANTED; 778 error = tsleep(bp, slpflag | (PRIBIO + 1), 779 "vtruncbuf", slptimeo); 780 if (error) { 781 splx(s); 782 return (error); 783 } 784 goto restart; 785 } 786 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; 787 brelse(bp); 788 } 789 790 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 791 nbp = LIST_NEXT(bp, b_vnbufs); 792 if (bp->b_lblkno < lbn) 793 continue; 794 if (bp->b_flags & B_BUSY) { 795 bp->b_flags |= B_WANTED; 796 error = tsleep(bp, slpflag | (PRIBIO + 1), 797 "vtruncbuf", slptimeo); 798 if (error) { 799 splx(s); 800 return (error); 801 } 802 goto restart; 803 } 804 bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH; 805 brelse(bp); 806 } 807 808 splx(s); 809 810 return (0); 811 } 812 813 void 814 vflushbuf(vp, sync) 815 struct vnode *vp; 816 int sync; 817 { 818 struct buf *bp, *nbp; 819 int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0); 820 int s; 821 822 simple_lock(&vp->v_interlock); 823 (void) VOP_PUTPAGES(vp, 0, 0, flags); 824 825 loop: 826 s = splbio(); 827 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 828 nbp = LIST_NEXT(bp, b_vnbufs); 829 if ((bp->b_flags & B_BUSY)) 830 continue; 831 if ((bp->b_flags & B_DELWRI) == 0) 832 panic("vflushbuf: not dirty, bp %p", bp); 833 bp->b_flags |= B_BUSY | B_VFLUSH; 834 splx(s); 835 /* 836 * Wait for I/O associated with indirect blocks to complete, 837 * since there is no way to quickly wait for them below. 838 */ 839 if (bp->b_vp == vp || sync == 0) 840 (void) bawrite(bp); 841 else 842 (void) bwrite(bp); 843 goto loop; 844 } 845 if (sync == 0) { 846 splx(s); 847 return; 848 } 849 while (vp->v_numoutput) { 850 vp->v_flag |= VBWAIT; 851 tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0); 852 } 853 splx(s); 854 if (!LIST_EMPTY(&vp->v_dirtyblkhd)) { 855 vprint("vflushbuf: dirty", vp); 856 goto loop; 857 } 858 } 859 860 /* 861 * Associate a buffer with a vnode. 862 */ 863 void 864 bgetvp(vp, bp) 865 struct vnode *vp; 866 struct buf *bp; 867 { 868 int s; 869 870 if (bp->b_vp) 871 panic("bgetvp: not free, bp %p", bp); 872 VHOLD(vp); 873 s = splbio(); 874 bp->b_vp = vp; 875 if (vp->v_type == VBLK || vp->v_type == VCHR) 876 bp->b_dev = vp->v_rdev; 877 else 878 bp->b_dev = NODEV; 879 /* 880 * Insert onto list for new vnode. 881 */ 882 bufinsvn(bp, &vp->v_cleanblkhd); 883 splx(s); 884 } 885 886 /* 887 * Disassociate a buffer from a vnode. 888 */ 889 void 890 brelvp(bp) 891 struct buf *bp; 892 { 893 struct vnode *vp; 894 int s; 895 896 if (bp->b_vp == NULL) 897 panic("brelvp: vp NULL, bp %p", bp); 898 899 s = splbio(); 900 vp = bp->b_vp; 901 /* 902 * Delete from old vnode list, if on one. 
903 */ 904 if (LIST_NEXT(bp, b_vnbufs) != NOLIST) 905 bufremvn(bp); 906 907 if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) && 908 LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { 909 vp->v_flag &= ~VONWORKLST; 910 LIST_REMOVE(vp, v_synclist); 911 } 912 913 bp->b_vp = NULL; 914 HOLDRELE(vp); 915 splx(s); 916 } 917 918 /* 919 * Reassign a buffer from one vnode to another. 920 * Used to assign file specific control information 921 * (indirect blocks) to the vnode to which they belong. 922 * 923 * This function must be called at splbio(). 924 */ 925 void 926 reassignbuf(bp, newvp) 927 struct buf *bp; 928 struct vnode *newvp; 929 { 930 struct buflists *listheadp; 931 int delay; 932 933 /* 934 * Delete from old vnode list, if on one. 935 */ 936 if (LIST_NEXT(bp, b_vnbufs) != NOLIST) 937 bufremvn(bp); 938 /* 939 * If dirty, put on list of dirty buffers; 940 * otherwise insert onto list of clean buffers. 941 */ 942 if ((bp->b_flags & B_DELWRI) == 0) { 943 listheadp = &newvp->v_cleanblkhd; 944 if (TAILQ_EMPTY(&newvp->v_uobj.memq) && 945 (newvp->v_flag & VONWORKLST) && 946 LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) { 947 newvp->v_flag &= ~VONWORKLST; 948 LIST_REMOVE(newvp, v_synclist); 949 } 950 } else { 951 listheadp = &newvp->v_dirtyblkhd; 952 if ((newvp->v_flag & VONWORKLST) == 0) { 953 switch (newvp->v_type) { 954 case VDIR: 955 delay = dirdelay; 956 break; 957 case VBLK: 958 if (newvp->v_specmountpoint != NULL) { 959 delay = metadelay; 960 break; 961 } 962 /* fall through */ 963 default: 964 delay = filedelay; 965 break; 966 } 967 if (!newvp->v_mount || 968 (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0) 969 vn_syncer_add_to_worklist(newvp, delay); 970 } 971 } 972 bufinsvn(bp, listheadp); 973 } 974 975 /* 976 * Create a vnode for a block device. 977 * Used for root filesystem and swap areas. 978 * Also used for memory file system special devices. 979 */ 980 int 981 bdevvp(dev, vpp) 982 dev_t dev; 983 struct vnode **vpp; 984 { 985 986 return (getdevvp(dev, vpp, VBLK)); 987 } 988 989 /* 990 * Create a vnode for a character device. 991 * Used for kernfs and some console handling. 992 */ 993 int 994 cdevvp(dev, vpp) 995 dev_t dev; 996 struct vnode **vpp; 997 { 998 999 return (getdevvp(dev, vpp, VCHR)); 1000 } 1001 1002 /* 1003 * Create a vnode for a device. 1004 * Used by bdevvp (block device) for root file system etc., 1005 * and by cdevvp (character device) for console and kernfs. 1006 */ 1007 int 1008 getdevvp(dev, vpp, type) 1009 dev_t dev; 1010 struct vnode **vpp; 1011 enum vtype type; 1012 { 1013 struct vnode *vp; 1014 struct vnode *nvp; 1015 int error; 1016 1017 if (dev == NODEV) { 1018 *vpp = NULLVP; 1019 return (0); 1020 } 1021 error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp); 1022 if (error) { 1023 *vpp = NULLVP; 1024 return (error); 1025 } 1026 vp = nvp; 1027 vp->v_type = type; 1028 if ((nvp = checkalias(vp, dev, NULL)) != 0) { 1029 vput(vp); 1030 vp = nvp; 1031 } 1032 *vpp = vp; 1033 return (0); 1034 } 1035 1036 /* 1037 * Check to see if the new vnode represents a special device 1038 * for which we already have a vnode (either because of 1039 * bdevvp() or because of a different vnode representing 1040 * the same block device). If such an alias exists, deallocate 1041 * the existing contents and return the aliased vnode. The 1042 * caller is responsible for filling it with its new contents. 
1043 */ 1044 struct vnode * 1045 checkalias(nvp, nvp_rdev, mp) 1046 struct vnode *nvp; 1047 dev_t nvp_rdev; 1048 struct mount *mp; 1049 { 1050 struct proc *p = curproc; /* XXX */ 1051 struct vnode *vp; 1052 struct vnode **vpp; 1053 1054 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 1055 return (NULLVP); 1056 1057 vpp = &speclisth[SPECHASH(nvp_rdev)]; 1058 loop: 1059 simple_lock(&spechash_slock); 1060 for (vp = *vpp; vp; vp = vp->v_specnext) { 1061 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) 1062 continue; 1063 /* 1064 * Alias, but not in use, so flush it out. 1065 */ 1066 simple_lock(&vp->v_interlock); 1067 if (vp->v_usecount == 0) { 1068 simple_unlock(&spechash_slock); 1069 vgonel(vp, p); 1070 goto loop; 1071 } 1072 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) { 1073 simple_unlock(&spechash_slock); 1074 goto loop; 1075 } 1076 break; 1077 } 1078 if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) { 1079 MALLOC(nvp->v_specinfo, struct specinfo *, 1080 sizeof(struct specinfo), M_VNODE, M_NOWAIT); 1081 /* XXX Erg. */ 1082 if (nvp->v_specinfo == NULL) { 1083 simple_unlock(&spechash_slock); 1084 uvm_wait("checkalias"); 1085 goto loop; 1086 } 1087 1088 nvp->v_rdev = nvp_rdev; 1089 nvp->v_hashchain = vpp; 1090 nvp->v_specnext = *vpp; 1091 nvp->v_specmountpoint = NULL; 1092 simple_unlock(&spechash_slock); 1093 nvp->v_speclockf = NULL; 1094 *vpp = nvp; 1095 if (vp != NULLVP) { 1096 nvp->v_flag |= VALIASED; 1097 vp->v_flag |= VALIASED; 1098 vput(vp); 1099 } 1100 return (NULLVP); 1101 } 1102 simple_unlock(&spechash_slock); 1103 VOP_UNLOCK(vp, 0); 1104 simple_lock(&vp->v_interlock); 1105 vclean(vp, 0, p); 1106 vp->v_op = nvp->v_op; 1107 vp->v_tag = nvp->v_tag; 1108 vp->v_vnlock = &vp->v_lock; 1109 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); 1110 nvp->v_type = VNON; 1111 insmntque(vp, mp); 1112 return (vp); 1113 } 1114 1115 /* 1116 * Grab a particular vnode from the free list, increment its 1117 * reference count and lock it. If the vnode lock bit is set the 1118 * vnode is being eliminated in vgone. In that case, we can not 1119 * grab the vnode, so the process is awakened when the transition is 1120 * completed, and an error returned to indicate that the vnode is no 1121 * longer usable (possibly having been changed to a new file system type). 1122 */ 1123 int 1124 vget(vp, flags) 1125 struct vnode *vp; 1126 int flags; 1127 { 1128 int error; 1129 1130 /* 1131 * If the vnode is in the process of being cleaned out for 1132 * another use, we wait for the cleaning to finish and then 1133 * return failure. Cleaning is determined by checking that 1134 * the VXLOCK flag is set. 
1135 */ 1136 1137 if ((flags & LK_INTERLOCK) == 0) 1138 simple_lock(&vp->v_interlock); 1139 if (vp->v_flag & VXLOCK) { 1140 if (flags & LK_NOWAIT) { 1141 simple_unlock(&vp->v_interlock); 1142 return EBUSY; 1143 } 1144 vp->v_flag |= VXWANT; 1145 ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock); 1146 return (ENOENT); 1147 } 1148 if (vp->v_usecount == 0) { 1149 simple_lock(&vnode_free_list_slock); 1150 if (vp->v_holdcnt > 0) 1151 TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); 1152 else 1153 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1154 simple_unlock(&vnode_free_list_slock); 1155 } 1156 vp->v_usecount++; 1157 #ifdef DIAGNOSTIC 1158 if (vp->v_usecount == 0) { 1159 vprint("vget", vp); 1160 panic("vget: usecount overflow, vp %p", vp); 1161 } 1162 #endif 1163 if (flags & LK_TYPE_MASK) { 1164 if ((error = vn_lock(vp, flags | LK_INTERLOCK))) { 1165 /* 1166 * must expand vrele here because we do not want 1167 * to call VOP_INACTIVE if the reference count 1168 * drops back to zero since it was never really 1169 * active. We must remove it from the free list 1170 * before sleeping so that multiple processes do 1171 * not try to recycle it. 1172 */ 1173 simple_lock(&vp->v_interlock); 1174 vp->v_usecount--; 1175 if (vp->v_usecount > 0) { 1176 simple_unlock(&vp->v_interlock); 1177 return (error); 1178 } 1179 /* 1180 * insert at tail of LRU list 1181 */ 1182 simple_lock(&vnode_free_list_slock); 1183 if (vp->v_holdcnt > 0) 1184 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, 1185 v_freelist); 1186 else 1187 TAILQ_INSERT_TAIL(&vnode_free_list, vp, 1188 v_freelist); 1189 simple_unlock(&vnode_free_list_slock); 1190 simple_unlock(&vp->v_interlock); 1191 } 1192 return (error); 1193 } 1194 simple_unlock(&vp->v_interlock); 1195 return (0); 1196 } 1197 1198 /* 1199 * vput(), just unlock and vrele() 1200 */ 1201 void 1202 vput(vp) 1203 struct vnode *vp; 1204 { 1205 struct proc *p = curproc; /* XXX */ 1206 1207 #ifdef DIAGNOSTIC 1208 if (vp == NULL) 1209 panic("vput: null vp"); 1210 #endif 1211 simple_lock(&vp->v_interlock); 1212 vp->v_usecount--; 1213 if (vp->v_usecount > 0) { 1214 simple_unlock(&vp->v_interlock); 1215 VOP_UNLOCK(vp, 0); 1216 return; 1217 } 1218 #ifdef DIAGNOSTIC 1219 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 1220 vprint("vput: bad ref count", vp); 1221 panic("vput: ref cnt"); 1222 } 1223 #endif 1224 /* 1225 * Insert at tail of LRU list. 1226 */ 1227 simple_lock(&vnode_free_list_slock); 1228 if (vp->v_holdcnt > 0) 1229 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); 1230 else 1231 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 1232 simple_unlock(&vnode_free_list_slock); 1233 if (vp->v_flag & VEXECMAP) { 1234 uvmexp.execpages -= vp->v_uobj.uo_npages; 1235 uvmexp.filepages += vp->v_uobj.uo_npages; 1236 } 1237 vp->v_flag &= ~(VTEXT|VEXECMAP); 1238 simple_unlock(&vp->v_interlock); 1239 VOP_INACTIVE(vp, p); 1240 } 1241 1242 /* 1243 * Vnode release. 1244 * If count drops to zero, call inactive routine and return to freelist. 
1245 */ 1246 void 1247 vrele(vp) 1248 struct vnode *vp; 1249 { 1250 struct proc *p = curproc; /* XXX */ 1251 1252 #ifdef DIAGNOSTIC 1253 if (vp == NULL) 1254 panic("vrele: null vp"); 1255 #endif 1256 simple_lock(&vp->v_interlock); 1257 vp->v_usecount--; 1258 if (vp->v_usecount > 0) { 1259 simple_unlock(&vp->v_interlock); 1260 return; 1261 } 1262 #ifdef DIAGNOSTIC 1263 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 1264 vprint("vrele: bad ref count", vp); 1265 panic("vrele: ref cnt vp %p", vp); 1266 } 1267 #endif 1268 /* 1269 * Insert at tail of LRU list. 1270 */ 1271 simple_lock(&vnode_free_list_slock); 1272 if (vp->v_holdcnt > 0) 1273 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); 1274 else 1275 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 1276 simple_unlock(&vnode_free_list_slock); 1277 if (vp->v_flag & VEXECMAP) { 1278 uvmexp.execpages -= vp->v_uobj.uo_npages; 1279 uvmexp.filepages += vp->v_uobj.uo_npages; 1280 } 1281 vp->v_flag &= ~(VTEXT|VEXECMAP); 1282 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0) 1283 VOP_INACTIVE(vp, p); 1284 } 1285 1286 #ifdef DIAGNOSTIC 1287 /* 1288 * Page or buffer structure gets a reference. 1289 */ 1290 void 1291 vhold(vp) 1292 struct vnode *vp; 1293 { 1294 1295 /* 1296 * If it is on the freelist and the hold count is currently 1297 * zero, move it to the hold list. The test of the back 1298 * pointer and the use reference count of zero is because 1299 * it will be removed from a free list by getnewvnode, 1300 * but will not have its reference count incremented until 1301 * after calling vgone. If the reference count were 1302 * incremented first, vgone would (incorrectly) try to 1303 * close the previous instance of the underlying object. 1304 * So, the back pointer is explicitly set to `0xdeadb' in 1305 * getnewvnode after removing it from a freelist to ensure 1306 * that we do not try to move it here. 1307 */ 1308 simple_lock(&vp->v_interlock); 1309 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) && 1310 vp->v_holdcnt == 0 && vp->v_usecount == 0) { 1311 simple_lock(&vnode_free_list_slock); 1312 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1313 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); 1314 simple_unlock(&vnode_free_list_slock); 1315 } 1316 vp->v_holdcnt++; 1317 simple_unlock(&vp->v_interlock); 1318 } 1319 1320 /* 1321 * Page or buffer structure frees a reference. 1322 */ 1323 void 1324 holdrele(vp) 1325 struct vnode *vp; 1326 { 1327 1328 simple_lock(&vp->v_interlock); 1329 if (vp->v_holdcnt <= 0) 1330 panic("holdrele: holdcnt vp %p", vp); 1331 vp->v_holdcnt--; 1332 1333 /* 1334 * If it is on the holdlist and the hold count drops to 1335 * zero, move it to the free list. The test of the back 1336 * pointer and the use reference count of zero is because 1337 * it will be removed from a free list by getnewvnode, 1338 * but will not have its reference count incremented until 1339 * after calling vgone. If the reference count were 1340 * incremented first, vgone would (incorrectly) try to 1341 * close the previous instance of the underlying object. 1342 * So, the back pointer is explicitly set to `0xdeadb' in 1343 * getnewvnode after removing it from a freelist to ensure 1344 * that we do not try to move it here. 
1345 */ 1346 1347 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) && 1348 vp->v_holdcnt == 0 && vp->v_usecount == 0) { 1349 simple_lock(&vnode_free_list_slock); 1350 TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); 1351 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 1352 simple_unlock(&vnode_free_list_slock); 1353 } 1354 simple_unlock(&vp->v_interlock); 1355 } 1356 1357 /* 1358 * Vnode reference. 1359 */ 1360 void 1361 vref(vp) 1362 struct vnode *vp; 1363 { 1364 1365 simple_lock(&vp->v_interlock); 1366 if (vp->v_usecount <= 0) 1367 panic("vref used where vget required, vp %p", vp); 1368 vp->v_usecount++; 1369 #ifdef DIAGNOSTIC 1370 if (vp->v_usecount == 0) { 1371 vprint("vref", vp); 1372 panic("vref: usecount overflow, vp %p", vp); 1373 } 1374 #endif 1375 simple_unlock(&vp->v_interlock); 1376 } 1377 #endif /* DIAGNOSTIC */ 1378 1379 /* 1380 * Remove any vnodes in the vnode table belonging to mount point mp. 1381 * 1382 * If FORCECLOSE is not specified, there should not be any active ones, 1383 * return error if any are found (nb: this is a user error, not a 1384 * system error). If FORCECLOSE is specified, detach any active vnodes 1385 * that are found. 1386 * 1387 * If WRITECLOSE is set, only flush out regular file vnodes open for 1388 * writing. 1389 * 1390 * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped. 1391 */ 1392 #ifdef DEBUG 1393 int busyprt = 0; /* print out busy vnodes */ 1394 struct ctldebug debug1 = { "busyprt", &busyprt }; 1395 #endif 1396 1397 int 1398 vflush(mp, skipvp, flags) 1399 struct mount *mp; 1400 struct vnode *skipvp; 1401 int flags; 1402 { 1403 struct proc *p = curproc; /* XXX */ 1404 struct vnode *vp, *nvp; 1405 int busy = 0; 1406 1407 simple_lock(&mntvnode_slock); 1408 loop: 1409 for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) { 1410 if (vp->v_mount != mp) 1411 goto loop; 1412 nvp = LIST_NEXT(vp, v_mntvnodes); 1413 /* 1414 * Skip over a selected vnode. 1415 */ 1416 if (vp == skipvp) 1417 continue; 1418 simple_lock(&vp->v_interlock); 1419 /* 1420 * Skip over a vnodes marked VSYSTEM. 1421 */ 1422 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { 1423 simple_unlock(&vp->v_interlock); 1424 continue; 1425 } 1426 /* 1427 * If WRITECLOSE is set, only flush out regular file 1428 * vnodes open for writing. 1429 */ 1430 if ((flags & WRITECLOSE) && 1431 (vp->v_writecount == 0 || vp->v_type != VREG)) { 1432 simple_unlock(&vp->v_interlock); 1433 continue; 1434 } 1435 /* 1436 * With v_usecount == 0, all we need to do is clear 1437 * out the vnode data structures and we are done. 1438 */ 1439 if (vp->v_usecount == 0) { 1440 simple_unlock(&mntvnode_slock); 1441 vgonel(vp, p); 1442 simple_lock(&mntvnode_slock); 1443 continue; 1444 } 1445 /* 1446 * If FORCECLOSE is set, forcibly close the vnode. 1447 * For block or character devices, revert to an 1448 * anonymous device. For all other files, just kill them. 1449 */ 1450 if (flags & FORCECLOSE) { 1451 simple_unlock(&mntvnode_slock); 1452 if (vp->v_type != VBLK && vp->v_type != VCHR) { 1453 vgonel(vp, p); 1454 } else { 1455 vclean(vp, 0, p); 1456 vp->v_op = spec_vnodeop_p; 1457 insmntque(vp, (struct mount *)0); 1458 } 1459 simple_lock(&mntvnode_slock); 1460 continue; 1461 } 1462 #ifdef DEBUG 1463 if (busyprt) 1464 vprint("vflush: busy vnode", vp); 1465 #endif 1466 simple_unlock(&vp->v_interlock); 1467 busy++; 1468 } 1469 simple_unlock(&mntvnode_slock); 1470 if (busy) 1471 return (EBUSY); 1472 return (0); 1473 } 1474 1475 /* 1476 * Disassociate the underlying file system from a vnode. 
1477 */ 1478 void 1479 vclean(vp, flags, p) 1480 struct vnode *vp; 1481 int flags; 1482 struct proc *p; 1483 { 1484 int active; 1485 1486 LOCK_ASSERT(simple_lock_held(&vp->v_interlock)); 1487 1488 /* 1489 * Check to see if the vnode is in use. 1490 * If so we have to reference it before we clean it out 1491 * so that its count cannot fall to zero and generate a 1492 * race against ourselves to recycle it. 1493 */ 1494 1495 if ((active = vp->v_usecount) != 0) { 1496 vp->v_usecount++; 1497 #ifdef DIAGNOSTIC 1498 if (vp->v_usecount == 0) { 1499 vprint("vclean", vp); 1500 panic("vclean: usecount overflow"); 1501 } 1502 #endif 1503 } 1504 1505 /* 1506 * Prevent the vnode from being recycled or 1507 * brought into use while we clean it out. 1508 */ 1509 if (vp->v_flag & VXLOCK) 1510 panic("vclean: deadlock, vp %p", vp); 1511 vp->v_flag |= VXLOCK; 1512 if (vp->v_flag & VEXECMAP) { 1513 uvmexp.execpages -= vp->v_uobj.uo_npages; 1514 uvmexp.filepages += vp->v_uobj.uo_npages; 1515 } 1516 vp->v_flag &= ~(VTEXT|VEXECMAP); 1517 1518 /* 1519 * Even if the count is zero, the VOP_INACTIVE routine may still 1520 * have the object locked while it cleans it out. The VOP_LOCK 1521 * ensures that the VOP_INACTIVE routine is done with its work. 1522 * For active vnodes, it ensures that no other activity can 1523 * occur while the underlying object is being cleaned out. 1524 */ 1525 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK); 1526 1527 /* 1528 * Clean out any cached data associated with the vnode. 1529 */ 1530 if (flags & DOCLOSE) { 1531 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); 1532 KASSERT((vp->v_flag & VONWORKLST) == 0); 1533 } 1534 LOCK_ASSERT(!simple_lock_held(&vp->v_interlock)); 1535 1536 /* 1537 * If purging an active vnode, it must be closed and 1538 * deactivated before being reclaimed. Note that the 1539 * VOP_INACTIVE will unlock the vnode. 1540 */ 1541 if (active) { 1542 if (flags & DOCLOSE) 1543 VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL); 1544 VOP_INACTIVE(vp, p); 1545 } else { 1546 /* 1547 * Any other processes trying to obtain this lock must first 1548 * wait for VXLOCK to clear, then call the new lock operation. 1549 */ 1550 VOP_UNLOCK(vp, 0); 1551 } 1552 /* 1553 * Reclaim the vnode. 1554 */ 1555 if (VOP_RECLAIM(vp, p)) 1556 panic("vclean: cannot reclaim, vp %p", vp); 1557 if (active) { 1558 /* 1559 * Inline copy of vrele() since VOP_INACTIVE 1560 * has already been called. 1561 */ 1562 simple_lock(&vp->v_interlock); 1563 if (--vp->v_usecount <= 0) { 1564 #ifdef DIAGNOSTIC 1565 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 1566 vprint("vclean: bad ref count", vp); 1567 panic("vclean: ref cnt"); 1568 } 1569 #endif 1570 /* 1571 * Insert at tail of LRU list. 1572 */ 1573 1574 simple_unlock(&vp->v_interlock); 1575 simple_lock(&vnode_free_list_slock); 1576 #ifdef DIAGNOSTIC 1577 if (vp->v_holdcnt > 0) 1578 panic("vclean: not clean, vp %p", vp); 1579 #endif 1580 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 1581 simple_unlock(&vnode_free_list_slock); 1582 } else 1583 simple_unlock(&vp->v_interlock); 1584 } 1585 1586 KASSERT(vp->v_uobj.uo_npages == 0); 1587 cache_purge(vp); 1588 1589 /* 1590 * Done with purge, notify sleepers of the grim news. 
1591 */ 1592 vp->v_op = dead_vnodeop_p; 1593 vp->v_tag = VT_NON; 1594 simple_lock(&vp->v_interlock); 1595 VN_KNOTE(vp, NOTE_REVOKE); /* FreeBSD has this in vn_pollgone() */ 1596 vp->v_flag &= ~VXLOCK; 1597 if (vp->v_flag & VXWANT) { 1598 vp->v_flag &= ~VXWANT; 1599 simple_unlock(&vp->v_interlock); 1600 wakeup((caddr_t)vp); 1601 } else 1602 simple_unlock(&vp->v_interlock); 1603 } 1604 1605 /* 1606 * Recycle an unused vnode to the front of the free list. 1607 * Release the passed interlock if the vnode will be recycled. 1608 */ 1609 int 1610 vrecycle(vp, inter_lkp, p) 1611 struct vnode *vp; 1612 struct simplelock *inter_lkp; 1613 struct proc *p; 1614 { 1615 1616 simple_lock(&vp->v_interlock); 1617 if (vp->v_usecount == 0) { 1618 if (inter_lkp) 1619 simple_unlock(inter_lkp); 1620 vgonel(vp, p); 1621 return (1); 1622 } 1623 simple_unlock(&vp->v_interlock); 1624 return (0); 1625 } 1626 1627 /* 1628 * Eliminate all activity associated with a vnode 1629 * in preparation for reuse. 1630 */ 1631 void 1632 vgone(vp) 1633 struct vnode *vp; 1634 { 1635 struct proc *p = curproc; /* XXX */ 1636 1637 simple_lock(&vp->v_interlock); 1638 vgonel(vp, p); 1639 } 1640 1641 /* 1642 * vgone, with the vp interlock held. 1643 */ 1644 void 1645 vgonel(vp, p) 1646 struct vnode *vp; 1647 struct proc *p; 1648 { 1649 struct vnode *vq; 1650 struct vnode *vx; 1651 1652 LOCK_ASSERT(simple_lock_held(&vp->v_interlock)); 1653 1654 /* 1655 * If a vgone (or vclean) is already in progress, 1656 * wait until it is done and return. 1657 */ 1658 1659 if (vp->v_flag & VXLOCK) { 1660 vp->v_flag |= VXWANT; 1661 ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock); 1662 return; 1663 } 1664 1665 /* 1666 * Clean out the filesystem specific data. 1667 */ 1668 1669 vclean(vp, DOCLOSE, p); 1670 KASSERT((vp->v_flag & VONWORKLST) == 0); 1671 1672 /* 1673 * Delete from old mount point vnode list, if on one. 1674 */ 1675 1676 if (vp->v_mount != NULL) 1677 insmntque(vp, (struct mount *)0); 1678 1679 /* 1680 * If special device, remove it from special device alias list. 1681 * if it is on one. 1682 */ 1683 1684 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { 1685 simple_lock(&spechash_slock); 1686 if (vp->v_hashchain != NULL) { 1687 if (*vp->v_hashchain == vp) { 1688 *vp->v_hashchain = vp->v_specnext; 1689 } else { 1690 for (vq = *vp->v_hashchain; vq; 1691 vq = vq->v_specnext) { 1692 if (vq->v_specnext != vp) 1693 continue; 1694 vq->v_specnext = vp->v_specnext; 1695 break; 1696 } 1697 if (vq == NULL) 1698 panic("missing bdev"); 1699 } 1700 if (vp->v_flag & VALIASED) { 1701 vx = NULL; 1702 for (vq = *vp->v_hashchain; vq; 1703 vq = vq->v_specnext) { 1704 if (vq->v_rdev != vp->v_rdev || 1705 vq->v_type != vp->v_type) 1706 continue; 1707 if (vx) 1708 break; 1709 vx = vq; 1710 } 1711 if (vx == NULL) 1712 panic("missing alias"); 1713 if (vq == NULL) 1714 vx->v_flag &= ~VALIASED; 1715 vp->v_flag &= ~VALIASED; 1716 } 1717 } 1718 simple_unlock(&spechash_slock); 1719 FREE(vp->v_specinfo, M_VNODE); 1720 vp->v_specinfo = NULL; 1721 } 1722 1723 /* 1724 * If it is on the freelist and not already at the head, 1725 * move it to the head of the list. The test of the back 1726 * pointer and the reference count of zero is because 1727 * it will be removed from the free list by getnewvnode, 1728 * but will not have its reference count incremented until 1729 * after calling vgone. If the reference count were 1730 * incremented first, vgone would (incorrectly) try to 1731 * close the previous instance of the underlying object. 
1732 * So, the back pointer is explicitly set to `0xdeadb' in 1733 * getnewvnode after removing it from the freelist to ensure 1734 * that we do not try to move it here. 1735 */ 1736 1737 if (vp->v_usecount == 0) { 1738 simple_lock(&vnode_free_list_slock); 1739 if (vp->v_holdcnt > 0) 1740 panic("vgonel: not clean, vp %p", vp); 1741 if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb && 1742 TAILQ_FIRST(&vnode_free_list) != vp) { 1743 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1744 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 1745 } 1746 simple_unlock(&vnode_free_list_slock); 1747 } 1748 vp->v_type = VBAD; 1749 } 1750 1751 /* 1752 * Lookup a vnode by device number. 1753 */ 1754 int 1755 vfinddev(dev, type, vpp) 1756 dev_t dev; 1757 enum vtype type; 1758 struct vnode **vpp; 1759 { 1760 struct vnode *vp; 1761 int rc = 0; 1762 1763 simple_lock(&spechash_slock); 1764 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 1765 if (dev != vp->v_rdev || type != vp->v_type) 1766 continue; 1767 *vpp = vp; 1768 rc = 1; 1769 break; 1770 } 1771 simple_unlock(&spechash_slock); 1772 return (rc); 1773 } 1774 1775 /* 1776 * Revoke all the vnodes corresponding to the specified minor number 1777 * range (endpoints inclusive) of the specified major. 1778 */ 1779 void 1780 vdevgone(maj, minl, minh, type) 1781 int maj, minl, minh; 1782 enum vtype type; 1783 { 1784 struct vnode *vp; 1785 int mn; 1786 1787 for (mn = minl; mn <= minh; mn++) 1788 if (vfinddev(makedev(maj, mn), type, &vp)) 1789 VOP_REVOKE(vp, REVOKEALL); 1790 } 1791 1792 /* 1793 * Calculate the total number of references to a special device. 1794 */ 1795 int 1796 vcount(vp) 1797 struct vnode *vp; 1798 { 1799 struct vnode *vq, *vnext; 1800 int count; 1801 1802 loop: 1803 if ((vp->v_flag & VALIASED) == 0) 1804 return (vp->v_usecount); 1805 simple_lock(&spechash_slock); 1806 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { 1807 vnext = vq->v_specnext; 1808 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) 1809 continue; 1810 /* 1811 * Alias, but not in use, so flush it out. 1812 */ 1813 if (vq->v_usecount == 0 && vq != vp && 1814 (vq->v_flag & VXLOCK) == 0) { 1815 simple_unlock(&spechash_slock); 1816 vgone(vq); 1817 goto loop; 1818 } 1819 count += vq->v_usecount; 1820 } 1821 simple_unlock(&spechash_slock); 1822 return (count); 1823 } 1824 1825 /* 1826 * Print out a description of a vnode. 
1827 */ 1828 static const char * const typename[] = 1829 { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" }; 1830 1831 void 1832 vprint(label, vp) 1833 char *label; 1834 struct vnode *vp; 1835 { 1836 char buf[96]; 1837 1838 if (label != NULL) 1839 printf("%s: ", label); 1840 printf("tag %d type %s, usecount %d, writecount %ld, refcount %ld,", 1841 vp->v_tag, typename[vp->v_type], vp->v_usecount, vp->v_writecount, 1842 vp->v_holdcnt); 1843 buf[0] = '\0'; 1844 if (vp->v_flag & VROOT) 1845 strcat(buf, "|VROOT"); 1846 if (vp->v_flag & VTEXT) 1847 strcat(buf, "|VTEXT"); 1848 if (vp->v_flag & VEXECMAP) 1849 strcat(buf, "|VEXECMAP"); 1850 if (vp->v_flag & VSYSTEM) 1851 strcat(buf, "|VSYSTEM"); 1852 if (vp->v_flag & VXLOCK) 1853 strcat(buf, "|VXLOCK"); 1854 if (vp->v_flag & VXWANT) 1855 strcat(buf, "|VXWANT"); 1856 if (vp->v_flag & VBWAIT) 1857 strcat(buf, "|VBWAIT"); 1858 if (vp->v_flag & VALIASED) 1859 strcat(buf, "|VALIASED"); 1860 if (buf[0] != '\0') 1861 printf(" flags (%s)", &buf[1]); 1862 if (vp->v_data == NULL) { 1863 printf("\n"); 1864 } else { 1865 printf("\n\t"); 1866 VOP_PRINT(vp); 1867 } 1868 } 1869 1870 #ifdef DEBUG 1871 /* 1872 * List all of the locked vnodes in the system. 1873 * Called when debugging the kernel. 1874 */ 1875 void 1876 printlockedvnodes() 1877 { 1878 struct mount *mp, *nmp; 1879 struct vnode *vp; 1880 1881 printf("Locked vnodes\n"); 1882 simple_lock(&mountlist_slock); 1883 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 1884 mp = nmp) { 1885 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) { 1886 nmp = CIRCLEQ_NEXT(mp, mnt_list); 1887 continue; 1888 } 1889 LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 1890 if (VOP_ISLOCKED(vp)) 1891 vprint(NULL, vp); 1892 } 1893 simple_lock(&mountlist_slock); 1894 nmp = CIRCLEQ_NEXT(mp, mnt_list); 1895 vfs_unbusy(mp); 1896 } 1897 simple_unlock(&mountlist_slock); 1898 } 1899 #endif 1900 1901 /* 1902 * Top level filesystem related information gathering. 1903 */ 1904 int 1905 vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) 1906 int *name; 1907 u_int namelen; 1908 void *oldp; 1909 size_t *oldlenp; 1910 void *newp; 1911 size_t newlen; 1912 struct proc *p; 1913 { 1914 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44) 1915 struct vfsconf vfc; 1916 extern const char * const mountcompatnames[]; 1917 extern int nmountcompatnames; 1918 #endif 1919 struct vfsops *vfsp; 1920 1921 /* all sysctl names at this level are at least name and field */ 1922 if (namelen < 2) 1923 return (ENOTDIR); /* overloaded */ 1924 1925 /* Not generic: goes to file system. */ 1926 if (name[0] != VFS_GENERIC) { 1927 static const struct ctlname vfsnames[VFS_MAXID+1]=CTL_VFS_NAMES; 1928 const char *vfsname; 1929 1930 if (name[0] < 0 || name[0] > VFS_MAXID 1931 || (vfsname = vfsnames[name[0]].ctl_name) == NULL) 1932 return (EOPNOTSUPP); 1933 1934 vfsp = vfs_getopsbyname(vfsname); 1935 if (vfsp == NULL || vfsp->vfs_sysctl == NULL) 1936 return (EOPNOTSUPP); 1937 return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1, 1938 oldp, oldlenp, newp, newlen, p)); 1939 } 1940 1941 /* The rest are generic vfs sysctls. */ 1942 switch (name[1]) { 1943 case VFS_USERMOUNT: 1944 return sysctl_int(oldp, oldlenp, newp, newlen, &dovfsusermount); 1945 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44) 1946 case VFS_MAXTYPENUM: 1947 /* 1948 * Provided for 4.4BSD-Lite2 compatibility. 
1949 */ 1950 return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames)); 1951 case VFS_CONF: 1952 /* 1953 * Special: a node, next is a file system name. 1954 * Provided for 4.4BSD-Lite2 compatibility. 1955 */ 1956 if (namelen < 3) 1957 return (ENOTDIR); /* overloaded */ 1958 if (name[2] >= nmountcompatnames || name[2] < 0 || 1959 mountcompatnames[name[2]] == NULL) 1960 return (EOPNOTSUPP); 1961 vfsp = vfs_getopsbyname(mountcompatnames[name[2]]); 1962 if (vfsp == NULL) 1963 return (EOPNOTSUPP); 1964 vfc.vfc_vfsops = vfsp; 1965 strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN); 1966 vfc.vfc_typenum = name[2]; 1967 vfc.vfc_refcount = vfsp->vfs_refcount; 1968 vfc.vfc_flags = 0; 1969 vfc.vfc_mountroot = vfsp->vfs_mountroot; 1970 vfc.vfc_next = NULL; 1971 return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc, 1972 sizeof(struct vfsconf))); 1973 #endif 1974 default: 1975 break; 1976 } 1977 return (EOPNOTSUPP); 1978 } 1979 1980 int kinfo_vdebug = 1; 1981 int kinfo_vgetfailed; 1982 #define KINFO_VNODESLOP 10 1983 /* 1984 * Dump vnode list (via sysctl). 1985 * Copyout address of vnode followed by vnode. 1986 */ 1987 /* ARGSUSED */ 1988 int 1989 sysctl_vnode(where, sizep, p) 1990 char *where; 1991 size_t *sizep; 1992 struct proc *p; 1993 { 1994 struct mount *mp, *nmp; 1995 struct vnode *nvp, *vp; 1996 char *bp = where, *savebp; 1997 char *ewhere; 1998 int error; 1999 2000 #define VPTRSZ sizeof(struct vnode *) 2001 #define VNODESZ sizeof(struct vnode) 2002 if (where == NULL) { 2003 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 2004 return (0); 2005 } 2006 ewhere = where + *sizep; 2007 2008 simple_lock(&mountlist_slock); 2009 for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; 2010 mp = nmp) { 2011 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) { 2012 nmp = CIRCLEQ_NEXT(mp, mnt_list); 2013 continue; 2014 } 2015 savebp = bp; 2016 again: 2017 simple_lock(&mntvnode_slock); 2018 for (vp = LIST_FIRST(&mp->mnt_vnodelist); 2019 vp != NULL; 2020 vp = nvp) { 2021 /* 2022 * Check that the vp is still associated with 2023 * this filesystem. RACE: could have been 2024 * recycled onto the same filesystem. 2025 */ 2026 if (vp->v_mount != mp) { 2027 simple_unlock(&mntvnode_slock); 2028 if (kinfo_vdebug) 2029 printf("kinfo: vp changed\n"); 2030 bp = savebp; 2031 goto again; 2032 } 2033 nvp = LIST_NEXT(vp, v_mntvnodes); 2034 if (bp + VPTRSZ + VNODESZ > ewhere) { 2035 simple_unlock(&mntvnode_slock); 2036 *sizep = bp - where; 2037 return (ENOMEM); 2038 } 2039 simple_unlock(&mntvnode_slock); 2040 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || 2041 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) 2042 return (error); 2043 bp += VPTRSZ + VNODESZ; 2044 simple_lock(&mntvnode_slock); 2045 } 2046 simple_unlock(&mntvnode_slock); 2047 simple_lock(&mountlist_slock); 2048 nmp = CIRCLEQ_NEXT(mp, mnt_list); 2049 vfs_unbusy(mp); 2050 } 2051 simple_unlock(&mountlist_slock); 2052 2053 *sizep = bp - where; 2054 return (0); 2055 } 2056 2057 /* 2058 * Check to see if a filesystem is mounted on a block device. 
2059 */ 2060 int 2061 vfs_mountedon(vp) 2062 struct vnode *vp; 2063 { 2064 struct vnode *vq; 2065 int error = 0; 2066 2067 if (vp->v_specmountpoint != NULL) 2068 return (EBUSY); 2069 if (vp->v_flag & VALIASED) { 2070 simple_lock(&spechash_slock); 2071 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 2072 if (vq->v_rdev != vp->v_rdev || 2073 vq->v_type != vp->v_type) 2074 continue; 2075 if (vq->v_specmountpoint != NULL) { 2076 error = EBUSY; 2077 break; 2078 } 2079 } 2080 simple_unlock(&spechash_slock); 2081 } 2082 return (error); 2083 } 2084 2085 /* 2086 * Build hash lists of net addresses and hang them off the mount point. 2087 * Called by ufs_mount() to set up the lists of export addresses. 2088 */ 2089 static int 2090 vfs_hang_addrlist(mp, nep, argp) 2091 struct mount *mp; 2092 struct netexport *nep; 2093 struct export_args *argp; 2094 { 2095 struct netcred *np, *enp; 2096 struct radix_node_head *rnh; 2097 int i; 2098 struct radix_node *rn; 2099 struct sockaddr *saddr, *smask = 0; 2100 struct domain *dom; 2101 int error; 2102 2103 if (argp->ex_addrlen == 0) { 2104 if (mp->mnt_flag & MNT_DEFEXPORTED) 2105 return (EPERM); 2106 np = &nep->ne_defexported; 2107 np->netc_exflags = argp->ex_flags; 2108 crcvt(&np->netc_anon, &argp->ex_anon); 2109 np->netc_anon.cr_ref = 1; 2110 mp->mnt_flag |= MNT_DEFEXPORTED; 2111 return (0); 2112 } 2113 2114 if (argp->ex_addrlen > MLEN) 2115 return (EINVAL); 2116 2117 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 2118 np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK); 2119 memset((caddr_t)np, 0, i); 2120 saddr = (struct sockaddr *)(np + 1); 2121 error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen); 2122 if (error) 2123 goto out; 2124 if (saddr->sa_len > argp->ex_addrlen) 2125 saddr->sa_len = argp->ex_addrlen; 2126 if (argp->ex_masklen) { 2127 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen); 2128 error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen); 2129 if (error) 2130 goto out; 2131 if (smask->sa_len > argp->ex_masklen) 2132 smask->sa_len = argp->ex_masklen; 2133 } 2134 i = saddr->sa_family; 2135 if ((rnh = nep->ne_rtable[i]) == 0) { 2136 /* 2137 * Seems silly to initialize every AF when most are not 2138 * used, do so on demand here 2139 */ 2140 for (dom = domains; dom; dom = dom->dom_next) 2141 if (dom->dom_family == i && dom->dom_rtattach) { 2142 dom->dom_rtattach((void **)&nep->ne_rtable[i], 2143 dom->dom_rtoffset); 2144 break; 2145 } 2146 if ((rnh = nep->ne_rtable[i]) == 0) { 2147 error = ENOBUFS; 2148 goto out; 2149 } 2150 } 2151 rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh, 2152 np->netc_rnodes); 2153 if (rn == 0 || np != (struct netcred *)rn) { /* already exists */ 2154 if (rn == 0) { 2155 enp = (struct netcred *)(*rnh->rnh_lookup)(saddr, 2156 smask, rnh); 2157 if (enp == 0) { 2158 error = EPERM; 2159 goto out; 2160 } 2161 } else 2162 enp = (struct netcred *)rn; 2163 2164 if (enp->netc_exflags != argp->ex_flags || 2165 enp->netc_anon.cr_uid != argp->ex_anon.cr_uid || 2166 enp->netc_anon.cr_gid != argp->ex_anon.cr_gid || 2167 enp->netc_anon.cr_ngroups != 2168 (uint32_t) argp->ex_anon.cr_ngroups || 2169 memcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups, 2170 enp->netc_anon.cr_ngroups)) 2171 error = EPERM; 2172 else 2173 error = 0; 2174 goto out; 2175 } 2176 np->netc_exflags = argp->ex_flags; 2177 crcvt(&np->netc_anon, &argp->ex_anon); 2178 np->netc_anon.cr_ref = 1; 2179 return (0); 2180 out: 2181 free(np, M_NETADDR); 2182 return (error); 2183 } 2184 2185 /* 
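 * vfs_free_netcred:
 *	rnh_walktree() callback used by vfs_free_addrlist() below.
 *	Removes one export entry from the radix tree and frees it.
 *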
ARGSUSED */ 2186 static int 2187 vfs_free_netcred(rn, w) 2188 struct radix_node *rn; 2189 void *w; 2190 { 2191 struct radix_node_head *rnh = (struct radix_node_head *)w; 2192 2193 (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); 2194 free((caddr_t)rn, M_NETADDR); 2195 return (0); 2196 } 2197 2198 /* 2199 * Free the net address hash lists that are hanging off the mount points. 2200 */ 2201 static void 2202 vfs_free_addrlist(nep) 2203 struct netexport *nep; 2204 { 2205 int i; 2206 struct radix_node_head *rnh; 2207 2208 for (i = 0; i <= AF_MAX; i++) 2209 if ((rnh = nep->ne_rtable[i]) != NULL) { 2210 (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh); 2211 free((caddr_t)rnh, M_RTABLE); 2212 nep->ne_rtable[i] = 0; 2213 } 2214 } 2215 2216 int 2217 vfs_export(mp, nep, argp) 2218 struct mount *mp; 2219 struct netexport *nep; 2220 struct export_args *argp; 2221 { 2222 int error; 2223 2224 if (argp->ex_flags & MNT_DELEXPORT) { 2225 if (mp->mnt_flag & MNT_EXPUBLIC) { 2226 vfs_setpublicfs(NULL, NULL, NULL); 2227 mp->mnt_flag &= ~MNT_EXPUBLIC; 2228 } 2229 vfs_free_addrlist(nep); 2230 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 2231 } 2232 if (argp->ex_flags & MNT_EXPORTED) { 2233 if (argp->ex_flags & MNT_EXPUBLIC) { 2234 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 2235 return (error); 2236 mp->mnt_flag |= MNT_EXPUBLIC; 2237 } 2238 if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0) 2239 return (error); 2240 mp->mnt_flag |= MNT_EXPORTED; 2241 } 2242 return (0); 2243 } 2244 2245 /* 2246 * Set the publicly exported filesystem (WebNFS). Currently, only 2247 * one public filesystem is possible in the spec (RFC 2054 and 2055) 2248 */ 2249 int 2250 vfs_setpublicfs(mp, nep, argp) 2251 struct mount *mp; 2252 struct netexport *nep; 2253 struct export_args *argp; 2254 { 2255 int error; 2256 struct vnode *rvp; 2257 char *cp; 2258 2259 /* 2260 * mp == NULL -> invalidate the current info, the FS is 2261 * no longer exported. May be called from either vfs_export 2262 * or unmount, so check if it hasn't already been done. 2263 */ 2264 if (mp == NULL) { 2265 if (nfs_pub.np_valid) { 2266 nfs_pub.np_valid = 0; 2267 if (nfs_pub.np_index != NULL) { 2268 FREE(nfs_pub.np_index, M_TEMP); 2269 nfs_pub.np_index = NULL; 2270 } 2271 } 2272 return (0); 2273 } 2274 2275 /* 2276 * Only one allowed at a time. 2277 */ 2278 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 2279 return (EBUSY); 2280 2281 /* 2282 * Get real filehandle for root of exported FS. 2283 */ 2284 memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle)); 2285 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 2286 2287 if ((error = VFS_ROOT(mp, &rvp))) 2288 return (error); 2289 2290 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 2291 return (error); 2292 2293 vput(rvp); 2294 2295 /* 2296 * If an indexfile was specified, pull it in. 2297 */ 2298 if (argp->ex_indexfile != NULL) { 2299 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 2300 M_WAITOK); 2301 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 2302 MAXNAMLEN, (size_t *)0); 2303 if (!error) { 2304 /* 2305 * Check for illegal filenames. 
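			 * The index file must be a plain file name relative
			 * to the root of the exported file system, so any
			 * '/' in it is rejected.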
2306 */ 2307 for (cp = nfs_pub.np_index; *cp; cp++) { 2308 if (*cp == '/') { 2309 error = EINVAL; 2310 break; 2311 } 2312 } 2313 } 2314 if (error) { 2315 FREE(nfs_pub.np_index, M_TEMP); 2316 return (error); 2317 } 2318 } 2319 2320 nfs_pub.np_mount = mp; 2321 nfs_pub.np_valid = 1; 2322 return (0); 2323 } 2324 2325 struct netcred * 2326 vfs_export_lookup(mp, nep, nam) 2327 struct mount *mp; 2328 struct netexport *nep; 2329 struct mbuf *nam; 2330 { 2331 struct netcred *np; 2332 struct radix_node_head *rnh; 2333 struct sockaddr *saddr; 2334 2335 np = NULL; 2336 if (mp->mnt_flag & MNT_EXPORTED) { 2337 /* 2338 * Lookup in the export list first. 2339 */ 2340 if (nam != NULL) { 2341 saddr = mtod(nam, struct sockaddr *); 2342 rnh = nep->ne_rtable[saddr->sa_family]; 2343 if (rnh != NULL) { 2344 np = (struct netcred *) 2345 (*rnh->rnh_matchaddr)((caddr_t)saddr, 2346 rnh); 2347 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 2348 np = NULL; 2349 } 2350 } 2351 /* 2352 * If no address match, use the default if it exists. 2353 */ 2354 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 2355 np = &nep->ne_defexported; 2356 } 2357 return (np); 2358 } 2359 2360 /* 2361 * Do the usual access checking. 2362 * file_mode, uid and gid are from the vnode in question, 2363 * while acc_mode and cred are from the VOP_ACCESS parameter list 2364 */ 2365 int 2366 vaccess(type, file_mode, uid, gid, acc_mode, cred) 2367 enum vtype type; 2368 mode_t file_mode; 2369 uid_t uid; 2370 gid_t gid; 2371 mode_t acc_mode; 2372 struct ucred *cred; 2373 { 2374 mode_t mask; 2375 2376 /* 2377 * Super-user always gets read/write access, but execute access depends 2378 * on at least one execute bit being set. 2379 */ 2380 if (cred->cr_uid == 0) { 2381 if ((acc_mode & VEXEC) && type != VDIR && 2382 (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0) 2383 return (EACCES); 2384 return (0); 2385 } 2386 2387 mask = 0; 2388 2389 /* Otherwise, check the owner. */ 2390 if (cred->cr_uid == uid) { 2391 if (acc_mode & VEXEC) 2392 mask |= S_IXUSR; 2393 if (acc_mode & VREAD) 2394 mask |= S_IRUSR; 2395 if (acc_mode & VWRITE) 2396 mask |= S_IWUSR; 2397 return ((file_mode & mask) == mask ? 0 : EACCES); 2398 } 2399 2400 /* Otherwise, check the groups. */ 2401 if (cred->cr_gid == gid || groupmember(gid, cred)) { 2402 if (acc_mode & VEXEC) 2403 mask |= S_IXGRP; 2404 if (acc_mode & VREAD) 2405 mask |= S_IRGRP; 2406 if (acc_mode & VWRITE) 2407 mask |= S_IWGRP; 2408 return ((file_mode & mask) == mask ? 0 : EACCES); 2409 } 2410 2411 /* Otherwise, check everyone else. */ 2412 if (acc_mode & VEXEC) 2413 mask |= S_IXOTH; 2414 if (acc_mode & VREAD) 2415 mask |= S_IROTH; 2416 if (acc_mode & VWRITE) 2417 mask |= S_IWOTH; 2418 return ((file_mode & mask) == mask ? 0 : EACCES); 2419 } 2420 2421 /* 2422 * Unmount all file systems. 2423 * We traverse the list in reverse order under the assumption that doing so 2424 * will avoid needing to worry about dependencies. 2425 */ 2426 void 2427 vfs_unmountall(p) 2428 struct proc *p; 2429 { 2430 struct mount *mp, *nmp; 2431 int allerror, error; 2432 2433 for (allerror = 0, 2434 mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 2435 nmp = mp->mnt_list.cqe_prev; 2436 #ifdef DEBUG 2437 printf("unmounting %s (%s)...\n", 2438 mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname); 2439 #endif 2440 /* 2441 * XXX Freeze syncer. Must do this before locking the 2442 * mount point. See dounmount() for details. 
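		 * Note that only the vfs_busy() failure path below releases
		 * syncer_lock in this function; on success the lock is
		 * handed to dounmount(), which releases it.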
2443 */ 2444 lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL); 2445 if (vfs_busy(mp, 0, 0)) { 2446 lockmgr(&syncer_lock, LK_RELEASE, NULL); 2447 continue; 2448 } 2449 if ((error = dounmount(mp, MNT_FORCE, p)) != 0) { 2450 printf("unmount of %s failed with error %d\n", 2451 mp->mnt_stat.f_mntonname, error); 2452 allerror = 1; 2453 } 2454 } 2455 if (allerror) 2456 printf("WARNING: some file systems would not unmount\n"); 2457 } 2458 2459 /* 2460 * Sync and unmount file systems before shutting down. 2461 */ 2462 void 2463 vfs_shutdown() 2464 { 2465 struct buf *bp; 2466 int iter, nbusy, nbusy_prev = 0, dcount, s; 2467 struct lwp *l = curlwp; 2468 struct proc *p; 2469 2470 /* XXX we're certainly not running in proc0's context! */ 2471 if (l == NULL || (p = l->l_proc) == NULL) 2472 p = &proc0; 2473 2474 printf("syncing disks... "); 2475 2476 /* remove user process from run queue */ 2477 suspendsched(); 2478 (void) spl0(); 2479 2480 /* avoid coming back this way again if we panic. */ 2481 doing_shutdown = 1; 2482 2483 sys_sync(l, NULL, NULL); 2484 2485 /* Wait for sync to finish. */ 2486 dcount = 10000; 2487 for (iter = 0; iter < 20;) { 2488 nbusy = 0; 2489 for (bp = &buf[nbuf]; --bp >= buf; ) { 2490 if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY) 2491 nbusy++; 2492 /* 2493 * With soft updates, some buffers that are 2494 * written will be remarked as dirty until other 2495 * buffers are written. 2496 */ 2497 if (bp->b_vp && bp->b_vp->v_mount 2498 && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP) 2499 && (bp->b_flags & B_DELWRI)) { 2500 s = splbio(); 2501 bremfree(bp); 2502 bp->b_flags |= B_BUSY; 2503 splx(s); 2504 nbusy++; 2505 bawrite(bp); 2506 if (dcount-- <= 0) { 2507 printf("softdep "); 2508 goto fail; 2509 } 2510 } 2511 } 2512 if (nbusy == 0) 2513 break; 2514 if (nbusy_prev == 0) 2515 nbusy_prev = nbusy; 2516 printf("%d ", nbusy); 2517 tsleep(&nbusy, PRIBIO, "bflush", 2518 (iter == 0) ? 1 : hz / 25 * iter); 2519 if (nbusy >= nbusy_prev) /* we didn't flush anything */ 2520 iter++; 2521 else 2522 nbusy_prev = nbusy; 2523 } 2524 if (nbusy) { 2525 fail: 2526 #if defined(DEBUG) || defined(DEBUG_HALT_BUSY) 2527 printf("giving up\nPrinting vnodes for busy buffers\n"); 2528 for (bp = &buf[nbuf]; --bp >= buf; ) 2529 if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY) 2530 vprint(NULL, bp->b_vp); 2531 2532 #if defined(DDB) && defined(DEBUG_HALT_BUSY) 2533 Debugger(); 2534 #endif 2535 2536 #else /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */ 2537 printf("giving up\n"); 2538 #endif /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */ 2539 return; 2540 } else 2541 printf("done\n"); 2542 2543 /* 2544 * If we've panic'd, don't make the situation potentially 2545 * worse by unmounting the file systems. 2546 */ 2547 if (panicstr != NULL) 2548 return; 2549 2550 /* Release inodes held by texts before update. */ 2551 #ifdef notdef 2552 vnshutdown(); 2553 #endif 2554 /* Unmount file systems. */ 2555 vfs_unmountall(p); 2556 } 2557 2558 /* 2559 * Mount the root file system. If the operator didn't specify a 2560 * file system to use, try all possible file systems until one 2561 * succeeds. 
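 *
 * The root device must already have been identified: a network root
 * (DV_IFNET) must not have rootdev set, a disk root must.  If
 * machine-dependent or configuration code has pointed the "mountroot"
 * hook at a specific routine (ffs_mountroot, say; the name is only
 * illustrative here), that routine is called directly; otherwise each
 * file system on vfs_list that provides a vfs_mountroot entry is
 * tried in turn until one succeeds.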
2562 */ 2563 int 2564 vfs_mountroot() 2565 { 2566 struct vfsops *v; 2567 2568 if (root_device == NULL) 2569 panic("vfs_mountroot: root device unknown"); 2570 2571 switch (root_device->dv_class) { 2572 case DV_IFNET: 2573 if (rootdev != NODEV) 2574 panic("vfs_mountroot: rootdev set for DV_IFNET " 2575 "(0x%08x -> %d,%d)", rootdev, 2576 major(rootdev), minor(rootdev)); 2577 break; 2578 2579 case DV_DISK: 2580 if (rootdev == NODEV) 2581 panic("vfs_mountroot: rootdev not set for DV_DISK"); 2582 break; 2583 2584 default: 2585 printf("%s: inappropriate for root file system\n", 2586 root_device->dv_xname); 2587 return (ENODEV); 2588 } 2589 2590 /* 2591 * If user specified a file system, use it. 2592 */ 2593 if (mountroot != NULL) 2594 return ((*mountroot)()); 2595 2596 /* 2597 * Try each file system currently configured into the kernel. 2598 */ 2599 for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) { 2600 if (v->vfs_mountroot == NULL) 2601 continue; 2602 #ifdef DEBUG 2603 printf("mountroot: trying %s...\n", v->vfs_name); 2604 #endif 2605 if ((*v->vfs_mountroot)() == 0) { 2606 printf("root file system type: %s\n", v->vfs_name); 2607 break; 2608 } 2609 } 2610 2611 if (v == NULL) { 2612 printf("no file system for %s", root_device->dv_xname); 2613 if (root_device->dv_class == DV_DISK) 2614 printf(" (dev 0x%x)", rootdev); 2615 printf("\n"); 2616 return (EFTYPE); 2617 } 2618 return (0); 2619 } 2620 2621 /* 2622 * Given a file system name, look up the vfsops for that 2623 * file system, or return NULL if file system isn't present 2624 * in the kernel. 2625 */ 2626 struct vfsops * 2627 vfs_getopsbyname(name) 2628 const char *name; 2629 { 2630 struct vfsops *v; 2631 2632 for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) { 2633 if (strcmp(v->vfs_name, name) == 0) 2634 break; 2635 } 2636 2637 return (v); 2638 } 2639 2640 /* 2641 * Establish a file system and initialize it. 2642 */ 2643 int 2644 vfs_attach(vfs) 2645 struct vfsops *vfs; 2646 { 2647 struct vfsops *v; 2648 int error = 0; 2649 2650 2651 /* 2652 * Make sure this file system doesn't already exist. 2653 */ 2654 LIST_FOREACH(v, &vfs_list, vfs_list) { 2655 if (strcmp(vfs->vfs_name, v->vfs_name) == 0) { 2656 error = EEXIST; 2657 goto out; 2658 } 2659 } 2660 2661 /* 2662 * Initialize the vnode operations for this file system. 2663 */ 2664 vfs_opv_init(vfs->vfs_opv_descs); 2665 2666 /* 2667 * Now initialize the file system itself. 2668 */ 2669 (*vfs->vfs_init)(); 2670 2671 /* 2672 * ...and link it into the kernel's list. 2673 */ 2674 LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list); 2675 2676 /* 2677 * Sanity: make sure the reference count is 0. 2678 */ 2679 vfs->vfs_refcount = 0; 2680 2681 out: 2682 return (error); 2683 } 2684 2685 /* 2686 * Remove a file system from the kernel. 2687 */ 2688 int 2689 vfs_detach(vfs) 2690 struct vfsops *vfs; 2691 { 2692 struct vfsops *v; 2693 2694 /* 2695 * Make sure no one is using the filesystem. 2696 */ 2697 if (vfs->vfs_refcount != 0) 2698 return (EBUSY); 2699 2700 /* 2701 * ...and remove it from the kernel's list. 2702 */ 2703 LIST_FOREACH(v, &vfs_list, vfs_list) { 2704 if (v == vfs) { 2705 LIST_REMOVE(v, vfs_list); 2706 break; 2707 } 2708 } 2709 2710 if (v == NULL) 2711 return (ESRCH); 2712 2713 /* 2714 * Now run the file system-specific cleanups. 2715 */ 2716 (*vfs->vfs_done)(); 2717 2718 /* 2719 * Free the vnode operations vector. 
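	 * This undoes the vfs_opv_init() performed by vfs_attach() when
	 * the file system was established.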
2720 */ 2721 vfs_opv_free(vfs->vfs_opv_descs); 2722 return (0); 2723 } 2724 2725 void 2726 vfs_reinit(void) 2727 { 2728 struct vfsops *vfs; 2729 2730 LIST_FOREACH(vfs, &vfs_list, vfs_list) { 2731 if (vfs->vfs_reinit) { 2732 (*vfs->vfs_reinit)(); 2733 } 2734 } 2735 } 2736 2737 #ifdef DDB 2738 const char buf_flagbits[] = 2739 "\20\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI" 2740 "\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE" 2741 "\21ORDERED\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED" 2742 "\32XXX\33VFLUSH"; 2743 2744 void 2745 vfs_buf_print(bp, full, pr) 2746 struct buf *bp; 2747 int full; 2748 void (*pr) __P((const char *, ...)); 2749 { 2750 char buf[1024]; 2751 2752 (*pr)(" vp %p lblkno 0x%x blkno 0x%x dev 0x%x\n", 2753 bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev); 2754 2755 bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf)); 2756 (*pr)(" error %d flags 0x%s\n", bp->b_error, buf); 2757 2758 (*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n", 2759 bp->b_bufsize, bp->b_bcount, bp->b_resid); 2760 (*pr)(" data %p saveaddr %p dep %p\n", 2761 bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep)); 2762 (*pr)(" iodone %p\n", bp->b_iodone); 2763 } 2764 2765 2766 const char vnode_flagbits[] = 2767 "\20\1ROOT\2TEXT\3SYSTEM\4ISTTY\5EXECMAP" 2768 "\11XLOCK\12XWANT\13BWAIT\14ALIASED" 2769 "\15DIROP\16LAYER\17ONWORKLIST\20DIRTY"; 2770 2771 const char *vnode_types[] = { 2772 "VNON", 2773 "VREG", 2774 "VDIR", 2775 "VBLK", 2776 "VCHR", 2777 "VLNK", 2778 "VSOCK", 2779 "VFIFO", 2780 "VBAD", 2781 }; 2782 2783 const char *vnode_tags[] = { 2784 "VT_NON", 2785 "VT_UFS", 2786 "VT_NFS", 2787 "VT_MFS", 2788 "VT_MSDOSFS", 2789 "VT_LFS", 2790 "VT_LOFS", 2791 "VT_FDESC", 2792 "VT_PORTAL", 2793 "VT_NULL", 2794 "VT_UMAP", 2795 "VT_KERNFS", 2796 "VT_PROCFS", 2797 "VT_AFS", 2798 "VT_ISOFS", 2799 "VT_UNION", 2800 "VT_ADOSFS", 2801 "VT_EXT2FS", 2802 "VT_CODA", 2803 "VT_FILECORE", 2804 "VT_NTFS", 2805 "VT_VFS", 2806 "VT_OVERLAY" 2807 }; 2808 2809 void 2810 vfs_vnode_print(vp, full, pr) 2811 struct vnode *vp; 2812 int full; 2813 void (*pr) __P((const char *, ...)); 2814 { 2815 char buf[256]; 2816 const char *vtype, *vtag; 2817 2818 uvm_object_printit(&vp->v_uobj, full, pr); 2819 bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf)); 2820 (*pr)("\nVNODE flags %s\n", buf); 2821 (*pr)("mp %p numoutput %d size 0x%llx\n", 2822 vp->v_mount, vp->v_numoutput, vp->v_size); 2823 2824 (*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n", 2825 vp->v_data, vp->v_usecount, vp->v_writecount, 2826 vp->v_holdcnt, vp->v_numoutput); 2827 2828 vtype = (vp->v_type >= 0 && 2829 vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ? 2830 vnode_types[vp->v_type] : "UNKNOWN"; 2831 vtag = (vp->v_tag >= 0 && 2832 vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ? 2833 vnode_tags[vp->v_tag] : "UNKNOWN"; 2834 2835 (*pr)("type %s(%d) tag %s(%d) id 0x%lx mount %p typedata %p\n", 2836 vtype, vp->v_type, vtag, vp->v_tag, 2837 vp->v_id, vp->v_mount, vp->v_mountedhere); 2838 2839 if (full) { 2840 struct buf *bp; 2841 2842 (*pr)("clean bufs:\n"); 2843 LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) { 2844 (*pr)(" bp %p\n", bp); 2845 vfs_buf_print(bp, full, pr); 2846 } 2847 2848 (*pr)("dirty bufs:\n"); 2849 LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) { 2850 (*pr)(" bp %p\n", bp); 2851 vfs_buf_print(bp, full, pr); 2852 } 2853 } 2854 } 2855 #endif 2856
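/*
 * Usage sketch (illustrative only; "examplefs" and the identifiers
 * derived from it are hypothetical): a file system registers itself
 * with vfs_attach() when it is set up and withdraws with vfs_detach()
 * when it is removed.  vfs_attach() fails with EEXIST if a file
 * system of the same name is already present; vfs_detach() fails with
 * EBUSY while vfs_refcount is non-zero.  The examplefs_vfsops below
 * is assumed to have its vfs_name, vfs_opv_descs, vfs_init and
 * vfs_done members filled in by the file system itself.
 *
 *	extern struct vfsops examplefs_vfsops;
 *
 *	void
 *	examplefs_register(void)
 *	{
 *
 *		if (vfs_attach(&examplefs_vfsops) != 0)
 *			printf("examplefs: vfs_attach failed\n");
 *	}
 *
 *	void
 *	examplefs_unregister(void)
 *	{
 *
 *		if (vfs_detach(&examplefs_vfsops) != 0)
 *			printf("examplefs: vfs_detach failed\n");
 *	}
 */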