/*	$NetBSD: vfs_subr.c,v 1.150 2001/06/05 04:42:05 thorpej Exp $	*/

/*-
 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include "opt_ddb.h"
#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/dirent.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/syncfs/syncfs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_ddb.h>

#include <sys/sysctl.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
const int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
/* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */
struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct nfs_public nfs_pub;			/* publicly exported FS */

struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;			/* root device */

struct pool vnode_pool;				/* memory pool for vnodes */

/*
 * Local declarations.
 */
void	insmntque __P((struct vnode *, struct mount *));
int	getdevvp __P((dev_t, struct vnode **, enum vtype));
void	vgoneall __P((struct vnode *));

static	int vfs_hang_addrlist __P((struct mount *, struct netexport *,
	    struct export_args *));
static	int vfs_free_netcred __P((struct radix_node *, void *));
static	void vfs_free_addrlist __P((struct netexport *));

#ifdef DEBUG
void	printlockedvnodes __P((void));
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
	    0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE);

	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

/*
 * Mark a mount point as busy.  Used to synchronize access and to delay
 * unmounting.  Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
{
	int lkflags;

	while (mp->mnt_flag & MNT_UNMOUNT) {
		int gone;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
		    && mp->mnt_unmounter == curproc)
			return (EDEADLK);
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 *
		 * XXX MP: add spinlock protecting mnt_wcnt here once you
		 * can atomically unlock-and-sleep.
		 */
		mp->mnt_wcnt++;
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		mp->mnt_wcnt--;
		gone = mp->mnt_flag & MNT_GONE;

		if (mp->mnt_wcnt == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp)
	struct mount *mp;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	for (vfsp = LIST_FIRST(&vfs_list); vfsp != NULL;
	     vfsp = LIST_NEXT(vfsp, vfs_list))
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfs_refcount++;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Lookup a mount point by filesystem identifier.
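 *
 * A minimal caller sketch (not from this file; NFS-style file handle
 * translation does something similar, and the fhp/fh_fsid names here
 * are illustrative):
 *
 *	struct mount *mp;
 *
 *	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
 *		return (ESTALE);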
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid.
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(type)
	const char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}

/*
 * Set vnode attributes to VNOVAL.
 */
void
vattr_null(vap)
	struct vattr *vap;
{

	vap->va_type = VNON;

	/*
	 * Assign individually so that it is safe even if the size and
	 * sign of each member vary.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) __P((void *));
	struct vnode **vpp;
{
	extern struct uvm_pagerops uvm_vnodeops;
	struct uvm_object *uobj;
	struct proc *p = curproc;	/* XXX */
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int error = 0;
#ifdef DIAGNOSTIC
	int s;
#endif
	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error.  If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock).
		 */
		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
		if (error && error != EDEADLK)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one.
	 * The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list.  Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list.  If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list.  The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size.  We are reluctant to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */

	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	simple_lock(&vnode_free_list_slock);
	if (numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(listhd = &vnode_free_list) == NULL &&
	     (TAILQ_FIRST(listhd = &vnode_hold_list) == NULL || toggle))) {
		simple_unlock(&vnode_free_list_slock);
		vp = pool_get(&vnode_pool, PR_WAITOK);
		memset(vp, 0, sizeof(*vp));
		simple_lock_init(&vp->v_interlock);
		numvnodes++;
	} else {
		for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
		     vp = TAILQ_NEXT(vp, v_freelist)) {
			if (simple_lock_try(&vp->v_interlock)) {
				if ((vp->v_flag & VLAYER) == 0) {
					break;
				}
				if (VOP_ISLOCKED(vp) == 0)
					break;
				else
					simple_unlock(&vp->v_interlock);
			}
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			if (mp && error != EDEADLK)
				vfs_unbusy(mp);
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't, vp %p", vp);
		TAILQ_REMOVE(listhd, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't, vp %p", vp);
		s = splbio();
		if (vp->v_numoutput)
			panic("clean vnode has pending I/O's, vp %p", vp);
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	lockinit(&vp->v_glock, PVFS, "glock", 0, 0);
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);

	/*
	 * initialize uvm_object within vnode.
	 */

	uobj = &vp->v_uvm.u_obj;
	uobj->pgops = &uvm_vnodeops;
	TAILQ_INIT(&uobj->memq);
	vp->v_uvm.u_size = VSIZENOTSET;

	if (mp && error != EDEADLK)
		vfs_unbusy(mp);
	return (0);
}

/*
 * This is really just the reverse of getnewvnode().  Needed for
 * VFS_VGET functions that may need to push back a vnode in case
 * of a locking race.
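 *
 * A hedged usage sketch (the error path of a hypothetical VFS_VGET
 * implementation; lost_race stands for a fs-specific check):
 *
 *	error = getnewvnode(VT_UFS, mp, ufs_vnodeop_p, &vp);
 *	if (error)
 *		return (error);
 *	if (lost_race) {
 *		ungetnewvnode(vp);
 *		goto retry;
 *	}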
 */
void
ungetnewvnode(vp)
	struct vnode *vp;
{
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 1)
		panic("ungetnewvnode: busy vnode");
#endif
	vp->v_usecount--;
	insmntque(vp, NULL);
	vp->v_type = VBAD;

	simple_lock(&vp->v_interlock);
	/*
	 * Insert at head of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_flag & MNT_UNMOUNT) &&
	    !(mp->mnt_flag & MNT_SOFTDEP) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	struct buf *bp;
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput, vp %p", vp);
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	struct uvm_object *uobj = &vp->v_uvm.u_obj;
	struct buf *bp, *nbp;
	int s, error, rv;
	int flushflags = PGO_ALLPAGES|PGO_FREE|PGO_SYNCIO|
	    (flags & V_SAVE ? PGO_CLEANIT : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	if (vp->v_type == VREG) {
		simple_lock(&uobj->vmobjlock);
		rv = (uobj->pgops->pgo_flush)(uobj, 0, 0, flushflags);
		simple_unlock(&uobj->vmobjlock);
		if (!rv) {
			return EIO;
		}
	}
	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
		if (error)
			return (error);
#ifdef DIAGNOSTIC
		s = splbio();
		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
			panic("vinvalbuf: dirty bufs, vp %p", vp);
		splx(s);
#endif
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vinvalbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vinvalbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			bp->b_flags |= B_BUSY | B_VFLUSH;
			VOP_BWRITE(bp);
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	splx(s);

	return (0);
}

/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
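 *
 * An illustrative call from a truncation path (a sketch, assuming an
 * FFS-like filesystem cutting a file back to byte offset "length"):
 *
 *	error = vtruncbuf(vp, lblkno(fs, length), 0, 0);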
 */
int
vtruncbuf(vp, lbn, slpflag, slptimeo)
	struct vnode *vp;
	daddr_t lbn;
	int slpflag, slptimeo;
{
	struct uvm_object *uobj = &vp->v_uvm.u_obj;
	struct buf *bp, *nbp;
	int s, error, rv;

	s = splbio();
	if (vp->v_type == VREG) {
		simple_lock(&uobj->vmobjlock);
		rv = (uobj->pgops->pgo_flush)(uobj,
		    round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift), 0,
		    PGO_FREE|PGO_SYNCIO);
		simple_unlock(&uobj->vmobjlock);
		if (!rv) {
			splx(s);
			return EIO;
		}
	}

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep(bp, slpflag | (PRIBIO + 1),
			    "vtruncbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep(bp, slpflag | (PRIBIO + 1),
			    "vtruncbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	splx(s);

	return (0);
}

void
vflushbuf(vp, sync)
	struct vnode *vp;
	int sync;
{
	struct uvm_object *uobj = &vp->v_uvm.u_obj;
	struct buf *bp, *nbp;
	int s;

	if (vp->v_type == VREG) {
		int flags = PGO_CLEANIT | PGO_ALLPAGES |
		    (sync ? PGO_SYNCIO : 0);

		simple_lock(&uobj->vmobjlock);
		(uobj->pgops->pgo_flush)(uobj, 0, 0, flags);
		simple_unlock(&uobj->vmobjlock);
	}

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_flags |= B_BUSY | B_VFLUSH;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
	}
	splx(s);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	struct vnode *vp;
	struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free, bp %p", bp);
	VHOLD(vp);
	s = splbio();
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == NULL)
		panic("brelvp: vp NULL, bp %p", bp);

	s = splbio();
	vp = bp->b_vp;
	/*
	 * Delete from old vnode list, if on one.
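	 * A buffer that is on no vnode list is marked by NOLIST in
	 * b_vnbufs.le_next; the bufremvn() macro above restores that
	 * marker when it unlinks the buffer.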
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);

	if (vp->v_type != VREG && (vp->v_flag & VONWORKLST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}

	bp->b_vp = NULL;
	HOLDRELE(vp);
	splx(s);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file-specific control information
 * (indirect blocks) to the vnode to which they belong.
 *
 * This function must be called at splbio().
 */
void
reassignbuf(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &newvp->v_cleanblkhd;
		if (newvp->v_type != VREG &&
		    (newvp->v_flag & VONWORKLST) &&
		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	} else {
		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			switch (newvp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
				break;
			}
			if (!newvp->v_mount ||
			    (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
				vn_syncer_add_to_worklist(newvp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device).  If such an alias exists, deallocate
 * the existing contents and return the aliased vnode.  The
 * caller is responsible for filling it with its new contents.
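 *
 * A hedged caller sketch (a filesystem's vnode-initialization path
 * might do something like this for a VBLK/VCHR node; the data
 * transfer step is fs-specific and only indicated here):
 *
 *	if ((nvp = checkalias(vp, rdev, mp)) != NULL) {
 *		(move fs-private state from vp to nvp, mark vp dead)
 *		vp = nvp;
 *	}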
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_NOWAIT);
		/* XXX Erg. */
		if (nvp->v_specinfo == NULL) {
			simple_unlock(&spechash_slock);
			uvm_wait("checkalias");
			goto loop;
		}

		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it.  If the vnode lock bit is set the
 * vnode is being eliminated in vgone.  In that case, we cannot
 * grab the vnode, so the process is awakened when the transition is
 * completed, and an error returned to indicate that the vnode is no
 * longer usable (possibly having been changed to a new file system type).
 */
int
vget(vp, flags)
	struct vnode *vp;
	int flags;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking that
	 * the VXLOCK flag is set.
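	 *
	 * The classic caller pattern is a hash lookup that retries on
	 * failure (a sketch; hash_lookup is a hypothetical stand-in
	 * for an inode-cache lookup):
	 *
	 *	loop:
	 *		if ((vp = hash_lookup(dev, ino)) != NULL &&
	 *		    vget(vp, LK_EXCLUSIVE))
	 *			goto loop;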
	 */

	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		if (flags & LK_NOWAIT) {
			simple_unlock(&vp->v_interlock);
			return EBUSY;
		}
		vp->v_flag |= VXWANT;
		ltsleep((caddr_t)vp, PINOD|PNORELOCK,
		    "vget", 0, &vp->v_interlock);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vget", vp);
		panic("vget: usecount overflow, vp %p", vp);
	}
#endif
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active.  We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (vp->v_usecount > 0) {
				simple_unlock(&vp->v_interlock);
				return (error);
			}
			/*
			 * insert at tail of LRU list
			 */
			simple_lock(&vnode_free_list_slock);
			if (vp->v_holdcnt > 0)
				TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
				    v_freelist);
			else
				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
				    v_freelist);
			simple_unlock(&vnode_free_list_slock);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VTEXT) {
		uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
		uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
	}
	vp->v_flag &= ~VTEXT;
	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, p);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
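 *
 * Note the pairing with vput() above: vput() is for a referenced and
 * locked vnode, vrele() for a referenced but unlocked one, e.g.
 * (a sketch):
 *
 *	VREF(vp);
 *	(use vp without holding its vnode lock)
 *	vrele(vp);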
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt vp %p", vp);
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VTEXT) {
		uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
		uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
	}
	vp->v_flag &= ~VTEXT;
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
		VOP_INACTIVE(vp, p);
}

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	struct vnode *vp;
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	simple_lock(&vp->v_interlock);
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt vp %p", vp);
	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
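	 *
	 * Within this file, bgetvp() and brelvp() are the canonical
	 * VHOLD()/HOLDRELE() users: a vnode stays held for as long
	 * as buffers remain associated with it.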
	 */

	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required, vp %p", vp);
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vref", vp);
		panic("vref: usecount overflow, vp %p", vp);
	}
#endif
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error).  If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device.  For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0) {
		/* We have the vnode interlock. */
		vp->v_usecount++;
#ifdef DIAGNOSTIC
		if (vp->v_usecount == 0) {
			vprint("vclean", vp);
			panic("vclean: usecount overflow");
		}
#endif
	}

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock, vp %p", vp);
	vp->v_flag |= VXLOCK;
	if (vp->v_flag & VTEXT) {
		uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
		uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
	}
	vp->v_flag &= ~VTEXT;

	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out.  The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);

	/*
	 * Clean out any cached data associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.  Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim, vp %p", vp);
	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			/*
			 * Insert at tail of LRU list.
			 */

			simple_unlock(&vp->v_interlock);
			simple_lock(&vnode_free_list_slock);
#ifdef DIAGNOSTIC
			if (vp->v_vnlock) {
				if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
					vprint("vclean: lock not drained", vp);
			}
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean, vp %p", vp);
#endif
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			simple_unlock(&vnode_free_list_slock);
		} else
			simple_unlock(&vp->v_interlock);
	}

	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	simple_lock(&vp->v_interlock);
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		simple_unlock(&vp->v_interlock);
		wakeup((caddr_t)vp);
	} else
		simple_unlock(&vp->v_interlock);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
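 *
 * A hedged usage sketch (an inode-cache reclaim path; hash_slock is
 * a hypothetical interlock protecting the cache):
 *
 *	simple_lock(&hash_slock);
 *	if (vrecycle(vp, &hash_slock, p))
 *		return;			(hash_slock already released)
 *	simple_unlock(&hash_slock);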
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		ltsleep((caddr_t)vp, PINOD | PNORELOCK,
		    "vgone", 0, &vp->v_interlock);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * If special device, remove it from the special device alias list,
	 * if it is on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (vp->v_hashchain != NULL) {
			if (*vp->v_hashchain == vp) {
				*vp->v_hashchain = vp->v_specnext;
			} else {
				for (vq = *vp->v_hashchain; vq;
				     vq = vq->v_specnext) {
					if (vq->v_specnext != vp)
						continue;
					vq->v_specnext = vp->v_specnext;
					break;
				}
				if (vq == NULL)
					panic("missing bdev");
			}
			if (vp->v_flag & VALIASED) {
				vx = NULL;
				for (vq = *vp->v_hashchain; vq;
				     vq = vq->v_specnext) {
					if (vq->v_rdev != vp->v_rdev ||
					    vq->v_type != vp->v_type)
						continue;
					if (vx)
						break;
					vx = vq;
				}
				if (vx == NULL)
					panic("missing alias");
				if (vq == NULL)
					vx->v_flag &= ~VALIASED;
				vp->v_flag &= ~VALIASED;
			}
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list.  The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean, vp %p", vp);
		if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
		    TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(maj, minl, minh, type)
	int maj, minl, minh;
	enum vtype type;
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static const char * const typename[] =
    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("tag %d type %s, usecount %d, writecount %ld, refcount %ld,",
	    vp->v_tag, typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
#endif

extern const char *mountcompatnames[];
extern const int nmountcompatnames;

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	struct vfsconf vfc;
#endif
	struct vfsops *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */

	/* Not generic: goes to file system. */
	if (name[0] != VFS_GENERIC) {
		if (name[0] >= nmountcompatnames || name[0] < 0 ||
		    mountcompatnames[name[0]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[0]]);
		if (vfsp == NULL || vfsp->vfs_sysctl == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}

	/* The rest are generic vfs sysctls. */
	switch (name[1]) {
	case VFS_USERMOUNT:
		return sysctl_int(oldp, oldlenp, newp, newlen,
		    &dovfsusermount);
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	case VFS_MAXTYPENUM:
		/*
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
	case VFS_CONF:
		/*
		 * Special: a node, next is a file system name.
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		if (name[2] >= nmountcompatnames || name[2] < 0 ||
		    mountcompatnames[name[2]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		vfc.vfc_vfsops = vfsp;
		strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
		vfc.vfc_typenum = name[2];
		vfc.vfc_refcount = vfsp->vfs_refcount;
		vfc.vfc_flags = 0;
		vfc.vfc_mountroot = vfsp->vfs_mountroot;
		vfc.vfc_next = NULL;
		return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
		    sizeof(struct vfsconf)));
#endif
	default:
		break;
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
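 *
 * The result is a packed array of (kernel address, vnode image)
 * records; a hedged consumer sketch (pstat(8)-style, buf/len are
 * hypothetical):
 *
 *	for (cp = buf; cp < buf + len; cp += VPTRSZ + VNODESZ) {
 *		kaddr = *(struct vnode **)cp;
 *		vn = (struct vnode *)(cp + VPTRSZ);
 *	}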
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof(struct vnode *)
#define VNODESZ	sizeof(struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ,
				VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
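 *
 * A hedged sketch of the argument a mount-time caller passes down
 * (field values illustrative only; ex_anon must also be filled in):
 *
 *	struct export_args ea;
 *
 *	ea.ex_flags = MNT_EXPORTED;
 *	ea.ex_addr = (struct sockaddr *)&sin;
 *	ea.ex_addrlen = sizeof(sin);
 *	ea.ex_masklen = 0;
 *	error = vfs_export(mp, &ump->um_export, &ea);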

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called via vfs_export() from the file system mount code (e.g.
 * ufs_mount()) to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	struct netcred *np, *enp;
	struct radix_node_head *rnh;
	int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	memset((caddr_t)np, 0, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
	if (error)
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used; do so on demand here.
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
					dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
		np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
		if (rn == 0) {
			enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
				smask, rnh);
			if (enp == 0) {
				error = EPERM;
				goto out;
			}
		} else
			enp = (struct netcred *)rn;

		/* Compare the whole group array, not just its first bytes. */
		if (enp->netc_exflags != argp->ex_flags ||
		    enp->netc_anon.cr_uid != argp->ex_anon.cr_uid ||
		    enp->netc_anon.cr_gid != argp->ex_anon.cr_gid ||
		    enp->netc_anon.cr_ngroups != argp->ex_anon.cr_ngroups ||
		    memcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups,
			enp->netc_anon.cr_ngroups * sizeof(gid_t)))
			error = EPERM;
		else
			error = 0;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}
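
/*
 * Layout sketch (illustrative only) of the single allocation made by
 * vfs_hang_addrlist() above: the netcred, the export address and the
 * optional mask share one malloc()'d block, so freeing np releases
 * all three at once.
 *
 *	np ----> +------------------------+
 *	         | struct netcred         |
 *	saddr -> +------------------------+  np + 1
 *	         | address (ex_addrlen)   |
 *	smask -> +------------------------+  (caddr_t)saddr + ex_addrlen
 *	         | mask (ex_masklen)      |
 *	         +------------------------+
 */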

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	int i;
	struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i]) != NULL) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		if (mp->mnt_flag & MNT_EXPUBLIC) {
			vfs_setpublicfs(NULL, NULL, NULL);
			mp->mnt_flag &= ~MNT_EXPUBLIC;
		}
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (argp->ex_flags & MNT_EXPUBLIC) {
			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
				return (error);
			mp->mnt_flag |= MNT_EXPUBLIC;
		}
		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

/*
 * Set the publicly exported filesystem (WebNFS).  Currently, only
 * one public filesystem is possible in the spec (RFC 2054 and 2055).
 */
int
vfs_setpublicfs(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;
	struct vnode *rvp;
	char *cp;

	/*
	 * mp == NULL -> invalidate the current info; the FS is
	 * no longer exported.  May be called from either vfs_export
	 * or unmount, so check if it hasn't already been done.
	 */
	if (mp == NULL) {
		if (nfs_pub.np_valid) {
			nfs_pub.np_valid = 0;
			if (nfs_pub.np_index != NULL) {
				FREE(nfs_pub.np_index, M_TEMP);
				nfs_pub.np_index = NULL;
			}
		}
		return (0);
	}

	/*
	 * Only one allowed at a time.
	 */
	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
		return (EBUSY);

	/*
	 * Get real filehandle for root of exported FS.
	 */
	memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;

	if ((error = VFS_ROOT(mp, &rvp)))
		return (error);

	error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid);
	vput(rvp);		/* release the root vnode even on error */
	if (error)
		return (error);

	/*
	 * If an indexfile was specified, pull it in.
	 */
	if (argp->ex_indexfile != NULL) {
		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
		    M_WAITOK);
		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
		    MAXNAMLEN, (size_t *)0);
		if (!error) {
			/*
			 * Check for illegal filenames.
			 */
			for (cp = nfs_pub.np_index; *cp; cp++) {
				if (*cp == '/') {
					error = EINVAL;
					break;
				}
			}
		}
		if (error) {
			FREE(nfs_pub.np_index, M_TEMP);
			return (error);
		}
	}

	nfs_pub.np_mount = mp;
	nfs_pub.np_valid = 1;
	return (0);
}
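
/*
 * Illustrative sketch (hedged; the exact form varies per file
 * system): a file system's VFS_MOUNT routine forwards the
 * export_args of an MNT_UPDATE request to vfs_export().  In the
 * ffs/ufs case this looks roughly like:
 *
 *	if (mp->mnt_flag & MNT_UPDATE) {
 *		...
 *		if (args.fspec == NULL)
 *			return (vfs_export(mp, &ump->um_export,
 *			    &args.export));
 *	}
 *
 * where "ump" names the per-mount private data; other file systems
 * keep their struct netexport elsewhere.
 */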

struct netcred *
vfs_export_lookup(mp, nep, nam)
	struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	struct netcred *np;
	struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
					    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Do the usual access checking.
 * file_mode, uid and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list.
 */
int
vaccess(type, file_mode, uid, gid, acc_mode, cred)
	enum vtype type;
	mode_t file_mode;
	uid_t uid;
	gid_t gid;
	mode_t acc_mode;
	struct ucred *cred;
{
	mode_t mask;

	/*
	 * Super-user always gets read/write access, but execute access
	 * depends on at least one execute bit being set.
	 */
	if (cred->cr_uid == 0) {
		if ((acc_mode & VEXEC) && type != VDIR &&
		    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
			return (EACCES);
		return (0);
	}

	mask = 0;

	/* Otherwise, check the owner. */
	if (cred->cr_uid == uid) {
		if (acc_mode & VEXEC)
			mask |= S_IXUSR;
		if (acc_mode & VREAD)
			mask |= S_IRUSR;
		if (acc_mode & VWRITE)
			mask |= S_IWUSR;
		return ((file_mode & mask) == mask ? 0 : EACCES);
	}

	/* Otherwise, check the groups. */
	if (cred->cr_gid == gid || groupmember(gid, cred)) {
		if (acc_mode & VEXEC)
			mask |= S_IXGRP;
		if (acc_mode & VREAD)
			mask |= S_IRGRP;
		if (acc_mode & VWRITE)
			mask |= S_IWGRP;
		return ((file_mode & mask) == mask ? 0 : EACCES);
	}

	/* Otherwise, check everyone else. */
	if (acc_mode & VEXEC)
		mask |= S_IXOTH;
	if (acc_mode & VREAD)
		mask |= S_IROTH;
	if (acc_mode & VWRITE)
		mask |= S_IWOTH;
	return ((file_mode & mask) == mask ? 0 : EACCES);
}
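
/*
 * Worked example (illustrative): for a regular file with mode 0640,
 * a VREAD|VWRITE request from a credential whose cr_uid matches the
 * owner takes the owner branch above and builds
 * mask = S_IRUSR|S_IWUSR = 0600; (0640 & 0600) == 0600, so vaccess()
 * returns 0.  The same request from a mere group member builds
 * mask = S_IRGRP|S_IWGRP = 0060; (0640 & 0060) == 0040 != 0060, so
 * vaccess() returns EACCES.
 */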

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall(p)
	struct proc *p;
{
	struct mount *mp, *nmp;
	int allerror, error;

	for (allerror = 0,
	     mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
		printf("unmounting %s (%s)...\n",
		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
		/*
		 * XXX Freeze syncer.  Must do this before locking the
		 * mount point.  See dounmount() for details.
		 */
		lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
		if (vfs_busy(mp, 0, 0)) {
			lockmgr(&syncer_lock, LK_RELEASE, NULL);
			continue;
		}
		if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}
	if (allerror)
		printf("WARNING: some file systems would not unmount\n");
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
	struct buf *bp;
	int iter, nbusy, nbusy_prev = 0, dcount, s;
	struct proc *p = curproc;

	/* XXX we're certainly not running in proc0's context! */
	if (p == NULL)
		p = &proc0;

	printf("syncing disks... ");

	/* remove user process from run queue */
	suspendsched();
	(void) spl0();

	/* avoid coming back this way again if we panic. */
	doing_shutdown = 1;

	sys_sync(p, NULL, NULL);

	/* Wait for sync to finish. */
	dcount = 10000;
	for (iter = 0; iter < 20;) {
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; ) {
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				nbusy++;
			/*
			 * With soft updates, some buffers that are
			 * written will be remarked as dirty until other
			 * buffers are written.
			 */
			if (bp->b_vp && bp->b_vp->v_mount
			    && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP)
			    && (bp->b_flags & B_DELWRI)) {
				s = splbio();
				bremfree(bp);
				bp->b_flags |= B_BUSY;
				splx(s);
				nbusy++;
				bawrite(bp);
				if (dcount-- <= 0) {
					printf("softdep ");
					goto fail;
				}
			}
		}
		if (nbusy == 0)
			break;
		if (nbusy_prev == 0)
			nbusy_prev = nbusy;
		printf("%d ", nbusy);
		tsleep(&nbusy, PRIBIO, "bflush",
		    (iter == 0) ? 1 : hz / 25 * iter);
		if (nbusy >= nbusy_prev) /* we didn't flush anything */
			iter++;
		else
			nbusy_prev = nbusy;
	}
	if (nbusy) {
fail:
#if defined(DEBUG) || defined(DEBUG_HALT_BUSY)
		printf("giving up\nPrinting vnodes for busy buffers\n");
		for (bp = &buf[nbuf]; --bp >= buf; )
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				vprint(NULL, bp->b_vp);

#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif

#else /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
		printf("giving up\n");
#endif /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
		return;
	} else
		printf("done\n");

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems. */
	vfs_unmountall(p);
}
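
/*
 * Illustrative caller sketch (assumption: machine-dependent reboot
 * code; "waittime" is the per-port guard variable, not defined in
 * this file).  Each port's cpu_reboot() is expected to call
 * vfs_shutdown() once, before disabling interrupts:
 *
 *	if ((howto & RB_NOSYNC) == 0 && waittime < 0) {
 *		waittime = 0;
 *		vfs_shutdown();
 *	}
 */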

/*
 * Mount the root file system.  If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot()
{
	extern int (*mountroot) __P((void));
	struct vfsops *v;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	switch (root_device->dv_class) {
	case DV_IFNET:
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET");
		break;

	case DV_DISK:
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    root_device->dv_xname);
		return (ENODEV);
	}

	/*
	 * If user specified a file system, use it.
	 */
	if (mountroot != NULL)
		return ((*mountroot)());

	/*
	 * Try each file system currently configured into the kernel.
	 */
	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (v->vfs_mountroot == NULL)
			continue;
#ifdef DEBUG
		printf("mountroot: trying %s...\n", v->vfs_name);
#endif
		if ((*v->vfs_mountroot)() == 0) {
			printf("root file system type: %s\n", v->vfs_name);
			break;
		}
	}

	if (v == NULL) {
		printf("no file system for %s", root_device->dv_xname);
		if (root_device->dv_class == DV_DISK)
			printf(" (dev 0x%x)", rootdev);
		printf("\n");
		return (EFTYPE);
	}
	return (0);
}

/*
 * Given a file system name, look up the vfsops for that
 * file system, or return NULL if the file system isn't present
 * in the kernel.
 */
struct vfsops *
vfs_getopsbyname(name)
	const char *name;
{
	struct vfsops *v;

	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (strcmp(v->vfs_name, name) == 0)
			break;
	}

	return (v);
}

/*
 * Establish a file system and initialize it.
 */
int
vfs_attach(vfs)
	struct vfsops *vfs;
{
	struct vfsops *v;
	int error = 0;

	/*
	 * Make sure this file system doesn't already exist.
	 */
	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
			error = EEXIST;
			goto out;
		}
	}

	/*
	 * Initialize the vnode operations for this file system.
	 */
	vfs_opv_init(vfs->vfs_opv_descs);

	/*
	 * Now initialize the file system itself.
	 */
	(*vfs->vfs_init)();

	/*
	 * ...and link it into the kernel's list.
	 */
	LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);

	/*
	 * Sanity: make sure the reference count is 0.
	 */
	vfs->vfs_refcount = 0;

out:
	return (error);
}
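
/*
 * Illustrative registration sketch (hedged; "myfs" is hypothetical):
 * a file system becomes available by handing its vfsops to
 * vfs_attach(), typically at boot time or from a loadable module,
 * and is withdrawn with vfs_detach() (below) once nothing references
 * it:
 *
 *	extern struct vfsops myfs_vfsops;
 *
 *	if ((error = vfs_attach(&myfs_vfsops)) != 0)
 *		return (error);		(EEXIST if the name is taken)
 *	...
 *	if ((error = vfs_detach(&myfs_vfsops)) != 0)
 *		return (error);		(EBUSY while still referenced)
 */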

/*
 * Remove a file system from the kernel.
 */
int
vfs_detach(vfs)
	struct vfsops *vfs;
{
	struct vfsops *v;

	/*
	 * Make sure no one is using the filesystem.
	 */
	if (vfs->vfs_refcount != 0)
		return (EBUSY);

	/*
	 * ...and remove it from the kernel's list.
	 */
	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (v == vfs) {
			LIST_REMOVE(v, vfs_list);
			break;
		}
	}

	if (v == NULL)
		return (ESRCH);

	/*
	 * Now run the file system-specific cleanups.
	 */
	(*vfs->vfs_done)();

	/*
	 * Free the vnode operations vector.
	 */
	vfs_opv_free(vfs->vfs_opv_descs);
	return (0);
}

#ifdef DDB
const char buf_flagbits[] =
	"\20\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI"
	"\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE"
	"\21ORDERED\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED"
	"\32XXX\33VFLUSH";

void
vfs_buf_print(bp, full, pr)
	struct buf *bp;
	int full;
	void (*pr) __P((const char *, ...));
{
	char buf[1024];

	(*pr)(" vp %p lblkno 0x%x blkno 0x%x dev 0x%x\n",
	    bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);

	bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf));
	(*pr)(" error %d flags 0x%s\n", bp->b_error, buf);

	(*pr)(" bufsize 0x%x bcount 0x%x resid 0x%x\n",
	    bp->b_bufsize, bp->b_bcount, bp->b_resid);
	(*pr)(" data %p saveaddr %p dep %p\n",
	    bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
	(*pr)(" iodone %p\n", bp->b_iodone);
}

const char vnode_flagbits[] =
	"\20\1ROOT\2TEXT\3SYSTEM\4ISTTY\11XLOCK\12XWANT\13BWAIT\14ALIASED"
	"\15DIROP\16LAYER\17ONWORKLIST\20DIRTY";

const char *vnode_types[] = {
	"VNON",
	"VREG",
	"VDIR",
	"VBLK",
	"VCHR",
	"VLNK",
	"VSOCK",
	"VFIFO",
	"VBAD",
};

const char *vnode_tags[] = {
	"VT_NON",
	"VT_UFS",
	"VT_NFS",
	"VT_MFS",
	"VT_MSDOSFS",
	"VT_LFS",
	"VT_LOFS",
	"VT_FDESC",
	"VT_PORTAL",
	"VT_NULL",
	"VT_UMAP",
	"VT_KERNFS",
	"VT_PROCFS",
	"VT_AFS",
	"VT_ISOFS",
	"VT_UNION",
	"VT_ADOSFS",
	"VT_EXT2FS",
	"VT_CODA",
	"VT_FILECORE",
	"VT_NTFS",
	"VT_VFS",
	"VT_OVERLAY"
};

void
vfs_vnode_print(vp, full, pr)
	struct vnode *vp;
	int full;
	void (*pr) __P((const char *, ...));
{
	char buf[256];
	const char *vtype, *vtag;

	uvm_object_printit(&vp->v_uvm.u_obj, full, pr);
	bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
	(*pr)("\nVNODE flags %s\n", buf);
	(*pr)("mp %p nio %d size 0x%x rwlock 0x%x glock 0x%x\n",
	    vp->v_mount, vp->v_uvm.u_nio, (int)vp->v_uvm.u_size,
	    vp->v_vnlock ? lockstatus(vp->v_vnlock) : 0x999,
	    lockstatus(&vp->v_glock));

	(*pr)("data %p usecount %d writecount %d holdcnt %d numoutput %d\n",
	    vp->v_data, vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt, vp->v_numoutput);

	vtype = (vp->v_type >= 0 &&
	    vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ?
	    vnode_types[vp->v_type] : "UNKNOWN";
	vtag = (vp->v_tag >= 0 &&
	    vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ?
	    vnode_tags[vp->v_tag] : "UNKNOWN";

	(*pr)("type %s(%d) tag %s(%d) id 0x%x mount %p typedata %p\n",
	    vtype, vp->v_type, vtag, vp->v_tag,
	    vp->v_id, vp->v_mount, vp->v_mountedhere);
	(*pr)("lastr 0x%x lastw 0x%x lasta 0x%x\n",
	    vp->v_lastr, vp->v_lastw, vp->v_lasta);
	(*pr)("cstart 0x%x clen 0x%x ralen 0x%x maxra 0x%x\n",
	    vp->v_cstart, vp->v_clen, vp->v_ralen, vp->v_maxra);

	if (full) {
		struct buf *bp;

		(*pr)("clean bufs:\n");
		LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}

		(*pr)("dirty bufs:\n");
		LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}
	}
}
#endif
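
/*
 * Note on the flag-bit strings above (illustrative): the format
 * consumed by bitmask_snprintf(9) starts with the numeric base for
 * the value (\20 = 16, i.e. hexadecimal), followed by pairs of a
 * 1-origin bit number and its name.  "\5BUSY" therefore names bit 5
 * (0x10) and "\25READ" bit 21 (0x100000), so a buffer with
 * b_flags == (B_BUSY|B_READ) is printed as "100010<BUSY,READ>".
 */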