/*	$NetBSD: vfs_subr.c,v 1.156 2001/08/03 06:00:13 jdolecek Exp $	*/

/*-
 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include "opt_ddb.h"
#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/dirent.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/syncfs/syncfs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_ddb.h>

#include <sys/sysctl.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
const int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}

/* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */
struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct nfs_public nfs_pub;		/* publicly exported FS */

struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;		/* root device */

struct pool vnode_pool;			/* memory pool for vnodes */

/*
 * Local declarations.
 */
void	insmntque __P((struct vnode *, struct mount *));
int	getdevvp __P((dev_t, struct vnode **, enum vtype));
void	vgoneall __P((struct vnode *));

static	int vfs_hang_addrlist __P((struct mount *, struct netexport *,
	    struct export_args *));
static	int vfs_free_netcred __P((struct radix_node *, void *));
static	void vfs_free_addrlist __P((struct netexport *));

#ifdef DEBUG
void	printlockedvnodes __P((void));
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
	    0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE);

	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
{
	int lkflags;

	while (mp->mnt_flag & MNT_UNMOUNT) {
		int gone;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
		    && mp->mnt_unmounter == curproc)
			return (EDEADLK);
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 *
		 * XXX MP: add spinlock protecting mnt_wcnt here once you
		 * can atomically unlock-and-sleep.
		 */
		mp->mnt_wcnt++;
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		mp->mnt_wcnt--;
		gone = mp->mnt_flag & MNT_GONE;

		if (mp->mnt_wcnt == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp)
	struct mount *mp;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfs_refcount++;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Lookup a mount point by filesystem identifier.
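 *
 * (Descriptive note: scans the global mountlist under mountlist_slock
 * and returns NULL when no mounted filesystem matches the given fsid.)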
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(type)
	const char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	struct vattr *vap;
{

	vap->va_type = VNON;

	/*
	 * Assign individually so that it is safe even if size and
	 * sign of each member are varied.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) __P((void *));
	struct vnode **vpp;
{
	extern struct uvm_pagerops uvm_vnodeops;
	struct uvm_object *uobj;
	struct proc *p = curproc;	/* XXX */
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int error = 0, tryalloc;
#ifdef DIAGNOSTIC
	int s;
#endif
	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error.  If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock).
		 */
		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
		if (error && error != EDEADLK)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one.
	 * The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list.  Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list.  If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list.  The toggle ensures that half the time we
	 * will use a vnode from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size.  We are reluctant to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */

 try_again:
	vp = NULL;

	simple_lock(&vnode_free_list_slock);

	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	tryalloc = numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(listhd = &vnode_free_list) == NULL &&
	     (TAILQ_FIRST(listhd = &vnode_hold_list) == NULL || toggle));

	if (tryalloc &&
	    (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
		simple_unlock(&vnode_free_list_slock);
		memset(vp, 0, sizeof(*vp));
		simple_lock_init(&vp->v_interlock);
		numvnodes++;
	} else {
		for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
		     vp = TAILQ_NEXT(vp, v_freelist)) {
			if (simple_lock_try(&vp->v_interlock)) {
				if ((vp->v_flag & VLAYER) == 0)
					break;
				if (VOP_ISLOCKED(vp) == 0)
					break;
				else
					simple_unlock(&vp->v_interlock);
			}
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			if (mp && error != EDEADLK)
				vfs_unbusy(mp);
			if (tryalloc) {
				printf("WARNING: unable to allocate new "
				    "vnode, retrying...\n");
				(void) tsleep(&lbolt, PRIBIO, "newvn", hz);
				goto try_again;
			}
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't, vp %p", vp);
		TAILQ_REMOVE(listhd, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't, vp %p", vp);
		s = splbio();
		if (vp->v_numoutput)
			panic("clean vnode has pending I/O's, vp %p", vp);
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	lockinit(&vp->v_glock, PVFS, "glock", 0, 0);
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);

	/*
	 * initialize uvm_object within vnode.
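	 * (Descriptive note: the object's pager ops point at uvm_vnodeops,
	 * and the size is left as VSIZENOTSET until the filesystem tells
	 * uvm the vnode's real size.)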
	 */

	uobj = &vp->v_uvm.u_obj;
	uobj->pgops = &uvm_vnodeops;
	TAILQ_INIT(&uobj->memq);
	vp->v_uvm.u_size = VSIZENOTSET;

	if (mp && error != EDEADLK)
		vfs_unbusy(mp);
	return (0);
}

/*
 * This is really just the reverse of getnewvnode().  Needed for
 * VFS_VGET functions who may need to push back a vnode in case
 * of a locking race.
 */
void
ungetnewvnode(vp)
	struct vnode *vp;
{
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 1)
		panic("ungetnewvnode: busy vnode");
#endif
	vp->v_usecount--;
	insmntque(vp, NULL);
	vp->v_type = VBAD;

	simple_lock(&vp->v_interlock);
	/*
	 * Insert at head of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_flag & MNT_UNMOUNT) &&
	    !(mp->mnt_flag & MNT_SOFTDEP) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	struct buf *bp;
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput, vp %p", vp);
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	struct uvm_object *uobj = &vp->v_uvm.u_obj;
	struct buf *bp, *nbp;
	int s, error, rv;
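	/*
	 * (Descriptive note: with V_SAVE the pager is asked to clean,
	 * i.e. write back, pages before freeing them; without it, the
	 * cached pages are simply discarded.)
	 */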
	int flushflags = PGO_ALLPAGES|PGO_FREE|PGO_SYNCIO|
	    (flags & V_SAVE ? PGO_CLEANIT : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	if (vp->v_type == VREG) {
		simple_lock(&uobj->vmobjlock);
		rv = (uobj->pgops->pgo_flush)(uobj, 0, 0, flushflags);
		simple_unlock(&uobj->vmobjlock);
		if (!rv) {
			return EIO;
		}
	}
	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
		if (error)
			return (error);
#ifdef DIAGNOSTIC
		s = splbio();
		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
			panic("vinvalbuf: dirty bufs, vp %p", vp);
		splx(s);
#endif
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vinvalbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vinvalbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			bp->b_flags |= B_BUSY | B_VFLUSH;
			VOP_BWRITE(bp);
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	splx(s);

	return (0);
}

/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
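 * Only buffers at or beyond logical block lbn are destroyed; blocks
 * below the truncation point are left alone.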
 */
int
vtruncbuf(vp, lbn, slpflag, slptimeo)
	struct vnode *vp;
	daddr_t lbn;
	int slpflag, slptimeo;
{
	struct uvm_object *uobj = &vp->v_uvm.u_obj;
	struct buf *bp, *nbp;
	int s, error, rv;

	s = splbio();
	if (vp->v_type == VREG) {
		simple_lock(&uobj->vmobjlock);
		rv = (uobj->pgops->pgo_flush)(uobj,
		    round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift), 0,
		    PGO_FREE|PGO_SYNCIO);
		simple_unlock(&uobj->vmobjlock);
		if (!rv) {
			splx(s);
			return EIO;
		}
	}

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep(bp, slpflag | (PRIBIO + 1),
			    "vtruncbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep(bp, slpflag | (PRIBIO + 1),
			    "vtruncbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	splx(s);

	return (0);
}

void
vflushbuf(vp, sync)
	struct vnode *vp;
	int sync;
{
	struct uvm_object *uobj = &vp->v_uvm.u_obj;
	struct buf *bp, *nbp;
	int s;

	if (vp->v_type == VREG) {
		int flags = PGO_CLEANIT|PGO_ALLPAGES| (sync ? PGO_SYNCIO : 0);

		simple_lock(&uobj->vmobjlock);
		(uobj->pgops->pgo_flush)(uobj, 0, 0, flags);
		simple_unlock(&uobj->vmobjlock);
	}

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_flags |= B_BUSY | B_VFLUSH;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
	}
	splx(s);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	struct vnode *vp;
	struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free, bp %p", bp);
	VHOLD(vp);
	s = splbio();
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == NULL)
		panic("brelvp: vp NULL, bp %p", bp);

	s = splbio();
	vp = bp->b_vp;
	/*
	 * Delete from old vnode list, if on one.
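	 * (Descriptive note: afterwards, if this was the last dirty buffer
	 * of a non-VREG vnode on the syncer worklist, the vnode is taken
	 * off the worklist below.)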
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);

	if (vp->v_type != VREG && (vp->v_flag & VONWORKLST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}

	bp->b_vp = NULL;
	HOLDRELE(vp);
	splx(s);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 *
 * This function must be called at splbio().
 */
void
reassignbuf(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &newvp->v_cleanblkhd;
		if (newvp->v_type != VREG &&
		    (newvp->v_flag & VONWORKLST) &&
		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	} else {
		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			switch (newvp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
				break;
			}
			if (!newvp->v_mount ||
			    (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
				vn_syncer_add_to_worklist(newvp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
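 *
 * (Descriptive note: returns NULLVP when nvp itself has been installed
 * on the special device hash chain, i.e. when no usable alias was found.)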
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_NOWAIT);
		/* XXX Erg. */
		if (nvp->v_specinfo == NULL) {
			simple_unlock(&spechash_slock);
			uvm_wait("checkalias");
			goto loop;
		}

		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. If the vnode lock bit is set the
 * vnode is being eliminated in vgone. In that case, we cannot
 * grab the vnode, so the process is awakened when the transition is
 * completed, and an error returned to indicate that the vnode is no
 * longer usable (possibly having been changed to a new file system type).
 */
int
vget(vp, flags)
	struct vnode *vp;
	int flags;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */

	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		if (flags & LK_NOWAIT) {
			simple_unlock(&vp->v_interlock);
			return EBUSY;
		}
		vp->v_flag |= VXWANT;
		ltsleep((caddr_t)vp, PINOD|PNORELOCK,
		    "vget", 0, &vp->v_interlock);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vget", vp);
		panic("vget: usecount overflow, vp %p", vp);
	}
#endif
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active. We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (vp->v_usecount > 0) {
				simple_unlock(&vp->v_interlock);
				return (error);
			}
			/*
			 * insert at tail of LRU list
			 */
			simple_lock(&vnode_free_list_slock);
			if (vp->v_holdcnt > 0)
				TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
				    v_freelist);
			else
				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
				    v_freelist);
			simple_unlock(&vnode_free_list_slock);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VTEXT) {
		uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
		uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
	}
	vp->v_flag &= ~VTEXT;
	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, p);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
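 *
 * (Descriptive note: unlike vput(), the vnode is expected to be unlocked
 * on entry; the vnode lock is only taken here to call VOP_INACTIVE.)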
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt vp %p", vp);
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VTEXT) {
		uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
		uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
	}
	vp->v_flag &= ~VTEXT;
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
		VOP_INACTIVE(vp, p);
}

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	struct vnode *vp;
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list. The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	simple_lock(&vp->v_interlock);
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt vp %p", vp);
	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list. The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */

	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required, vp %p", vp);
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vref", vp);
		panic("vref: usecount overflow, vp %p", vp);
	}
#endif
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0) {
		/* We have the vnode interlock. */
		vp->v_usecount++;
#ifdef DIAGNOSTIC
		if (vp->v_usecount == 0) {
			vprint("vclean", vp);
			panic("vclean: usecount overflow");
		}
#endif
	}

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock, vp %p", vp);
	vp->v_flag |= VXLOCK;
	if (vp->v_flag & VTEXT) {
		uvmexp.vtextpages -= vp->v_uvm.u_obj.uo_npages;
		uvmexp.vnodepages += vp->v_uvm.u_obj.uo_npages;
	}
	vp->v_flag &= ~VTEXT;

	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);

	/*
	 * Clean out any cached data associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim, vp %p", vp);
	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			/*
			 * Insert at tail of LRU list.
			 */

			simple_unlock(&vp->v_interlock);
			simple_lock(&vnode_free_list_slock);
#ifdef DIAGNOSTIC
			if (vp->v_vnlock) {
				if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
					vprint("vclean: lock not drained", vp);
			}
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean, vp %p", vp);
#endif
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			simple_unlock(&vnode_free_list_slock);
		} else
			simple_unlock(&vp->v_interlock);
	}

	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	simple_lock(&vp->v_interlock);
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		simple_unlock(&vp->v_interlock);
		wakeup((caddr_t)vp);
	} else
		simple_unlock(&vp->v_interlock);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
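 * Returns 1 if the vnode was recycled, 0 if it was still in use.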
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		ltsleep((caddr_t)vp, PINOD | PNORELOCK,
		    "vgone", 0, &vp->v_interlock);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * If special device, remove it from the special device alias list,
	 * if it is on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (vp->v_hashchain != NULL) {
			if (*vp->v_hashchain == vp) {
				*vp->v_hashchain = vp->v_specnext;
			} else {
				for (vq = *vp->v_hashchain; vq;
				     vq = vq->v_specnext) {
					if (vq->v_specnext != vp)
						continue;
					vq->v_specnext = vp->v_specnext;
					break;
				}
				if (vq == NULL)
					panic("missing bdev");
			}
			if (vp->v_flag & VALIASED) {
				vx = NULL;
				for (vq = *vp->v_hashchain; vq;
				     vq = vq->v_specnext) {
					if (vq->v_rdev != vp->v_rdev ||
					    vq->v_type != vp->v_type)
						continue;
					if (vx)
						break;
					vx = vq;
				}
				if (vx == NULL)
					panic("missing alias");
				if (vq == NULL)
					vx->v_flag &= ~VALIASED;
				vp->v_flag &= ~VALIASED;
			}
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean, vp %p", vp);
		if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
		    TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(maj, minl, minh, type)
	int maj, minl, minh;
	enum vtype type;
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp &&
		    (vq->v_flag & VXLOCK) == 0) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static const char * const typename[] =
    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("tag %d type %s, usecount %d, writecount %ld, refcount %ld,",
	    vp->v_tag, typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	struct vfsconf vfc;
	extern const char * const mountcompatnames[];
	extern int nmountcompatnames;
#endif
	struct vfsops *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */

	/* Not generic: goes to file system. */
	if (name[0] != VFS_GENERIC) {
		static const struct ctlname vfsnames[VFS_MAXID+1]=CTL_VFS_NAMES;
		const char *vfsname;

		if (name[0] < 0 || name[0] > VFS_MAXID
		    || (vfsname = vfsnames[name[0]].ctl_name) == NULL)
			return (EOPNOTSUPP);

		vfsp = vfs_getopsbyname(vfsname);
		if (vfsp == NULL || vfsp->vfs_sysctl == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}

	/* The rest are generic vfs sysctls. */
	switch (name[1]) {
	case VFS_USERMOUNT:
		return sysctl_int(oldp, oldlenp, newp, newlen, &dovfsusermount);
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	case VFS_MAXTYPENUM:
		/*
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
	case VFS_CONF:
		/*
		 * Special: a node, next is a file system name.
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		if (name[2] >= nmountcompatnames || name[2] < 0 ||
		    mountcompatnames[name[2]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		vfc.vfc_vfsops = vfsp;
		strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
		vfc.vfc_typenum = name[2];
		vfc.vfc_refcount = vfsp->vfs_refcount;
		vfc.vfc_flags = 0;
		vfc.vfc_mountroot = vfsp->vfs_mountroot;
		vfc.vfc_next = NULL;
		return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
		    sizeof(struct vfsconf)));
#endif
	default:
		break;
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define	KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
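 * A NULL 'where' pointer only computes an estimate of the space needed,
 * padded by KINFO_VNODESLOP entries to allow for concurrent growth.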
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof(struct vnode *)
#define VNODESZ	sizeof(struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ,
			     VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
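 *
 * (Descriptive note: an export with ex_addrlen == 0 establishes the
 * default entry for the mount; otherwise the address and optional mask
 * are copied in and inserted into a per-address-family radix tree.)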
/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	struct netcred *np, *enp;
	struct radix_node_head *rnh;
	int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}

	if (argp->ex_addrlen > MLEN)
		return (EINVAL);

	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	memset((caddr_t)np, 0, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
	if (error)
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask,
		    argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * It seems silly to initialize every address family
		 * when most are never used; do it on demand here.
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) {	/* already exists */
		if (rn == 0) {
			enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
			    smask, rnh);
			if (enp == 0) {
				error = EPERM;
				goto out;
			}
		} else
			enp = (struct netcred *)rn;

		/* Compare the gid_t entries actually in use. */
		if (enp->netc_exflags != argp->ex_flags ||
		    enp->netc_anon.cr_uid != argp->ex_anon.cr_uid ||
		    enp->netc_anon.cr_gid != argp->ex_anon.cr_gid ||
		    enp->netc_anon.cr_ngroups != argp->ex_anon.cr_ngroups ||
		    memcmp(&enp->netc_anon.cr_groups,
		    &argp->ex_anon.cr_groups,
		    enp->netc_anon.cr_ngroups * sizeof(gid_t)))
			error = EPERM;
		else
			error = 0;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}
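/*
 * Note on the allocation in vfs_hang_addrlist() above: the netcred,
 * the export address, and the (optional) mask share one M_NETADDR
 * allocation, which is why vfs_free_netcred() can release all three
 * with a single free():
 *
 *	+----------------+--------------------+--------------------+
 *	| struct netcred | addr (ex_addrlen)  | mask (ex_masklen)  |
 *	+----------------+--------------------+--------------------+
 *	np               saddr = (np + 1)     smask
 */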
/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	int i;
	struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i]) != NULL) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		if (mp->mnt_flag & MNT_EXPUBLIC) {
			vfs_setpublicfs(NULL, NULL, NULL);
			mp->mnt_flag &= ~MNT_EXPUBLIC;
		}
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (argp->ex_flags & MNT_EXPUBLIC) {
			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
				return (error);
			mp->mnt_flag |= MNT_EXPUBLIC;
		}
		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}
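/*
 * Illustrative sketch (ufs-style, hypothetical names): a file system's
 * VFS_MOUNT entry point typically forwards the export_args from an
 * MNT_UPDATE mount with no special device to vfs_export(), e.g.:
 */
#if 0
	if (mp->mnt_flag & MNT_UPDATE) {
		if (args.fspec == 0)
			/* process export/unexport request */
			return (vfs_export(mp, &ump->um_export,
			    &args.export));
	}
#endif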
/*
 * Set the publicly exported filesystem (WebNFS).  Currently, only
 * one public filesystem is possible in the spec (RFC 2054 and 2055).
 */
int
vfs_setpublicfs(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;
	struct vnode *rvp;
	char *cp;

	/*
	 * mp == NULL -> invalidate the current info; the FS is
	 * no longer exported.  May be called from either vfs_export
	 * or unmount, so check if it hasn't already been done.
	 */
	if (mp == NULL) {
		if (nfs_pub.np_valid) {
			nfs_pub.np_valid = 0;
			if (nfs_pub.np_index != NULL) {
				FREE(nfs_pub.np_index, M_TEMP);
				nfs_pub.np_index = NULL;
			}
		}
		return (0);
	}

	/*
	 * Only one allowed at a time.
	 */
	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
		return (EBUSY);

	/*
	 * Get real filehandle for root of exported FS.
	 */
	memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;

	if ((error = VFS_ROOT(mp, &rvp)))
		return (error);

	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
		return (error);

	vput(rvp);

	/*
	 * If an indexfile was specified, pull it in.
	 */
	if (argp->ex_indexfile != NULL) {
		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
		    M_WAITOK);
		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
		    MAXNAMLEN, (size_t *)0);
		if (!error) {
			/*
			 * Check for illegal filenames.
			 */
			for (cp = nfs_pub.np_index; *cp; cp++) {
				if (*cp == '/') {
					error = EINVAL;
					break;
				}
			}
		}
		if (error) {
			FREE(nfs_pub.np_index, M_TEMP);
			return (error);
		}
	}

	nfs_pub.np_mount = mp;
	nfs_pub.np_valid = 1;
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	struct netcred *np;
	struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
				    (*rnh->rnh_matchaddr)((caddr_t)saddr,
				    rnh);
				if (np && (np->netc_rnodes->rn_flags &
				    RNF_ROOT) != 0)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && (mp->mnt_flag & MNT_DEFEXPORTED))
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Do the usual access checking.
 * file_mode, uid, and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list.
 */
int
vaccess(type, file_mode, uid, gid, acc_mode, cred)
	enum vtype type;
	mode_t file_mode;
	uid_t uid;
	gid_t gid;
	mode_t acc_mode;
	struct ucred *cred;
{
	mode_t mask;

	/*
	 * Super-user always gets read/write access, but execute access
	 * depends on at least one execute bit being set.
	 */
	if (cred->cr_uid == 0) {
		if ((acc_mode & VEXEC) && type != VDIR &&
		    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
			return (EACCES);
		return (0);
	}

	mask = 0;

	/* Otherwise, check the owner. */
	if (cred->cr_uid == uid) {
		if (acc_mode & VEXEC)
			mask |= S_IXUSR;
		if (acc_mode & VREAD)
			mask |= S_IRUSR;
		if (acc_mode & VWRITE)
			mask |= S_IWUSR;
		return ((file_mode & mask) == mask ? 0 : EACCES);
	}

	/* Otherwise, check the groups. */
	if (cred->cr_gid == gid || groupmember(gid, cred)) {
		if (acc_mode & VEXEC)
			mask |= S_IXGRP;
		if (acc_mode & VREAD)
			mask |= S_IRGRP;
		if (acc_mode & VWRITE)
			mask |= S_IWGRP;
		return ((file_mode & mask) == mask ? 0 : EACCES);
	}

	/* Otherwise, check everyone else. */
	if (acc_mode & VEXEC)
		mask |= S_IXOTH;
	if (acc_mode & VREAD)
		mask |= S_IROTH;
	if (acc_mode & VWRITE)
		mask |= S_IWOTH;
	return ((file_mode & mask) == mask ? 0 : EACCES);
}
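/*
 * Illustrative sketch (hypothetical "foofs", ufs-style fields): a
 * typical VOP_ACCESS implementation reduces to a single vaccess()
 * call once it has the owner, group, and mode in hand:
 */
#if 0
	int
	foofs_access(void *v)
	{
		struct vop_access_args *ap = v;
		struct foonode *fp = VTOFOO(ap->a_vp);	/* hypothetical */

		return (vaccess(ap->a_vp->v_type, fp->f_mode & ALLPERMS,
		    fp->f_uid, fp->f_gid, ap->a_mode, ap->a_cred));
	}
#endif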
/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall(p)
	struct proc *p;
{
	struct mount *mp, *nmp;
	int allerror, error;

	for (allerror = 0,
	     mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
		printf("unmounting %s (%s)...\n",
		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
		/*
		 * XXX Freeze syncer.  Must do this before locking the
		 * mount point.  See dounmount() for details.
		 */
		lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
		if (vfs_busy(mp, 0, 0)) {
			lockmgr(&syncer_lock, LK_RELEASE, NULL);
			continue;
		}
		if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}
	if (allerror)
		printf("WARNING: some file systems would not unmount\n");
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
	struct buf *bp;
	int iter, nbusy, nbusy_prev = 0, dcount, s;
	struct proc *p = curproc;

	/* XXX we're certainly not running in proc0's context! */
	if (p == NULL)
		p = &proc0;

	printf("syncing disks... ");

	/* Remove user processes from the run queues. */
	suspendsched();
	(void) spl0();

	/* Avoid coming back this way again if we panic. */
	doing_shutdown = 1;

	sys_sync(p, NULL, NULL);

	/* Wait for sync to finish. */
	dcount = 10000;
	for (iter = 0; iter < 20;) {
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; ) {
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				nbusy++;
			/*
			 * With soft updates, some buffers that are
			 * written will be remarked as dirty until other
			 * buffers are written.
			 */
			if (bp->b_vp && bp->b_vp->v_mount
			    && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP)
			    && (bp->b_flags & B_DELWRI)) {
				s = splbio();
				bremfree(bp);
				bp->b_flags |= B_BUSY;
				splx(s);
				nbusy++;
				bawrite(bp);
				if (dcount-- <= 0) {
					printf("softdep ");
					goto fail;
				}
			}
		}
		if (nbusy == 0)
			break;
		if (nbusy_prev == 0)
			nbusy_prev = nbusy;
		printf("%d ", nbusy);
		tsleep(&nbusy, PRIBIO, "bflush",
		    (iter == 0) ? 1 : hz / 25 * iter);
		if (nbusy >= nbusy_prev)	/* we didn't flush anything */
			iter++;
		else
			nbusy_prev = nbusy;
	}
	if (nbusy) {
fail:
#if defined(DEBUG) || defined(DEBUG_HALT_BUSY)
		printf("giving up\nPrinting vnodes for busy buffers\n");
		for (bp = &buf[nbuf]; --bp >= buf; )
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				vprint(NULL, bp->b_vp);

#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif

#else /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
		printf("giving up\n");
#endif /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
		return;
	} else
		printf("done\n");

	/*
	 * If we've panicked, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems. */
	vfs_unmountall(p);
}
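/*
 * Illustrative sketch (hypothetical machine-dependent code): before
 * vfs_mountroot() below runs, a port's root configuration is expected
 * to have set root_device, rootdev (for DV_DISK roots), and possibly
 * the mountroot hook, e.g.:
 */
#if 0
	extern int (*mountroot) __P((void));

	root_device = booted_device;	/* chosen by autoconf(9) */
	rootdev = makedev(4, 0);	/* hypothetical disk major/minor */
	mountroot = NULL;		/* NULL => try each configured FS */
#endif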
/*
 * Mount the root file system.  If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot()
{
	extern int (*mountroot) __P((void));
	struct vfsops *v;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	switch (root_device->dv_class) {
	case DV_IFNET:
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET");
		break;

	case DV_DISK:
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    root_device->dv_xname);
		return (ENODEV);
	}

	/*
	 * If the user specified a file system, use it.
	 */
	if (mountroot != NULL)
		return ((*mountroot)());

	/*
	 * Try each file system currently configured into the kernel.
	 */
	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (v->vfs_mountroot == NULL)
			continue;
#ifdef DEBUG
		printf("mountroot: trying %s...\n", v->vfs_name);
#endif
		if ((*v->vfs_mountroot)() == 0) {
			printf("root file system type: %s\n", v->vfs_name);
			break;
		}
	}

	if (v == NULL) {
		printf("no file system for %s", root_device->dv_xname);
		if (root_device->dv_class == DV_DISK)
			printf(" (dev 0x%x)", rootdev);
		printf("\n");
		return (EFTYPE);
	}
	return (0);
}

/*
 * Given a file system name, look up the vfsops for that
 * file system, or return NULL if the file system isn't present
 * in the kernel.
 */
struct vfsops *
vfs_getopsbyname(name)
	const char *name;
{
	struct vfsops *v;

	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (strcmp(v->vfs_name, name) == 0)
			break;
	}

	return (v);
}

/*
 * Establish a file system and initialize it.
 */
int
vfs_attach(vfs)
	struct vfsops *vfs;
{
	struct vfsops *v;
	int error = 0;

	/*
	 * Make sure this file system doesn't already exist.
	 */
	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
			error = EEXIST;
			goto out;
		}
	}

	/*
	 * Initialize the vnode operations for this file system.
	 */
	vfs_opv_init(vfs->vfs_opv_descs);

	/*
	 * Now initialize the file system itself.
	 */
	(*vfs->vfs_init)();

	/*
	 * ...and link it into the kernel's list.
	 */
	LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);

	/*
	 * Sanity: make sure the reference count is 0.
	 */
	vfs->vfs_refcount = 0;

out:
	return (error);
}
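/*
 * Illustrative sketch (hypothetical "foofs"): a dynamically loaded
 * file system registers its vfsops with vfs_attach() at load time and
 * withdraws them with vfs_detach() below; detach fails with EBUSY
 * while any instance is mounted:
 */
#if 0
	extern struct vfsops foofs_vfsops;	/* hypothetical */
	int error;

	error = vfs_attach(&foofs_vfsops);	/* on load */
	/* ... file system in service ... */
	error = vfs_detach(&foofs_vfsops);	/* on unload */
#endif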
/*
 * Remove a file system from the kernel.
 */
int
vfs_detach(vfs)
	struct vfsops *vfs;
{
	struct vfsops *v;

	/*
	 * Make sure no one is using the filesystem.
	 */
	if (vfs->vfs_refcount != 0)
		return (EBUSY);

	/*
	 * ...and remove it from the kernel's list.
	 */
	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (v == vfs) {
			LIST_REMOVE(v, vfs_list);
			break;
		}
	}

	if (v == NULL)
		return (ESRCH);

	/*
	 * Now run the file system-specific cleanups.
	 */
	(*vfs->vfs_done)();

	/*
	 * Free the vnode operations vector.
	 */
	vfs_opv_free(vfs->vfs_opv_descs);
	return (0);
}

#ifdef DDB
const char buf_flagbits[] =
	"\20\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI"
	"\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE"
	"\21ORDERED\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED"
	"\32XXX\33VFLUSH";

void
vfs_buf_print(bp, full, pr)
	struct buf *bp;
	int full;
	void (*pr) __P((const char *, ...));
{
	char buf[1024];

	(*pr)("  vp %p lblkno 0x%x blkno 0x%x dev 0x%x\n",
	    bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);

	bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf));
	(*pr)("  error %d flags 0x%s\n", bp->b_error, buf);

	(*pr)("  bufsize 0x%x bcount 0x%x resid 0x%x\n",
	    bp->b_bufsize, bp->b_bcount, bp->b_resid);
	(*pr)("  data %p saveaddr %p dep %p\n",
	    bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
	(*pr)("  iodone %p\n", bp->b_iodone);
}

const char vnode_flagbits[] =
	"\20\1ROOT\2TEXT\3SYSTEM\4ISTTY\11XLOCK\12XWANT\13BWAIT\14ALIASED"
	"\15DIROP\16LAYER\17ONWORKLIST\20DIRTY";

const char *vnode_types[] = {
	"VNON",
	"VREG",
	"VDIR",
	"VBLK",
	"VCHR",
	"VLNK",
	"VSOCK",
	"VFIFO",
	"VBAD",
};

const char *vnode_tags[] = {
	"VT_NON",
	"VT_UFS",
	"VT_NFS",
	"VT_MFS",
	"VT_MSDOSFS",
	"VT_LFS",
	"VT_LOFS",
	"VT_FDESC",
	"VT_PORTAL",
	"VT_NULL",
	"VT_UMAP",
	"VT_KERNFS",
	"VT_PROCFS",
	"VT_AFS",
	"VT_ISOFS",
	"VT_UNION",
	"VT_ADOSFS",
	"VT_EXT2FS",
	"VT_CODA",
	"VT_FILECORE",
	"VT_NTFS",
	"VT_VFS",
	"VT_OVERLAY"
};

void
vfs_vnode_print(vp, full, pr)
	struct vnode *vp;
	int full;
	void (*pr) __P((const char *, ...));
{
	char buf[256];
	const char *vtype, *vtag;

	uvm_object_printit(&vp->v_uvm.u_obj, full, pr);
	bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
	(*pr)("\nVNODE flags %s\n", buf);
	(*pr)("mp %p nio %d size 0x%x rwlock 0x%x glock 0x%x\n",
	    vp->v_mount, vp->v_uvm.u_nio, (int)vp->v_uvm.u_size,
	    vp->v_vnlock ? lockstatus(vp->v_vnlock) : 0x999,
	    lockstatus(&vp->v_glock));

	(*pr)("data %p usecount %d writecount %d holdcnt %d numoutput %d\n",
	    vp->v_data, vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt, vp->v_numoutput);

	vtype = (vp->v_type >= 0 &&
	    vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ?
	    vnode_types[vp->v_type] : "UNKNOWN";
	vtag = (vp->v_tag >= 0 &&
	    vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ?
	    vnode_tags[vp->v_tag] : "UNKNOWN";

	(*pr)("type %s(%d) tag %s(%d) id 0x%x mount %p typedata %p\n",
	    vtype, vp->v_type, vtag, vp->v_tag,
	    vp->v_id, vp->v_mount, vp->v_mountedhere);
	(*pr)("lastr 0x%x lastw 0x%x lasta 0x%x\n",
	    vp->v_lastr, vp->v_lastw, vp->v_lasta);
	(*pr)("cstart 0x%x clen 0x%x ralen 0x%x maxra 0x%x\n",
	    vp->v_cstart, vp->v_clen, vp->v_ralen, vp->v_maxra);

	if (full) {
		struct buf *bp;

		(*pr)("clean bufs:\n");
		LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}

		(*pr)("dirty bufs:\n");
		LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}
	}
}
#endif