/*	$NetBSD: vfs_subr.c,v 1.178 2002/09/06 13:18:43 gehenna Exp $	*/

/*-
 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.178 2002/09/06 13:18:43 gehenna Exp $");

#include "opt_ddb.h"
#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/dirent.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/syncfs/syncfs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_ddb.h>

#include <sys/sysctl.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
const int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
/* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */
struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct nfs_public nfs_pub;			/* publicly exported FS */

struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;		/* root device */

struct pool vnode_pool;			/* memory pool for vnodes */

/*
 * Local declarations.
 */
void insmntque __P((struct vnode *, struct mount *));
int getdevvp __P((dev_t, struct vnode **, enum vtype));
void vgoneall __P((struct vnode *));

static int vfs_hang_addrlist __P((struct mount *, struct netexport *,
    struct export_args *));
static int vfs_free_netcred __P((struct radix_node *, void *));
static void vfs_free_addrlist __P((struct netexport *));

#ifdef DEBUG
void printlockedvnodes __P((void));
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
	    &pool_allocator_nointr);

	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

/*
 * Mark a mount point as busy.  Used to synchronize access and to delay
 * unmounting.  Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
{
	int lkflags;

	while (mp->mnt_flag & MNT_UNMOUNT) {
		int gone;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
		    && mp->mnt_unmounter == curproc)
			return (EDEADLK);
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 *
		 * XXX MP: add spinlock protecting mnt_wcnt here once you
		 * can atomically unlock-and-sleep.
		 */
		mp->mnt_wcnt++;
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		mp->mnt_wcnt--;
		gone = mp->mnt_flag & MNT_GONE;

		if (mp->mnt_wcnt == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp)
	struct mount *mp;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}
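
/*
 * Illustrative sketch (not part of the original file): the canonical
 * way to walk the mount list is to busy each mount while examining it,
 * exactly as printlockedvnodes() and sysctl_vnode() do below:
 *
 *	simple_lock(&mountlist_slock);
 *	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
 *	    mp = nmp) {
 *		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
 *			nmp = CIRCLEQ_NEXT(mp, mnt_list);
 *			continue;
 *		}
 *		... examine mp ...
 *		simple_lock(&mountlist_slock);
 *		nmp = CIRCLEQ_NEXT(mp, mnt_list);
 *		vfs_unbusy(mp);
 *	}
 *	simple_unlock(&mountlist_slock);
 *
 * vfs_busy() takes a shared lock, so an in-progress unmount (which
 * holds it exclusively) makes the LK_NOWAIT attempt fail cleanly.
 */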

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfs_refcount++;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	struct mount *mp;

	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	mp->mnt_stat.f_fsid.val[0] = makedev(mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(mtype & 0xff, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(type)
	const char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}
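
/*
 * Illustrative sketch (not part of the original file): makefstype()
 * folds each character into the accumulator with a shift-and-xor.
 * For the hypothetical input "ffs" it evaluates as:
 *
 *	rv = 0
 *	rv = (0     << 2) ^ 'f' = 0x066
 *	rv = (0x066 << 2) ^ 'f' = 0x1fe
 *	rv = (0x1fe << 2) ^ 's' = 0x78b
 *
 * so makefstype("ffs") == 0x78b.  Distinct names can collide, which is
 * why the comment above puts 'unique' in quotes.
 */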

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	struct vattr *vap;
{

	vap->va_type = VNON;

	/*
	 * Assign individually so that it is safe even if size and
	 * sign of each member are varied.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) __P((void *));
	struct vnode **vpp;
{
	extern struct uvm_pagerops uvm_vnodeops;
	struct uvm_object *uobj;
	struct proc *p = curproc;	/* XXX */
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int error = 0, tryalloc;

try_again:
	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error.  If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock).
		 */
		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
		if (error && error != EDEADLK)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one.  The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list.  Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list.  If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list.  The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size.  We are reluctant to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */

	vp = NULL;

	simple_lock(&vnode_free_list_slock);

	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	tryalloc = numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));

	if (tryalloc &&
	    (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
		simple_unlock(&vnode_free_list_slock);
		memset(vp, 0, sizeof(*vp));
		simple_lock_init(&vp->v_interlock);
		uobj = &vp->v_uobj;
		uobj->pgops = &uvm_vnodeops;
		uobj->uo_npages = 0;
		TAILQ_INIT(&uobj->memq);
		numvnodes++;
	} else {
		if ((vp = TAILQ_FIRST(listhd = &vnode_free_list)) == NULL)
			vp = TAILQ_FIRST(listhd = &vnode_hold_list);
		for (; vp != NULL; vp = TAILQ_NEXT(vp, v_freelist)) {
			if (simple_lock_try(&vp->v_interlock)) {
				if ((vp->v_flag & VLAYER) == 0) {
					break;
				}
				if (VOP_ISLOCKED(vp) == 0)
					break;
				else
					simple_unlock(&vp->v_interlock);
			}
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			if (mp && error != EDEADLK)
				vfs_unbusy(mp);
			if (tryalloc) {
				printf("WARNING: unable to allocate new "
				    "vnode, retrying...\n");
				(void) tsleep(&lbolt, PRIBIO, "newvn", hz);
				goto try_again;
			}
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't, vp %p", vp);
		TAILQ_REMOVE(listhd, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;

		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data || vp->v_uobj.uo_npages ||
		    TAILQ_FIRST(&vp->v_uobj.memq))
			panic("cleaned vnode isn't, vp %p", vp);
		if (vp->v_numoutput)
			panic("clean vnode has pending I/O's, vp %p", vp);
#endif
		KASSERT((vp->v_flag & VONWORKLST) == 0);
		vp->v_flag = 0;
		vp->v_socket = NULL;
	}
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uobj.vmobjlock);

	/*
	 * initialize uvm_object within vnode.
	 */

	uobj = &vp->v_uobj;
	KASSERT(uobj->pgops == &uvm_vnodeops);
	KASSERT(uobj->uo_npages == 0);
	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
	vp->v_size = VSIZENOTSET;

	if (mp && error != EDEADLK)
		vfs_unbusy(mp);
	return (0);
}

/*
 * This is really just the reverse of getnewvnode().  Needed for
 * VFS_VGET functions that may need to push back a vnode in case
 * of a locking race.
 */
void
ungetnewvnode(vp)
	struct vnode *vp;
{
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 1)
		panic("ungetnewvnode: busy vnode");
#endif
	vp->v_usecount--;
	insmntque(vp, NULL);
	vp->v_type = VBAD;

	simple_lock(&vp->v_interlock);
	/*
	 * Insert at head of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_flag & MNT_UNMOUNT) &&
	    !(mp->mnt_flag & MNT_SOFTDEP) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	struct buf *bp;
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput, vp %p", vp);
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	struct buf *bp, *nbp;
	int s, error;
	int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
	    (flags & V_SAVE ? PGO_CLEANIT : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, 0, 0, flushflags);
	if (error) {
		return error;
	}

	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
		if (error)
			return (error);
#ifdef DIAGNOSTIC
		s = splbio();
		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
			panic("vinvalbuf: dirty bufs, vp %p", vp);
		splx(s);
#endif
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vinvalbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vinvalbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			bp->b_flags |= B_BUSY | B_VFLUSH;
			VOP_BWRITE(bp);
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	splx(s);

	return (0);
}
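
/*
 * Illustrative sketch (assumption, not part of the original file): a
 * caller that wants the cached contents written back before the vnode
 * is torn down passes V_SAVE, as vclean() does below:
 *
 *	error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
 *
 * while a caller discarding the data outright (e.g. a revoke path)
 * passes flags == 0 and lets the buffers be invalidated unwritten.
 */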

/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vtruncbuf(vp, lbn, slpflag, slptimeo)
	struct vnode *vp;
	daddr_t lbn;
	int slpflag, slptimeo;
{
	struct buf *bp, *nbp;
	int s, error;
	voff_t off;

	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
	if (error) {
		return error;
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep(bp, slpflag | (PRIBIO + 1),
			    "vtruncbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep(bp, slpflag | (PRIBIO + 1),
			    "vtruncbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	splx(s);

	return (0);
}

void
vflushbuf(vp, sync)
	struct vnode *vp;
	int sync;
{
	struct buf *bp, *nbp;
	int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
	int s;

	simple_lock(&vp->v_interlock);
	(void) VOP_PUTPAGES(vp, 0, 0, flags);

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_flags |= B_BUSY | B_VFLUSH;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
	}
	splx(s);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	struct vnode *vp;
	struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free, bp %p", bp);
	VHOLD(vp);
	s = splbio();
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == NULL)
		panic("brelvp: vp NULL, bp %p", bp);

	s = splbio();
	vp = bp->b_vp;
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);

	if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}

	bp->b_vp = NULL;
	HOLDRELE(vp);
	splx(s);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 *
 * This function must be called at splbio().
 */
void
reassignbuf(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &newvp->v_cleanblkhd;
		if (TAILQ_EMPTY(&newvp->v_uobj.memq) &&
		    (newvp->v_flag & VONWORKLST) &&
		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	} else {
		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			switch (newvp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
				break;
			}
			if (!newvp->v_mount ||
			    (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
				vn_syncer_add_to_worklist(newvp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
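
/*
 * Illustrative sketch (assumption, not part of the original file): the
 * classic consumer of bdevvp() is early root/swap setup, roughly
 *
 *	struct vnode *rootvp;
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("can't set up root vnode");
 *
 * after which rootvp can be handed to the filesystem's mount-root code.
 * The names rootdev/rootvp here stand in for whatever the caller uses.
 */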

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device).  If such an alias exists, deallocate
 * the existing contents and return the aliased vnode.  The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_NOWAIT);
		/* XXX Erg. */
		if (nvp->v_specinfo == NULL) {
			simple_unlock(&spechash_slock);
			uvm_wait("checkalias");
			goto loop;
		}

		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it.  If the vnode lock bit is set the
 * vnode is being eliminated in vgone.  In that case, we can not
 * grab the vnode, so the process is awakened when the transition is
 * completed, and an error returned to indicate that the vnode is no
 * longer usable (possibly having been changed to a new file system type).
 */
int
vget(vp, flags)
	struct vnode *vp;
	int flags;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */

	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		if (flags & LK_NOWAIT) {
			simple_unlock(&vp->v_interlock);
			return EBUSY;
		}
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vget", vp);
		panic("vget: usecount overflow, vp %p", vp);
	}
#endif
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active.  We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (vp->v_usecount > 0) {
				simple_unlock(&vp->v_interlock);
				return (error);
			}
			/*
			 * insert at tail of LRU list
			 */
			simple_lock(&vnode_free_list_slock);
			if (vp->v_holdcnt > 0)
				TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
				    v_freelist);
			else
				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
				    v_freelist);
			simple_unlock(&vnode_free_list_slock);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);
	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, p);
}
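
/*
 * Illustrative sketch (not part of the original file): hash-lookup
 * code typically wraps vget() in a retry loop, as checkalias() above
 * does; if the vnode is reclaimed or the lock attempt fails while
 * sleeping, the lookup simply starts over.  some_hash_slock is a
 * hypothetical per-hash lock standing in for the caller's own:
 *
 *	loop:
 *		simple_lock(&some_hash_slock);
 *		... find vp on the hash chain ...
 *		simple_lock(&vp->v_interlock);
 *		simple_unlock(&some_hash_slock);
 *		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK))
 *			goto loop;
 *		... use vp, then vput(vp) ...
 */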

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt vp %p", vp);
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
		VOP_INACTIVE(vp, p);
}

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	struct vnode *vp;
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	simple_lock(&vp->v_interlock);
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt vp %p", vp);
	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */

	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required, vp %p", vp);
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vref", vp);
		panic("vref: usecount overflow, vp %p", vp);
	}
#endif
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error).  If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = LIST_NEXT(vp, v_mntvnodes);
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device.  For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}
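
/*
 * Illustrative sketch (assumption, not part of the original file):
 * filesystem unmount paths (an fs's VFS_UNMOUNT implementation) are
 * the main consumers of vflush().  A forced unmount discards even
 * active vnodes, roughly
 *
 *	if ((error = vflush(mp, NULLVP,
 *	    (mntflags & MNT_FORCE) ? FORCECLOSE : 0)) != 0)
 *		return (error);
 *
 * while a normal unmount fails with EBUSY if any vnode is still busy.
 */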

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */

	if ((active = vp->v_usecount) != 0) {
		vp->v_usecount++;
#ifdef DIAGNOSTIC
		if (vp->v_usecount == 0) {
			vprint("vclean", vp);
			panic("vclean: usecount overflow");
		}
#endif
	}

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock, vp %p", vp);
	vp->v_flag |= VXLOCK;
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);

	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out.  The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);

	/*
	 * Clean out any cached data associated with the vnode.
	 */
	if (flags & DOCLOSE) {
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
		KASSERT((vp->v_flag & VONWORKLST) == 0);
	}
	LOCK_ASSERT(!simple_lock_held(&vp->v_interlock));

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.  Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim, vp %p", vp);
	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			/*
			 * Insert at tail of LRU list.
			 */

			simple_unlock(&vp->v_interlock);
			simple_lock(&vnode_free_list_slock);
#ifdef DIAGNOSTIC
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean, vp %p", vp);
#endif
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			simple_unlock(&vnode_free_list_slock);
		} else
			simple_unlock(&vp->v_interlock);
	}

	KASSERT(vp->v_uobj.uo_npages == 0);
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	simple_lock(&vp->v_interlock);
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		simple_unlock(&vp->v_interlock);
		wakeup((caddr_t)vp);
	} else
		simple_unlock(&vp->v_interlock);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */

	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */

	vclean(vp, DOCLOSE, p);
	KASSERT((vp->v_flag & VONWORKLST) == 0);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */

	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);

	/*
	 * If special device, remove it from the special device alias list,
	 * if it is on one.
	 */

	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (vp->v_hashchain != NULL) {
			if (*vp->v_hashchain == vp) {
				*vp->v_hashchain = vp->v_specnext;
			} else {
				for (vq = *vp->v_hashchain; vq;
				    vq = vq->v_specnext) {
					if (vq->v_specnext != vp)
						continue;
					vq->v_specnext = vp->v_specnext;
					break;
				}
				if (vq == NULL)
					panic("missing bdev");
			}
			if (vp->v_flag & VALIASED) {
				vx = NULL;
				for (vq = *vp->v_hashchain; vq;
				    vq = vq->v_specnext) {
					if (vq->v_rdev != vp->v_rdev ||
					    vq->v_type != vp->v_type)
						continue;
					if (vx)
						break;
					vx = vq;
				}
				if (vx == NULL)
					panic("missing alias");
				if (vq == NULL)
					vx->v_flag &= ~VALIASED;
				vp->v_flag &= ~VALIASED;
			}
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}

	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list.  The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */

	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean, vp %p", vp);
		if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
		    TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(maj, minl, minh, type)
	int maj, minl, minh;
	enum vtype type;
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp &&
		    (vq->v_flag & VXLOCK) == 0) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static const char * const typename[] =
    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	struct vnode *vp;
{
	char buf[96];

	if (label != NULL)
		printf("%s: ", label);
	printf("tag %d type %s, usecount %d, writecount %ld, refcount %ld,",
	    vp->v_tag, typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VEXECMAP)
		strcat(buf, "|VEXECMAP");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	    mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp))
				vprint(NULL, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	struct vfsconf vfc;
	extern const char * const mountcompatnames[];
	extern int nmountcompatnames;
#endif
	struct vfsops *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */

	/* Not generic: goes to file system. */
	if (name[0] != VFS_GENERIC) {
		static const struct ctlname vfsnames[VFS_MAXID+1]=CTL_VFS_NAMES;
		const char *vfsname;

		if (name[0] < 0 || name[0] > VFS_MAXID
		    || (vfsname = vfsnames[name[0]].ctl_name) == NULL)
			return (EOPNOTSUPP);

		vfsp = vfs_getopsbyname(vfsname);
		if (vfsp == NULL || vfsp->vfs_sysctl == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}

	/* The rest are generic vfs sysctls. */
	switch (name[1]) {
	case VFS_USERMOUNT:
		return sysctl_int(oldp, oldlenp, newp, newlen, &dovfsusermount);
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	case VFS_MAXTYPENUM:
		/*
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
	case VFS_CONF:
		/*
		 * Special: a node, next is a file system name.
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		if (name[2] >= nmountcompatnames || name[2] < 0 ||
		    mountcompatnames[name[2]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		vfc.vfc_vfsops = vfsp;
		strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
		vfc.vfc_typenum = name[2];
		vfc.vfc_refcount = vfsp->vfs_refcount;
		vfc.vfc_flags = 0;
		vfc.vfc_mountroot = vfsp->vfs_mountroot;
		vfc.vfc_next = NULL;
		return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
		    sizeof(struct vfsconf)));
#endif
	default:
		break;
	}
	return (EOPNOTSUPP);
}
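
/*
 * Illustrative sketch (assumption, not part of the original file): from
 * userland the generic node above is reached as vfs.generic.*, e.g. to
 * read the usermount knob handled by the VFS_USERMOUNT case:
 *
 *	int mib[3] = { CTL_VFS, VFS_GENERIC, VFS_USERMOUNT };
 *	int val;
 *	size_t len = sizeof(val);
 *	if (sysctl(mib, 3, &val, &len, NULL, 0) == -1)
 *		err(1, "sysctl");
 */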

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof(struct vnode *)
#define VNODESZ	sizeof(struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	    mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = LIST_FIRST(&mp->mnt_vnodelist);
		    vp != NULL;
		    vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = LIST_NEXT(vp, v_mntvnodes);
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	struct netcred *np, *enp;
	struct radix_node_head *rnh;
	int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		crcvt(&np->netc_anon, &argp->ex_anon);
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}

	if (argp->ex_addrlen > MLEN)
		return (EINVAL);

	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	memset((caddr_t)np, 0, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
	if (error)
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) {	/* already exists */
		if (rn == 0) {
			enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
			    smask, rnh);
			if (enp == 0) {
				error = EPERM;
				goto out;
			}
		} else
			enp = (struct netcred *)rn;

		if (enp->netc_exflags != argp->ex_flags ||
		    enp->netc_anon.cr_uid != argp->ex_anon.cr_uid ||
		    enp->netc_anon.cr_gid != argp->ex_anon.cr_gid ||
		    enp->netc_anon.cr_ngroups !=
		    (uint32_t) argp->ex_anon.cr_ngroups ||
		    memcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups,
		    enp->netc_anon.cr_ngroups))
			error = EPERM;
		else
			error = 0;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	crcvt(&np->netc_anon, &argp->ex_anon);
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}
/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	int i;
	struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i]) != NULL) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		if (mp->mnt_flag & MNT_EXPUBLIC) {
			vfs_setpublicfs(NULL, NULL, NULL);
			mp->mnt_flag &= ~MNT_EXPUBLIC;
		}
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (argp->ex_flags & MNT_EXPUBLIC) {
			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
				return (error);
			mp->mnt_flag |= MNT_EXPUBLIC;
		}
		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}
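
/*
 * Example: the usual update idiom for vfs_export() -- first delete
 * any existing exports, then add the new set.  A hedged sketch only;
 * "mp" and "nep" stand for a mount point and its netexport as a file
 * system's mount routine would hold them.
 */
#if 0
	struct export_args ea;
	int error;

	memset(&ea, 0, sizeof(ea));
	ea.ex_flags = MNT_DELEXPORT;	/* drop all current exports */
	(void) vfs_export(mp, nep, &ea);

	ea.ex_flags = MNT_EXPORTED;	/* ex_addrlen == 0: default entry, */
	error = vfs_export(mp, nep, &ea);	/* i.e. export to everyone */
#endif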
/*
 * Set the publicly exported filesystem (WebNFS).  Currently, only
 * one public filesystem is allowed by the spec (RFC 2054 and RFC 2055).
 */
int
vfs_setpublicfs(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;
	struct vnode *rvp;
	char *cp;

	/*
	 * mp == NULL -> invalidate the current info; the FS is
	 * no longer exported.  May be called from either vfs_export
	 * or unmount, so check if it hasn't already been done.
	 */
	if (mp == NULL) {
		if (nfs_pub.np_valid) {
			nfs_pub.np_valid = 0;
			if (nfs_pub.np_index != NULL) {
				FREE(nfs_pub.np_index, M_TEMP);
				nfs_pub.np_index = NULL;
			}
		}
		return (0);
	}

	/*
	 * Only one allowed at a time.
	 */
	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
		return (EBUSY);

	/*
	 * Get real filehandle for root of exported FS.
	 */
	memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;

	if ((error = VFS_ROOT(mp, &rvp)))
		return (error);

	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
		return (error);

	vput(rvp);

	/*
	 * If an indexfile was specified, pull it in.
	 */
	if (argp->ex_indexfile != NULL) {
		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
		    M_WAITOK);
		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
		    MAXNAMLEN, (size_t *)0);
		if (!error) {
			/*
			 * Check for illegal filenames.
			 */
			for (cp = nfs_pub.np_index; *cp; cp++) {
				if (*cp == '/') {
					error = EINVAL;
					break;
				}
			}
		}
		if (error) {
			FREE(nfs_pub.np_index, M_TEMP);
			return (error);
		}
	}

	nfs_pub.np_mount = mp;
	nfs_pub.np_valid = 1;
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	struct netcred *np;
	struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
				    (*rnh->rnh_matchaddr)((caddr_t)saddr,
				    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Do the usual access checking.
 * file_mode, uid and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list.
 */
int
vaccess(type, file_mode, uid, gid, acc_mode, cred)
	enum vtype type;
	mode_t file_mode;
	uid_t uid;
	gid_t gid;
	mode_t acc_mode;
	struct ucred *cred;
{
	mode_t mask;

	/*
	 * Super-user always gets read/write access, but execute access depends
	 * on at least one execute bit being set.
	 */
	if (cred->cr_uid == 0) {
		if ((acc_mode & VEXEC) && type != VDIR &&
		    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
			return (EACCES);
		return (0);
	}

	mask = 0;

	/* Otherwise, check the owner. */
	if (cred->cr_uid == uid) {
		if (acc_mode & VEXEC)
			mask |= S_IXUSR;
		if (acc_mode & VREAD)
			mask |= S_IRUSR;
		if (acc_mode & VWRITE)
			mask |= S_IWUSR;
		return ((file_mode & mask) == mask ? 0 : EACCES);
	}

	/* Otherwise, check the groups. */
	if (cred->cr_gid == gid || groupmember(gid, cred)) {
		if (acc_mode & VEXEC)
			mask |= S_IXGRP;
		if (acc_mode & VREAD)
			mask |= S_IRGRP;
		if (acc_mode & VWRITE)
			mask |= S_IWGRP;
		return ((file_mode & mask) == mask ? 0 : EACCES);
	}

	/* Otherwise, check everyone else. */
	if (acc_mode & VEXEC)
		mask |= S_IXOTH;
	if (acc_mode & VREAD)
		mask |= S_IROTH;
	if (acc_mode & VWRITE)
		mask |= S_IWOTH;
	return ((file_mode & mask) == mask ? 0 : EACCES);
}
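
/*
 * Example: what vaccess() computes for a regular file with mode 0640,
 * owner uid 100, group gid 10.  The three credentials below are
 * hypothetical stand-ins for a struct ucred with the named ids.
 */
#if 0
	/* Owner asking to write: S_IWUSR is set in 0640 -> returns 0. */
	error = vaccess(VREG, 0640, 100, 10, VWRITE, owner_cred);

	/* Group member asking to write: S_IWGRP is clear -> EACCES. */
	error = vaccess(VREG, 0640, 100, 10, VWRITE, group_cred);

	/* Everyone else asking to read: S_IROTH is clear -> EACCES. */
	error = vaccess(VREG, 0640, 100, 10, VREAD, other_cred);
#endif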
/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall(p)
	struct proc *p;
{
	struct mount *mp, *nmp;
	int allerror, error;

	for (allerror = 0,
	     mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
		printf("unmounting %s (%s)...\n",
		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
		/*
		 * XXX Freeze syncer.  Must do this before locking the
		 * mount point.  See dounmount() for details.
		 */
		lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
		if (vfs_busy(mp, 0, 0)) {
			lockmgr(&syncer_lock, LK_RELEASE, NULL);
			continue;
		}
		if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}
	if (allerror)
		printf("WARNING: some file systems would not unmount\n");
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
	struct buf *bp;
	int iter, nbusy, nbusy_prev = 0, dcount, s;
	struct proc *p = curproc;

	/* XXX we're certainly not running in proc0's context! */
	if (p == NULL)
		p = &proc0;

	printf("syncing disks... ");

	/* remove user processes from run queue */
	suspendsched();
	(void) spl0();

	/* avoid coming back this way again if we panic. */
	doing_shutdown = 1;

	sys_sync(p, NULL, NULL);

	/* Wait for sync to finish. */
	dcount = 10000;
	for (iter = 0; iter < 20;) {
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; ) {
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				nbusy++;
			/*
			 * With soft updates, some buffers that are
			 * written will be remarked as dirty until other
			 * buffers are written.
			 */
			if (bp->b_vp && bp->b_vp->v_mount
			    && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP)
			    && (bp->b_flags & B_DELWRI)) {
				s = splbio();
				bremfree(bp);
				bp->b_flags |= B_BUSY;
				splx(s);
				nbusy++;
				bawrite(bp);
				if (dcount-- <= 0) {
					printf("softdep ");
					goto fail;
				}
			}
		}
		if (nbusy == 0)
			break;
		if (nbusy_prev == 0)
			nbusy_prev = nbusy;
		printf("%d ", nbusy);
		tsleep(&nbusy, PRIBIO, "bflush",
		    (iter == 0) ? 1 : hz / 25 * iter);
		if (nbusy >= nbusy_prev) /* we didn't flush anything */
			iter++;
		else
			nbusy_prev = nbusy;
	}
	if (nbusy) {
fail:
#if defined(DEBUG) || defined(DEBUG_HALT_BUSY)
		printf("giving up\nPrinting vnodes for busy buffers\n");
		for (bp = &buf[nbuf]; --bp >= buf; )
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				vprint(NULL, bp->b_vp);

#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif

#else /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
		printf("giving up\n");
#endif /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
		return;
	} else
		printf("done\n");

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems. */
	vfs_unmountall(p);
}
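
/*
 * Example: vfs_shutdown() is normally reached from the machine-
 * dependent reboot path.  A hedged sketch of that caller; the real
 * code lives in each port's cpu_reboot() and varies by machine.
 */
#if 0
	if ((howto & RB_NOSYNC) == 0 && !cold) {
		vfs_shutdown();		/* sync and unmount, as above */
		resettodr();		/* write kernel time back to the RTC */
	}
#endif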
/*
 * Mount the root file system.  If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot()
{
	struct vfsops *v;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	switch (root_device->dv_class) {
	case DV_IFNET:
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET "
			    "(0x%08x -> %d,%d)", rootdev,
			    major(rootdev), minor(rootdev));
		break;

	case DV_DISK:
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    root_device->dv_xname);
		return (ENODEV);
	}

	/*
	 * If user specified a file system, use it.
	 */
	if (mountroot != NULL)
		return ((*mountroot)());

	/*
	 * Try each file system currently configured into the kernel.
	 */
	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (v->vfs_mountroot == NULL)
			continue;
#ifdef DEBUG
		printf("mountroot: trying %s...\n", v->vfs_name);
#endif
		if ((*v->vfs_mountroot)() == 0) {
			printf("root file system type: %s\n", v->vfs_name);
			break;
		}
	}

	if (v == NULL) {
		printf("no file system for %s", root_device->dv_xname);
		if (root_device->dv_class == DV_DISK)
			printf(" (dev 0x%x)", rootdev);
		printf("\n");
		return (EFTYPE);
	}
	return (0);
}

/*
 * Given a file system name, look up the vfsops for that
 * file system, or return NULL if the file system isn't present
 * in the kernel.
 */
struct vfsops *
vfs_getopsbyname(name)
	const char *name;
{
	struct vfsops *v;

	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (strcmp(v->vfs_name, name) == 0)
			break;
	}

	return (v);
}

/*
 * Establish a file system and initialize it.
 */
int
vfs_attach(vfs)
	struct vfsops *vfs;
{
	struct vfsops *v;
	int error = 0;

	/*
	 * Make sure this file system doesn't already exist.
	 */
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
			error = EEXIST;
			goto out;
		}
	}

	/*
	 * Initialize the vnode operations for this file system.
	 */
	vfs_opv_init(vfs->vfs_opv_descs);

	/*
	 * Now initialize the file system itself.
	 */
	(*vfs->vfs_init)();

	/*
	 * ...and link it into the kernel's list.
	 */
	LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);

	/*
	 * Sanity: make sure the reference count is 0.
	 */
	vfs->vfs_refcount = 0;

out:
	return (error);
}
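
/*
 * Example: how a file system registers and unregisters itself with
 * the list above.  A hedged sketch using a hypothetical "examplefs";
 * real callers do this at kernel initialization time or from an
 * LKM's attach/detach routines.
 */
#if 0
	extern struct vfsops examplefs_vfsops;	/* hypothetical */
	int error;

	if ((error = vfs_attach(&examplefs_vfsops)) != 0)
		printf("examplefs: vfs_attach failed (%d)\n", error);

	/* ... later; vfs_refcount must have dropped back to 0: */
	if ((error = vfs_detach(&examplefs_vfsops)) != 0)
		printf("examplefs: vfs_detach failed (%d)\n", error);
#endif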
/*
 * Remove a file system from the kernel.
 */
int
vfs_detach(vfs)
	struct vfsops *vfs;
{
	struct vfsops *v;

	/*
	 * Make sure no one is using the filesystem.
	 */
	if (vfs->vfs_refcount != 0)
		return (EBUSY);

	/*
	 * ...and remove it from the kernel's list.
	 */
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (v == vfs) {
			LIST_REMOVE(v, vfs_list);
			break;
		}
	}

	if (v == NULL)
		return (ESRCH);

	/*
	 * Now run the file system-specific cleanups.
	 */
	(*vfs->vfs_done)();

	/*
	 * Free the vnode operations vector.
	 */
	vfs_opv_free(vfs->vfs_opv_descs);
	return (0);
}

void
vfs_reinit(void)
{
	struct vfsops *vfs;

	LIST_FOREACH(vfs, &vfs_list, vfs_list) {
		if (vfs->vfs_reinit) {
			(*vfs->vfs_reinit)();
		}
	}
}

#ifdef DDB
const char buf_flagbits[] =
	"\20\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI"
	"\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE"
	"\21ORDERED\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED"
	"\32XXX\33VFLUSH";

void
vfs_buf_print(bp, full, pr)
	struct buf *bp;
	int full;
	void (*pr) __P((const char *, ...));
{
	char buf[1024];

	(*pr)(" vp %p lblkno 0x%x blkno 0x%x dev 0x%x\n",
	    bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);

	bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf));
	(*pr)(" error %d flags 0x%s\n", bp->b_error, buf);

	(*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
	    bp->b_bufsize, bp->b_bcount, bp->b_resid);
	(*pr)(" data %p saveaddr %p dep %p\n",
	    bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
	(*pr)(" iodone %p\n", bp->b_iodone);
}

const char vnode_flagbits[] =
	"\20\1ROOT\2TEXT\3SYSTEM\4ISTTY\5EXECMAP"
	"\11XLOCK\12XWANT\13BWAIT\14ALIASED"
	"\15DIROP\16LAYER\17ONWORKLIST\20DIRTY";

const char *vnode_types[] = {
	"VNON",
	"VREG",
	"VDIR",
	"VBLK",
	"VCHR",
	"VLNK",
	"VSOCK",
	"VFIFO",
	"VBAD",
};

const char *vnode_tags[] = {
	"VT_NON",
	"VT_UFS",
	"VT_NFS",
	"VT_MFS",
	"VT_MSDOSFS",
	"VT_LFS",
	"VT_LOFS",
	"VT_FDESC",
	"VT_PORTAL",
	"VT_NULL",
	"VT_UMAP",
	"VT_KERNFS",
	"VT_PROCFS",
	"VT_AFS",
	"VT_ISOFS",
	"VT_UNION",
	"VT_ADOSFS",
	"VT_EXT2FS",
	"VT_CODA",
	"VT_FILECORE",
	"VT_NTFS",
	"VT_VFS",
	"VT_OVERLAY"
};

void
vfs_vnode_print(vp, full, pr)
	struct vnode *vp;
	int full;
	void (*pr) __P((const char *, ...));
{
	char buf[256];
	const char *vtype, *vtag;
	int tmp;

	uvm_object_printit(&vp->v_uobj, full, pr);
	bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
	(*pr)("\nVNODE flags %s\n", buf);
	(*pr)("mp %p numoutput %d size 0x%llx\n",
	    vp->v_mount, vp->v_numoutput, vp->v_size);

	(*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
	    vp->v_data, vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt, vp->v_numoutput);

	vtype = ((tmp = vp->v_type) >= 0 &&
	    vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ?
	    vnode_types[vp->v_type] : "UNKNOWN";
	vtag = ((tmp = vp->v_tag) >= 0 &&
	    vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ?
	    vnode_tags[vp->v_tag] : "UNKNOWN";

	(*pr)("type %s(%d) tag %s(%d) id 0x%lx mount %p typedata %p\n",
	    vtype, vp->v_type, vtag, vp->v_tag,
	    vp->v_id, vp->v_mount, vp->v_mountedhere);

	if (full) {
		struct buf *bp;

		(*pr)("clean bufs:\n");
		LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}

		(*pr)("dirty bufs:\n");
		LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}
	}
}
#endif
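
/*
 * Example: the printers above are intended for use from the kernel
 * debugger.  A sketch of calling them directly, with the kernel's
 * printf standing in for the ddb output routine; "vp" and "bp" are
 * hypothetical pointers obtained elsewhere.
 */
#if 0
#ifdef DDB
	vfs_vnode_print(vp, 1 /* full: include clean/dirty buf lists */, printf);
	vfs_buf_print(bp, 0 /* terse */, printf);
#endif
#endif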