/*	$NetBSD: vfs_subr.c,v 1.160 2001/10/04 05:46:45 chs Exp $	*/

/*-
 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include "opt_ddb.h"
#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/dirent.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/syncfs/syncfs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_ddb.h>

#include <sys/sysctl.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
const int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}

/* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */
struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct nfs_public nfs_pub;		/* publicly exported FS */

struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;		/* root device */

struct pool vnode_pool;			/* memory pool for vnodes */

/*
 * Local declarations.
 */
void	insmntque __P((struct vnode *, struct mount *));
int	getdevvp __P((dev_t, struct vnode **, enum vtype));
void	vgoneall __P((struct vnode *));

static	int vfs_hang_addrlist __P((struct mount *, struct netexport *,
	    struct export_args *));
static	int vfs_free_netcred __P((struct radix_node *, void *));
static	void vfs_free_addrlist __P((struct netexport *));

#ifdef DEBUG
void	printlockedvnodes __P((void));
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
	    0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE);

	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

/*
 * Mark a mount point as busy.  Used to synchronize access and to delay
 * unmounting.  Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
{
	int lkflags;

	while (mp->mnt_flag & MNT_UNMOUNT) {
		int gone;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
		    && mp->mnt_unmounter == curproc)
			return (EDEADLK);
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 *
		 * XXX MP: add spinlock protecting mnt_wcnt here once you
		 * can atomically unlock-and-sleep.
		 */
		mp->mnt_wcnt++;
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		mp->mnt_wcnt--;
		gone = mp->mnt_flag & MNT_GONE;

		if (mp->mnt_wcnt == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp)
	struct mount *mp;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfs_refcount++;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}
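
/*
 * Illustrative caller sketch (not compiled): a filesystem's mountroot
 * routine typically obtains its root mount structure along these lines,
 * undoing the reference and busy state again if the device cannot be
 * mounted.  The fstype name and exact cleanup vary per filesystem.
 *
 *	struct mount *mp;
 *	int error;
 *
 *	if ((error = vfs_rootmountalloc("ffs", "root_device", &mp)) != 0)
 *		return (error);
 *	... mount the filesystem proper; on failure: ...
 *	mp->mnt_op->vfs_refcount--;
 *	vfs_unbusy(mp);
 *	free(mp, M_MOUNT);
 */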

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(type)
	const char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}
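
/*
 * For example, makefstype("ffs") folds the three characters into
 * ((('f' << 2) ^ 'f') << 2) ^ 's' == 0x78b.  Distinct names can in
 * principle collide, so the result is only 'unique' in practice.
 */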

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	struct vattr *vap;
{

	vap->va_type = VNON;

	/*
	 * Assign individually so that it is safe even if size and
	 * sign of each member are varied.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) __P((void *));
	struct vnode **vpp;
{
	extern struct uvm_pagerops uvm_vnodeops;
	struct uvm_object *uobj;
	struct proc *p = curproc;	/* XXX */
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int error = 0, tryalloc;

try_again:
	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error.  If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock).
		 */
		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
		if (error && error != EDEADLK)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one.  The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list.  Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list.  If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list.  The toggle ensures that half the time we
	 * will recycle a vnode from the vnode_hold_list, and half the
	 * time we will allocate a new one unless the list has grown to
	 * twice the desired size.  We are reluctant to recycle vnodes
	 * from the vnode_hold_list because we will lose the identity of
	 * all their referencing buffers.
	 */

	vp = NULL;

	simple_lock(&vnode_free_list_slock);

	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	tryalloc = numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));

	if (tryalloc &&
	    (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
		simple_unlock(&vnode_free_list_slock);
		memset(vp, 0, sizeof(*vp));
		simple_lock_init(&vp->v_interlock);
		uobj = &vp->v_uobj;
		uobj->pgops = &uvm_vnodeops;
		uobj->uo_npages = 0;
		TAILQ_INIT(&uobj->memq);
		numvnodes++;
	} else {
		if ((vp = TAILQ_FIRST(listhd = &vnode_free_list)) == NULL)
			vp = TAILQ_FIRST(listhd = &vnode_hold_list);
		for (; vp != NULL; vp = TAILQ_NEXT(vp, v_freelist)) {
			if (simple_lock_try(&vp->v_interlock)) {
				if ((vp->v_flag & VLAYER) == 0) {
					break;
				}
				if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT |
				    LK_RECURSEFAIL | LK_INTERLOCK)) {
					continue;
				}
				VOP_UNLOCK(vp, 0);
				break;
			}
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			if (mp && error != EDEADLK)
				vfs_unbusy(mp);
			if (tryalloc) {
				printf("WARNING: unable to allocate new "
				    "vnode, retrying...\n");
				(void) tsleep(&lbolt, PRIBIO, "newvn", hz);
				goto try_again;
			}
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't, vp %p", vp);
		TAILQ_REMOVE(listhd, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;

		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data || vp->v_uobj.uo_npages ||
		    TAILQ_FIRST(&vp->v_uobj.memq))
			panic("cleaned vnode isn't, vp %p", vp);
		if (vp->v_numoutput)
			panic("clean vnode has pending I/O's, vp %p", vp);
#endif
		vp->v_flag = 0;
		vp->v_socket = NULL;
	}
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uobj.vmobjlock);

	/*
	 * initialize uvm_object within vnode.
	 */

	uobj = &vp->v_uobj;
	KASSERT(uobj->pgops == &uvm_vnodeops);
	KASSERT(uobj->uo_npages == 0);
	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
	vp->v_size = VSIZENOTSET;

	if (mp && error != EDEADLK)
		vfs_unbusy(mp);
	return (0);
}
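
/*
 * Illustrative caller sketch (not compiled): VFS_VGET implementations
 * typically allocate along these lines, with "ufs_vnodeop_p" standing
 * in for the filesystem's own vnode operations vector:
 *
 *	struct vnode *vp;
 *	int error;
 *
 *	if ((error = getnewvnode(VT_UFS, mp, ufs_vnodeop_p, &vp)) != 0)
 *		return (error);
 *	... initialize vp->v_data; if another process won the race to
 *	... create the same vnode, push this one back with
 *	... ungetnewvnode(vp), defined next.
 */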

/*
 * This is really just the reverse of getnewvnode().  Needed for
 * VFS_VGET functions that may need to push back a vnode in case
 * of a locking race.
 */
void
ungetnewvnode(vp)
	struct vnode *vp;
{
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 1)
		panic("ungetnewvnode: busy vnode");
#endif
	vp->v_usecount--;
	insmntque(vp, NULL);
	vp->v_type = VBAD;

	simple_lock(&vp->v_interlock);
	/*
	 * Insert at head of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_flag & MNT_UNMOUNT) &&
	    !(mp->mnt_flag & MNT_SOFTDEP) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	struct buf *bp;
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput, vp %p", vp);
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	struct uvm_object *uobj = &vp->v_uobj;
	struct buf *bp, *nbp;
	int s, error;
	int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
	    (flags & V_SAVE ? PGO_CLEANIT : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	if (TAILQ_FIRST(&uobj->memq)) {
		simple_lock(&uobj->vmobjlock);
		error = (uobj->pgops->pgo_put)(uobj, 0, 0, flushflags);
		if (error) {
			return error;
		}
	}
	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
		if (error)
			return (error);
#ifdef DIAGNOSTIC
		s = splbio();
		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
			panic("vinvalbuf: dirty bufs, vp %p", vp);
		splx(s);
#endif
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vinvalbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vinvalbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			bp->b_flags |= B_BUSY | B_VFLUSH;
			VOP_BWRITE(bp);
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	splx(s);

	return (0);
}
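
/*
 * Illustrative caller sketch (not compiled): vclean() below uses the
 * V_SAVE variant so dirty data is written back before invalidation,
 * while a path that is discarding the device contents anyway would
 * omit it:
 *
 *	error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0);	write back first
 *	error = vinvalbuf(vp, 0, NOCRED, p, 0, 0);	just discard
 */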

/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vtruncbuf(vp, lbn, slpflag, slptimeo)
	struct vnode *vp;
	daddr_t lbn;
	int slpflag, slptimeo;
{
	struct uvm_object *uobj = &vp->v_uobj;
	struct buf *bp, *nbp;
	int s, error;

	s = splbio();
	if (TAILQ_FIRST(&uobj->memq)) {
		simple_lock(&uobj->vmobjlock);
		error = (uobj->pgops->pgo_put)(uobj,
		    round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift), 0,
		    PGO_FREE|PGO_SYNCIO);
		if (error) {
			splx(s);
			return error;
		}
	}

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep(bp, slpflag | (PRIBIO + 1),
			    "vtruncbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep(bp, slpflag | (PRIBIO + 1),
			    "vtruncbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	splx(s);

	return (0);
}

void
vflushbuf(vp, sync)
	struct vnode *vp;
	int sync;
{
	struct uvm_object *uobj = &vp->v_uobj;
	struct buf *bp, *nbp;
	int s;

	if (TAILQ_FIRST(&uobj->memq)) {
		int flags = PGO_CLEANIT | PGO_ALLPAGES |
		    (sync ? PGO_SYNCIO : 0);

		simple_lock(&uobj->vmobjlock);
		(void) (uobj->pgops->pgo_put)(uobj, 0, 0, flags);
	}

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_flags |= B_BUSY | B_VFLUSH;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
	}
	splx(s);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	struct vnode *vp;
	struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free, bp %p", bp);
	VHOLD(vp);
	s = splbio();
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == NULL)
		panic("brelvp: vp NULL, bp %p", bp);

	s = splbio();
	vp = bp->b_vp;
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);

	if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}

	bp->b_vp = NULL;
	HOLDRELE(vp);
	splx(s);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 *
 * This function must be called at splbio().
 */
void
reassignbuf(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &newvp->v_cleanblkhd;
		if (TAILQ_EMPTY(&newvp->v_uobj.memq) &&
		    (newvp->v_flag & VONWORKLST) &&
		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	} else {
		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			switch (newvp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
				break;
			}
			if (!newvp->v_mount ||
			    (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
				vn_syncer_add_to_worklist(newvp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
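
/*
 * Illustrative caller sketch (not compiled): mounting the root
 * filesystem and configuring a swap device both go through bdevvp()
 * to obtain a special vnode for the underlying block device:
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("cannot set up root device vnode");
 */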

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device).  If such an alias exists, deallocate
 * the existing contents and return the aliased vnode.  The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_NOWAIT);
		/* XXX Erg. */
		if (nvp->v_specinfo == NULL) {
			simple_unlock(&spechash_slock);
			uvm_wait("checkalias");
			goto loop;
		}

		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it.  If the vnode lock bit is set the
 * vnode is being eliminated in vgone.  In that case, we can not
 * grab the vnode, so the process is awakened when the transition is
 * completed, and an error returned to indicate that the vnode is no
 * longer usable (possibly having been changed to a new file system type).
 */
int
vget(vp, flags)
	struct vnode *vp;
	int flags;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */

	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		if (flags & LK_NOWAIT) {
			simple_unlock(&vp->v_interlock);
			return EBUSY;
		}
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vget", vp);
		panic("vget: usecount overflow, vp %p", vp);
	}
#endif
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active.  We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (vp->v_usecount > 0) {
				simple_unlock(&vp->v_interlock);
				return (error);
			}
			/*
			 * insert at tail of LRU list
			 */
			simple_lock(&vnode_free_list_slock);
			if (vp->v_holdcnt > 0)
				TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
				    v_freelist);
			else
				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
				    v_freelist);
			simple_unlock(&vnode_free_list_slock);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VTEXT) {
		uvmexp.vtextpages -= vp->v_uobj.uo_npages;
		uvmexp.vnodepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~VTEXT;
	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, p);
}
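
/*
 * Illustrative caller sketch (not compiled): code that finds a vnode in
 * a filesystem cache typically retries the whole lookup when vget()
 * fails, since failure means the vnode was (or is being) reclaimed:
 *
 * loop:
 *	vp = ...look up vnode in a filesystem hash...;
 *	if (vp != NULL && vget(vp, LK_EXCLUSIVE))
 *		goto loop;
 */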

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt vp %p", vp);
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VTEXT) {
		uvmexp.vtextpages -= vp->v_uobj.uo_npages;
		uvmexp.vnodepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~VTEXT;
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
		VOP_INACTIVE(vp, p);
}

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	struct vnode *vp;
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	simple_lock(&vp->v_interlock);
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt vp %p", vp);
	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */

	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required, vp %p", vp);
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vref", vp);
		panic("vref: usecount overflow, vp %p", vp);
	}
#endif
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones;
 * return error if any are found (nb: this is a user error, not a
 * system error).  If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		simple_lock(&vp->v_interlock);
		/*
		 * Skip over any vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device.  For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}
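
/*
 * Illustrative caller sketch (not compiled): a filesystem's unmount
 * path typically flushes everything except the vnodes it still needs,
 * forcing the issue only for "umount -f":
 *
 *	flags = SKIPSYSTEM | (mntflags & MNT_FORCE ? FORCECLOSE : 0);
 *	if ((error = vflush(mp, NULLVP, flags)) != 0)
 *		return (error);
 */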

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0) {
		/* We have the vnode interlock. */
		vp->v_usecount++;
#ifdef DIAGNOSTIC
		if (vp->v_usecount == 0) {
			vprint("vclean", vp);
			panic("vclean: usecount overflow");
		}
#endif
	}

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock, vp %p", vp);
	vp->v_flag |= VXLOCK;
	if (vp->v_flag & VTEXT) {
		uvmexp.vtextpages -= vp->v_uobj.uo_npages;
		uvmexp.vnodepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~VTEXT;

	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out.  The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);

	/*
	 * Clean out any cached data associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.  Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim, vp %p", vp);
	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			/*
			 * Insert at tail of LRU list.
			 */

			simple_unlock(&vp->v_interlock);
			simple_lock(&vnode_free_list_slock);
#ifdef DIAGNOSTIC
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean, vp %p", vp);
#endif
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			simple_unlock(&vnode_free_list_slock);
		} else
			simple_unlock(&vp->v_interlock);
	}

	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	simple_lock(&vp->v_interlock);
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		simple_unlock(&vp->v_interlock);
		wakeup((caddr_t)vp);
	} else
		simple_unlock(&vp->v_interlock);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		ltsleep((caddr_t)vp, PINOD | PNORELOCK,
		    "vgone", 0, &vp->v_interlock);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from the special device alias
	 * list, if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (vp->v_hashchain != NULL) {
			if (*vp->v_hashchain == vp) {
				*vp->v_hashchain = vp->v_specnext;
			} else {
				for (vq = *vp->v_hashchain; vq;
				     vq = vq->v_specnext) {
					if (vq->v_specnext != vp)
						continue;
					vq->v_specnext = vp->v_specnext;
					break;
				}
				if (vq == NULL)
					panic("missing bdev");
			}
			if (vp->v_flag & VALIASED) {
				vx = NULL;
				for (vq = *vp->v_hashchain; vq;
				     vq = vq->v_specnext) {
					if (vq->v_rdev != vp->v_rdev ||
					    vq->v_type != vp->v_type)
						continue;
					if (vx)
						break;
					vx = vq;
				}
				if (vx == NULL)
					panic("missing alias");
				if (vq == NULL)
					vx->v_flag &= ~VALIASED;
				vp->v_flag &= ~VALIASED;
			}
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list.  The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean, vp %p", vp);
		if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
		    TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(maj, minl, minh, type)
	int maj, minl, minh;
	enum vtype type;
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp &&
		    (vq->v_flag & VXLOCK) == 0) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static const char * const typename[] =
    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("tag %d type %s, usecount %d, writecount %ld, refcount %ld,",
	    vp->v_tag, typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}
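
/*
 * Illustrative caller sketch (not compiled): a device driver's detach
 * routine revokes any vnodes still referring to its units, for both
 * the block and character majors:
 *
 *	vdevgone(bmaj, 0, nunits - 1, VBLK);
 *	vdevgone(cmaj, 0, nunits - 1, VCHR);
 */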

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp))
				vprint(NULL, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top-level filesystem-related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	struct vfsconf vfc;
	extern const char * const mountcompatnames[];
	extern int nmountcompatnames;
#endif
	struct vfsops *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */

	/* Not generic: goes to file system. */
	if (name[0] != VFS_GENERIC) {
		static const struct ctlname vfsnames[VFS_MAXID+1] =
		    CTL_VFS_NAMES;
		const char *vfsname;

		if (name[0] < 0 || name[0] > VFS_MAXID
		    || (vfsname = vfsnames[name[0]].ctl_name) == NULL)
			return (EOPNOTSUPP);

		vfsp = vfs_getopsbyname(vfsname);
		if (vfsp == NULL || vfsp->vfs_sysctl == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}

	/* The rest are generic vfs sysctls. */
	switch (name[1]) {
	case VFS_USERMOUNT:
		return sysctl_int(oldp, oldlenp, newp, newlen,
		    &dovfsusermount);
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	case VFS_MAXTYPENUM:
		/*
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
	case VFS_CONF:
		/*
		 * Special: a node, next is a file system name.
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		if (name[2] >= nmountcompatnames || name[2] < 0 ||
		    mountcompatnames[name[2]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		vfc.vfc_vfsops = vfsp;
		strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
		vfc.vfc_typenum = name[2];
		vfc.vfc_refcount = vfsp->vfs_refcount;
		vfc.vfc_flags = 0;
		vfc.vfc_mountroot = vfsp->vfs_mountroot;
		vfc.vfc_next = NULL;
		return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
		    sizeof(struct vfsconf)));
#endif
	default:
		break;
	}
	return (EOPNOTSUPP);
}
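
/*
 * For example, the VFS_USERMOUNT knob above is what userland reaches
 * through sysctl(8) as "vfs.generic.usermount"; setting it to 1 lets
 * unprivileged users mount filesystems they own.
 */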

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10

/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof(struct vnode *)
#define VNODESZ	sizeof(struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ,
			    VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	struct netcred *np, *enp;
	struct radix_node_head *rnh;
	int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}

	if (argp->ex_addrlen > MLEN)
		return (EINVAL);

	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	memset((caddr_t)np, 0, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
	if (error)
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask,
		    argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * It seems silly to initialize every address family
		 * when most are never used; do so on demand here.
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) {	/* already exists */
		if (rn == 0) {
			enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
			    smask, rnh);
			if (enp == 0) {
				error = EPERM;
				goto out;
			}
		} else
			enp = (struct netcred *)rn;

		if (enp->netc_exflags != argp->ex_flags ||
		    enp->netc_anon.cr_uid != argp->ex_anon.cr_uid ||
		    enp->netc_anon.cr_gid != argp->ex_anon.cr_gid ||
		    enp->netc_anon.cr_ngroups != argp->ex_anon.cr_ngroups ||
		    memcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups,
			enp->netc_anon.cr_ngroups))
			error = EPERM;
		else
			error = 0;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
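 * Each per-address-family radix tree is walked with vfs_free_netcred()
 * to delete and free the individual netcred entries; the tree head
 * itself is then freed as well.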
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	int i;
	struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i]) != NULL) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		if (mp->mnt_flag & MNT_EXPUBLIC) {
			vfs_setpublicfs(NULL, NULL, NULL);
			mp->mnt_flag &= ~MNT_EXPUBLIC;
		}
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (argp->ex_flags & MNT_EXPUBLIC) {
			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
				return (error);
			mp->mnt_flag |= MNT_EXPUBLIC;
		}
		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

/*
 * Set the publicly exported filesystem (WebNFS).  Currently, only
 * one public filesystem is possible in the spec (RFC 2054 and 2055).
 */
int
vfs_setpublicfs(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;
	struct vnode *rvp;
	char *cp;

	/*
	 * mp == NULL -> invalidate the current info; the FS is
	 * no longer exported.  May be called from either vfs_export
	 * or unmount, so check if it hasn't already been done.
	 */
	if (mp == NULL) {
		if (nfs_pub.np_valid) {
			nfs_pub.np_valid = 0;
			if (nfs_pub.np_index != NULL) {
				FREE(nfs_pub.np_index, M_TEMP);
				nfs_pub.np_index = NULL;
			}
		}
		return (0);
	}

	/*
	 * Only one allowed at a time.
	 */
	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
		return (EBUSY);

	/*
	 * Get real filehandle for root of exported FS.
	 */
	memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;

	if ((error = VFS_ROOT(mp, &rvp)))
		return (error);

	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) {
		/* Don't leak the locked root vnode on error. */
		vput(rvp);
		return (error);
	}

	vput(rvp);

	/*
	 * If an indexfile was specified, pull it in.
	 */
	if (argp->ex_indexfile != NULL) {
		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
		    M_WAITOK);
		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
		    MAXNAMLEN, (size_t *)0);
		if (!error) {
			/*
			 * Check for illegal filenames.
			 */
			for (cp = nfs_pub.np_index; *cp; cp++) {
				if (*cp == '/') {
					error = EINVAL;
					break;
				}
			}
		}
		if (error) {
			FREE(nfs_pub.np_index, M_TEMP);
			return (error);
		}
	}

	nfs_pub.np_mount = mp;
	nfs_pub.np_valid = 1;
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	struct netcred *np;
	struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
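		 * The client address from the mbuf is matched against
		 * the per-address-family radix tree; a match on the
		 * tree's own root node (RNF_ROOT) is treated as a miss.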
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
					    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Do the usual access checking.
 * file_mode, uid and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list
 */
int
vaccess(type, file_mode, uid, gid, acc_mode, cred)
	enum vtype type;
	mode_t file_mode;
	uid_t uid;
	gid_t gid;
	mode_t acc_mode;
	struct ucred *cred;
{
	mode_t mask;

	/*
	 * Super-user always gets read/write access, but execute access
	 * depends on at least one execute bit being set.
	 */
	if (cred->cr_uid == 0) {
		if ((acc_mode & VEXEC) && type != VDIR &&
		    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
			return (EACCES);
		return (0);
	}

	mask = 0;

	/* Otherwise, check the owner. */
	if (cred->cr_uid == uid) {
		if (acc_mode & VEXEC)
			mask |= S_IXUSR;
		if (acc_mode & VREAD)
			mask |= S_IRUSR;
		if (acc_mode & VWRITE)
			mask |= S_IWUSR;
		return ((file_mode & mask) == mask ? 0 : EACCES);
	}

	/* Otherwise, check the groups. */
	if (cred->cr_gid == gid || groupmember(gid, cred)) {
		if (acc_mode & VEXEC)
			mask |= S_IXGRP;
		if (acc_mode & VREAD)
			mask |= S_IRGRP;
		if (acc_mode & VWRITE)
			mask |= S_IWGRP;
		return ((file_mode & mask) == mask ? 0 : EACCES);
	}

	/* Otherwise, check everyone else. */
	if (acc_mode & VEXEC)
		mask |= S_IXOTH;
	if (acc_mode & VREAD)
		mask |= S_IROTH;
	if (acc_mode & VWRITE)
		mask |= S_IWOTH;
	return ((file_mode & mask) == mask ? 0 : EACCES);
}

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall(p)
	struct proc *p;
{
	struct mount *mp, *nmp;
	int allerror, error;

	for (allerror = 0,
	     mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
		printf("unmounting %s (%s)...\n",
		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
		/*
		 * XXX Freeze syncer.  Must do this before locking the
		 * mount point.  See dounmount() for details.
		 */
		lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
		if (vfs_busy(mp, 0, 0)) {
			lockmgr(&syncer_lock, LK_RELEASE, NULL);
			continue;
		}
		if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}
	if (allerror)
		printf("WARNING: some file systems would not unmount\n");
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
	struct buf *bp;
	int iter, nbusy, nbusy_prev = 0, dcount, s;
	struct proc *p = curproc;

	/* XXX we're certainly not running in proc0's context!
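	 * If this is reached from a panic there may be no current
	 * process at all, so fall back to proc0 rather than handing
	 * sys_sync() a NULL proc.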
	 */
	if (p == NULL)
		p = &proc0;

	printf("syncing disks... ");

	/* remove user process from run queue */
	suspendsched();
	(void) spl0();

	/* avoid coming back this way again if we panic. */
	doing_shutdown = 1;

	sys_sync(p, NULL, NULL);

	/* Wait for sync to finish. */
	dcount = 10000;
	for (iter = 0; iter < 20;) {
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; ) {
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				nbusy++;
			/*
			 * With soft updates, some buffers that are
			 * written will be remarked as dirty until other
			 * buffers are written.
			 */
			if (bp->b_vp && bp->b_vp->v_mount
			    && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP)
			    && (bp->b_flags & B_DELWRI)) {
				s = splbio();
				bremfree(bp);
				bp->b_flags |= B_BUSY;
				splx(s);
				nbusy++;
				bawrite(bp);
				if (dcount-- <= 0) {
					printf("softdep ");
					goto fail;
				}
			}
		}
		if (nbusy == 0)
			break;
		if (nbusy_prev == 0)
			nbusy_prev = nbusy;
		printf("%d ", nbusy);
		tsleep(&nbusy, PRIBIO, "bflush",
		    (iter == 0) ? 1 : hz / 25 * iter);
		if (nbusy >= nbusy_prev) /* we didn't flush anything */
			iter++;
		else
			nbusy_prev = nbusy;
	}
	if (nbusy) {
fail:
#if defined(DEBUG) || defined(DEBUG_HALT_BUSY)
		printf("giving up\nPrinting vnodes for busy buffers\n");
		for (bp = &buf[nbuf]; --bp >= buf; )
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				vprint(NULL, bp->b_vp);

#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif

#else /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
		printf("giving up\n");
#endif /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
		return;
	} else
		printf("done\n");

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems. */
	vfs_unmountall(p);
}

/*
 * Mount the root file system.  If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot()
{
	extern int (*mountroot) __P((void));
	struct vfsops *v;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	switch (root_device->dv_class) {
	case DV_IFNET:
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET");
		break;

	case DV_DISK:
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    root_device->dv_xname);
		return (ENODEV);
	}

	/*
	 * If user specified a file system, use it.
	 */
	if (mountroot != NULL)
		return ((*mountroot)());

	/*
	 * Try each file system currently configured into the kernel.
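	 * They are tried in the order they appear on vfs_list; the
	 * first vfs_mountroot hook that returns success determines
	 * the root file system type.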
	 */
	for (v = LIST_FIRST(&vfs_list); v != NULL;
	     v = LIST_NEXT(v, vfs_list)) {
		if (v->vfs_mountroot == NULL)
			continue;
#ifdef DEBUG
		printf("mountroot: trying %s...\n", v->vfs_name);
#endif
		if ((*v->vfs_mountroot)() == 0) {
			printf("root file system type: %s\n", v->vfs_name);
			break;
		}
	}

	if (v == NULL) {
		printf("no file system for %s", root_device->dv_xname);
		if (root_device->dv_class == DV_DISK)
			printf(" (dev 0x%x)", rootdev);
		printf("\n");
		return (EFTYPE);
	}
	return (0);
}

/*
 * Given a file system name, look up the vfsops for that
 * file system, or return NULL if the file system isn't present
 * in the kernel.
 */
struct vfsops *
vfs_getopsbyname(name)
	const char *name;
{
	struct vfsops *v;

	for (v = LIST_FIRST(&vfs_list); v != NULL;
	     v = LIST_NEXT(v, vfs_list)) {
		if (strcmp(v->vfs_name, name) == 0)
			break;
	}

	return (v);
}

/*
 * Establish a file system and initialize it.
 */
int
vfs_attach(vfs)
	struct vfsops *vfs;
{
	struct vfsops *v;
	int error = 0;

	/*
	 * Make sure this file system doesn't already exist.
	 */
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
			error = EEXIST;
			goto out;
		}
	}

	/*
	 * Initialize the vnode operations for this file system.
	 */
	vfs_opv_init(vfs->vfs_opv_descs);

	/*
	 * Now initialize the file system itself.
	 */
	(*vfs->vfs_init)();

	/*
	 * ...and link it into the kernel's list.
	 */
	LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);

	/*
	 * Sanity: make sure the reference count is 0.
	 */
	vfs->vfs_refcount = 0;

 out:
	return (error);
}

/*
 * Remove a file system from the kernel.
 */
int
vfs_detach(vfs)
	struct vfsops *vfs;
{
	struct vfsops *v;

	/*
	 * Make sure no one is using the filesystem.
	 */
	if (vfs->vfs_refcount != 0)
		return (EBUSY);

	/*
	 * ...and remove it from the kernel's list.
	 */
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (v == vfs) {
			LIST_REMOVE(v, vfs_list);
			break;
		}
	}

	if (v == NULL)
		return (ESRCH);

	/*
	 * Now run the file system-specific cleanups.
	 */
	(*vfs->vfs_done)();

	/*
	 * Free the vnode operations vector.
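	 * (It was allocated by vfs_opv_init() when the file system
	 * was attached.)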
	 */
	vfs_opv_free(vfs->vfs_opv_descs);
	return (0);
}

void
vfs_reinit(void)
{
	struct vfsops *vfs;

	LIST_FOREACH(vfs, &vfs_list, vfs_list) {
		if (vfs->vfs_reinit) {
			(*vfs->vfs_reinit)();
		}
	}
}

#ifdef DDB
const char buf_flagbits[] =
	"\20\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI"
	"\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE"
	"\21ORDERED\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED"
	"\32XXX\33VFLUSH";

void
vfs_buf_print(bp, full, pr)
	struct buf *bp;
	int full;
	void (*pr) __P((const char *, ...));
{
	char buf[1024];

	(*pr)("  vp %p lblkno 0x%x blkno 0x%x dev 0x%x\n",
	    bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);

	bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf));
	(*pr)("  error %d flags 0x%s\n", bp->b_error, buf);

	(*pr)("  bufsize 0x%x bcount 0x%x resid 0x%x\n",
	    bp->b_bufsize, bp->b_bcount, bp->b_resid);
	(*pr)("  data %p saveaddr %p dep %p\n",
	    bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
	(*pr)("  iodone %p\n", bp->b_iodone);
}

const char vnode_flagbits[] =
	"\20\1ROOT\2TEXT\3SYSTEM\4ISTTY\11XLOCK\12XWANT\13BWAIT\14ALIASED"
	"\15DIROP\16LAYER\17ONWORKLIST\20DIRTY";

const char *vnode_types[] = {
	"VNON",
	"VREG",
	"VDIR",
	"VBLK",
	"VCHR",
	"VLNK",
	"VSOCK",
	"VFIFO",
	"VBAD",
};

const char *vnode_tags[] = {
	"VT_NON",
	"VT_UFS",
	"VT_NFS",
	"VT_MFS",
	"VT_MSDOSFS",
	"VT_LFS",
	"VT_LOFS",
	"VT_FDESC",
	"VT_PORTAL",
	"VT_NULL",
	"VT_UMAP",
	"VT_KERNFS",
	"VT_PROCFS",
	"VT_AFS",
	"VT_ISOFS",
	"VT_UNION",
	"VT_ADOSFS",
	"VT_EXT2FS",
	"VT_CODA",
	"VT_FILECORE",
	"VT_NTFS",
	"VT_VFS",
	"VT_OVERLAY"
};

void
vfs_vnode_print(vp, full, pr)
	struct vnode *vp;
	int full;
	void (*pr) __P((const char *, ...));
{
	char buf[256];
	const char *vtype, *vtag;

	uvm_object_printit(&vp->v_uobj, full, pr);
	bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
	(*pr)("\nVNODE flags %s\n", buf);
	(*pr)("mp %p numoutput %d size 0x%llx\n",
	    vp->v_mount, vp->v_numoutput, vp->v_size);

	(*pr)("data %p usecount %d writecount %d holdcnt %d numoutput %d\n",
	    vp->v_data, vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt, vp->v_numoutput);

	vtype = (vp->v_type >= 0 &&
	    vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ?
	    vnode_types[vp->v_type] : "UNKNOWN";
	vtag = (vp->v_tag >= 0 &&
	    vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ?
	    vnode_tags[vp->v_tag] : "UNKNOWN";

	(*pr)("type %s(%d) tag %s(%d) id 0x%x mount %p typedata %p\n",
	    vtype, vp->v_type, vtag, vp->v_tag,
	    vp->v_id, vp->v_mount, vp->v_mountedhere);

	if (full) {
		struct buf *bp;

		(*pr)("clean bufs:\n");
		LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}

		(*pr)("dirty bufs:\n");
		LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}
	}
}
#endif