/*	$NetBSD: vfs_subr.c,v 1.162 2001/11/12 15:25:39 lukem Exp $	*/

/*-
 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.162 2001/11/12 15:25:39 lukem Exp $");

#include "opt_ddb.h"
#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/dirent.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/syncfs/syncfs.h>

#include <uvm/uvm.h>
#include <uvm/uvm_ddb.h>

#include <sys/sysctl.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
const int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
/* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */
struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =		/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =		/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct nfs_public nfs_pub;		/* publicly exported FS */

struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;
/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;		/* root device */

struct pool vnode_pool;			/* memory pool for vnodes */

/*
 * Local declarations.
 */
void	insmntque __P((struct vnode *, struct mount *));
int	getdevvp __P((dev_t, struct vnode **, enum vtype));
void	vgoneall __P((struct vnode *));

static	int vfs_hang_addrlist __P((struct mount *, struct netexport *,
	    struct export_args *));
static	int vfs_free_netcred __P((struct radix_node *, void *));
static	void vfs_free_addrlist __P((struct netexport *));

#ifdef DEBUG
void	printlockedvnodes __P((void));
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
	    0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE);

	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

/*
 * Mark a mount point as busy.  Used to synchronize access and to delay
 * unmounting.  Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
{
	int lkflags;

	while (mp->mnt_flag & MNT_UNMOUNT) {
		int gone;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
		    && mp->mnt_unmounter == curproc)
			return (EDEADLK);
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 *
		 * XXX MP: add spinlock protecting mnt_wcnt here once you
		 * can atomically unlock-and-sleep.
		 */
		mp->mnt_wcnt++;
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		mp->mnt_wcnt--;
		gone = mp->mnt_flag & MNT_GONE;

		if (mp->mnt_wcnt == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp)
	struct mount *mp;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}
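/*
 * Example (illustrative sketch; the caller shown is hypothetical):
 * the usual pattern is to bracket traversal of a mount point with
 * vfs_busy()/vfs_unbusy(), so that a concurrent unmount either waits
 * or makes this caller fail cleanly:
 *
 *	if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock) == 0) {
 *		... operate on mp and its vnode list ...
 *		vfs_unbusy(mp);
 *	}
 */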
/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfs_refcount++;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(type)
	const char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	struct vattr *vap;
{

	vap->va_type = VNON;

	/*
	 * Assign individually so that it is safe even if size and
	 * sign of each member are varied.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_vaflags = 0;
}
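/*
 * Example (illustrative sketch; the caller shown is hypothetical):
 * vattr_null() is typically used to prepare a struct vattr for
 * VOP_SETATTR(), so that only the fields assigned afterwards are
 * acted upon and everything left at VNOVAL is ignored:
 *
 *	struct vattr va;
 *
 *	vattr_null(&va);
 *	va.va_size = 0;			(truncate to zero length)
 *	error = VOP_SETATTR(vp, &va, cred, p);
 */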
/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) __P((void *));
	struct vnode **vpp;
{
	extern struct uvm_pagerops uvm_vnodeops;
	struct uvm_object *uobj;
	struct proc *p = curproc;	/* XXX */
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int error = 0, tryalloc;

 try_again:
	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error.  If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock).
		 */
		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
		if (error && error != EDEADLK)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one.  The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list.  Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list.  If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list.  The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size.  We are reluctant to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */

	vp = NULL;

	simple_lock(&vnode_free_list_slock);

	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	tryalloc = numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));

	if (tryalloc &&
	    (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
		simple_unlock(&vnode_free_list_slock);
		memset(vp, 0, sizeof(*vp));
		simple_lock_init(&vp->v_interlock);
		uobj = &vp->v_uobj;
		uobj->pgops = &uvm_vnodeops;
		uobj->uo_npages = 0;
		TAILQ_INIT(&uobj->memq);
		numvnodes++;
	} else {
		if ((vp = TAILQ_FIRST(listhd = &vnode_free_list)) == NULL)
			vp = TAILQ_FIRST(listhd = &vnode_hold_list);
		for (; vp != NULL; vp = TAILQ_NEXT(vp, v_freelist)) {
			if (simple_lock_try(&vp->v_interlock)) {
				if ((vp->v_flag & VLAYER) == 0) {
					break;
				}
				if (vn_lock(vp, LK_EXCLUSIVE | LK_NOWAIT |
				    LK_RECURSEFAIL | LK_INTERLOCK)) {
					continue;
				}
				VOP_UNLOCK(vp, 0);
				break;
			}
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			if (mp && error != EDEADLK)
				vfs_unbusy(mp);
			if (tryalloc) {
				printf("WARNING: unable to allocate new "
				    "vnode, retrying...\n");
				(void) tsleep(&lbolt, PRIBIO, "newvn", hz);
				goto try_again;
			}
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't, vp %p", vp);
		TAILQ_REMOVE(listhd, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;

		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data || vp->v_uobj.uo_npages ||
		    TAILQ_FIRST(&vp->v_uobj.memq))
			panic("cleaned vnode isn't, vp %p", vp);
		if (vp->v_numoutput)
			panic("clean vnode has pending I/O's, vp %p", vp);
#endif
		vp->v_flag = 0;
		vp->v_socket = NULL;
	}
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uobj.vmobjlock);

	/*
	 * initialize uvm_object within vnode.
	 */

	uobj = &vp->v_uobj;
	KASSERT(uobj->pgops == &uvm_vnodeops);
	KASSERT(uobj->uo_npages == 0);
	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
	vp->v_size = VSIZENOTSET;

	if (mp && error != EDEADLK)
		vfs_unbusy(mp);
	return (0);
}

/*
 * This is really just the reverse of getnewvnode().  Needed for
 * VFS_VGET functions that may need to push back a vnode in case
 * of a locking race.
 */
void
ungetnewvnode(vp)
	struct vnode *vp;
{
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 1)
		panic("ungetnewvnode: busy vnode");
#endif
	vp->v_usecount--;
	insmntque(vp, NULL);
	vp->v_type = VBAD;

	simple_lock(&vp->v_interlock);
	/*
	 * Insert at head of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_flag & MNT_UNMOUNT) &&
	    !(mp->mnt_flag & MNT_SOFTDEP) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}
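/*
 * Example (illustrative sketch; the hash lookup shown is a
 * hypothetical filesystem helper): a VFS_VGET-style routine allocates
 * a fresh vnode with getnewvnode(), then checks whether another
 * process won the race to create the same file's vnode; if so it
 * pushes the fresh vnode back with ungetnewvnode() and uses the
 * existing one:
 *
 *	error = getnewvnode(VT_UFS, mp, ufs_vnodeop_p, &vp);
 *	if (error)
 *		return (error);
 *	if ((ovp = xxx_hashget(dev, ino)) != NULL) {
 *		ungetnewvnode(vp);
 *		*vpp = ovp;
 *		return (0);
 *	}
 */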
/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	struct buf *bp;
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput, vp %p", vp);
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	struct uvm_object *uobj = &vp->v_uobj;
	struct buf *bp, *nbp;
	int s, error;
	int flushflags = PGO_ALLPAGES|PGO_FREE|PGO_SYNCIO|
	    (flags & V_SAVE ? PGO_CLEANIT : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	if (TAILQ_FIRST(&uobj->memq)) {
		simple_lock(&uobj->vmobjlock);
		error = (uobj->pgops->pgo_put)(uobj, 0, 0, flushflags);
		if (error) {
			return error;
		}
	}
	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
		if (error)
			return (error);
#ifdef DIAGNOSTIC
		s = splbio();
		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
			panic("vinvalbuf: dirty bufs, vp %p", vp);
		splx(s);
#endif
	}

	s = splbio();

 restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vinvalbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vinvalbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			bp->b_flags |= B_BUSY | B_VFLUSH;
			VOP_BWRITE(bp);
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	splx(s);

	return (0);
}
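/*
 * Example (illustrative sketch; the caller shown is hypothetical):
 * code that is about to discard a vnode's backing store first writes
 * out any valid data and then invalidates all buffers:
 *
 *	error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
 *	if (error)
 *		return (error);
 *
 * Passing 0 instead of V_SAVE invalidates dirty buffers without
 * writing them back first.
 */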
/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vtruncbuf(vp, lbn, slpflag, slptimeo)
	struct vnode *vp;
	daddr_t lbn;
	int slpflag, slptimeo;
{
	struct uvm_object *uobj = &vp->v_uobj;
	struct buf *bp, *nbp;
	int s, error;

	s = splbio();
	if (TAILQ_FIRST(&uobj->memq)) {
		simple_lock(&uobj->vmobjlock);
		error = (uobj->pgops->pgo_put)(uobj,
		    round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift), 0,
		    PGO_FREE|PGO_SYNCIO);
		if (error) {
			splx(s);
			return error;
		}
	}

 restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep(bp, slpflag | (PRIBIO + 1),
			    "vtruncbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep(bp, slpflag | (PRIBIO + 1),
			    "vtruncbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	splx(s);

	return (0);
}

void
vflushbuf(vp, sync)
	struct vnode *vp;
	int sync;
{
	struct uvm_object *uobj = &vp->v_uobj;
	struct buf *bp, *nbp;
	int s;

	if (TAILQ_FIRST(&uobj->memq)) {
		int flags = PGO_CLEANIT|PGO_ALLPAGES| (sync ? PGO_SYNCIO : 0);

		simple_lock(&uobj->vmobjlock);
		(void) (uobj->pgops->pgo_put)(uobj, 0, 0, flags);
	}

 loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_flags |= B_BUSY | B_VFLUSH;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
	}
	splx(s);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	struct vnode *vp;
	struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free, bp %p", bp);
	VHOLD(vp);
	s = splbio();
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == NULL)
		panic("brelvp: vp NULL, bp %p", bp);

	s = splbio();
	vp = bp->b_vp;
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);

	if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}

	bp->b_vp = NULL;
	HOLDRELE(vp);
	splx(s);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 *
 * This function must be called at splbio().
 */
void
reassignbuf(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &newvp->v_cleanblkhd;
		if (TAILQ_EMPTY(&newvp->v_uobj.memq) &&
		    (newvp->v_flag & VONWORKLST) &&
		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	} else {
		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			switch (newvp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
				break;
			}
			if (!newvp->v_mount ||
			    (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
				vn_syncer_add_to_worklist(newvp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
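/*
 * Example (illustrative sketch): this is how the root device
 * typically gets its vnode at mount time; rootdev is the device
 * number chosen during autoconfiguration and rootvp receives the
 * resulting block-device vnode:
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("can't set up rootvp");
 */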
/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
 loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_NOWAIT);
		/* XXX Erg. */
		if (nvp->v_specinfo == NULL) {
			simple_unlock(&spechash_slock);
			uvm_wait("checkalias");
			goto loop;
		}

		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. If the vnode lock bit is set the
 * vnode is being eliminated in vgone. In that case, we cannot
 * grab the vnode, so the process is awakened when the transition is
 * completed, and an error returned to indicate that the vnode is no
 * longer usable (possibly having been changed to a new file system type).
 */
int
vget(vp, flags)
	struct vnode *vp;
	int flags;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */

	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		if (flags & LK_NOWAIT) {
			simple_unlock(&vp->v_interlock);
			return EBUSY;
		}
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vget", vp);
		panic("vget: usecount overflow, vp %p", vp);
	}
#endif
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active.  We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (vp->v_usecount > 0) {
				simple_unlock(&vp->v_interlock);
				return (error);
			}
			/*
			 * insert at tail of LRU list
			 */
			simple_lock(&vnode_free_list_slock);
			if (vp->v_holdcnt > 0)
				TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
				    v_freelist);
			else
				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
				    v_freelist);
			simple_unlock(&vnode_free_list_slock);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VEXECMAP) {
		uvmexp.vtextpages -= vp->v_uobj.uo_npages;
		uvmexp.vnodepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);
	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, p);
}
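/*
 * Example (illustrative sketch; the caller shown is hypothetical):
 * vget() and vput() bracket a locked use of a vnode found through a
 * cache or hash list; if vget() fails, the vnode is being recycled
 * and the lookup must be retried:
 *
 *	if (vget(vp, LK_EXCLUSIVE) != 0)
 *		goto retry;
 *	... use the locked, referenced vnode ...
 *	vput(vp);
 */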
/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt vp %p", vp);
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VEXECMAP) {
		uvmexp.vtextpages -= vp->v_uobj.uo_npages;
		uvmexp.vnodepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
		VOP_INACTIVE(vp, p);
}

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	struct vnode *vp;
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	simple_lock(&vp->v_interlock);
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt vp %p", vp);
	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */

	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required, vp %p", vp);
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vref", vp);
		panic("vref: usecount overflow, vp %p", vp);
	}
#endif
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
 loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device.  For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0) {
		/* We have the vnode interlock. */
		vp->v_usecount++;
#ifdef DIAGNOSTIC
		if (vp->v_usecount == 0) {
			vprint("vclean", vp);
			panic("vclean: usecount overflow");
		}
#endif
	}

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock, vp %p", vp);
	vp->v_flag |= VXLOCK;
	if (vp->v_flag & VEXECMAP) {
		uvmexp.vtextpages -= vp->v_uobj.uo_npages;
		uvmexp.vnodepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);

	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out.  The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);

	/*
	 * Clean out any cached data associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim, vp %p", vp);
	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			/*
			 * Insert at tail of LRU list.
			 */

			simple_unlock(&vp->v_interlock);
			simple_lock(&vnode_free_list_slock);
#ifdef DIAGNOSTIC
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean, vp %p", vp);
#endif
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			simple_unlock(&vnode_free_list_slock);
		} else
			simple_unlock(&vp->v_interlock);
	}

	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	simple_lock(&vp->v_interlock);
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		simple_unlock(&vp->v_interlock);
		wakeup((caddr_t)vp);
	} else
		simple_unlock(&vp->v_interlock);
}
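/*
 * The VXLOCK/VXWANT handshake that vclean() completes above pairs
 * with waiters elsewhere in this file (vget() and vgonel()).  For
 * illustration only, the waiting side looks like:
 *
 *	if (vp->v_flag & VXLOCK) {
 *		vp->v_flag |= VXWANT;
 *		ltsleep(vp, PINOD|PNORELOCK, "...", 0, &vp->v_interlock);
 *		... the vnode is now dead or recycled; start over ...
 *	}
 */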
/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		ltsleep((caddr_t)vp, PINOD | PNORELOCK,
		    "vgone", 0, &vp->v_interlock);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * If special device, remove it from the special device alias list,
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (vp->v_hashchain != NULL) {
			if (*vp->v_hashchain == vp) {
				*vp->v_hashchain = vp->v_specnext;
			} else {
				for (vq = *vp->v_hashchain; vq;
				     vq = vq->v_specnext) {
					if (vq->v_specnext != vp)
						continue;
					vq->v_specnext = vp->v_specnext;
					break;
				}
				if (vq == NULL)
					panic("missing bdev");
			}
			if (vp->v_flag & VALIASED) {
				vx = NULL;
				for (vq = *vp->v_hashchain; vq;
				     vq = vq->v_specnext) {
					if (vq->v_rdev != vp->v_rdev ||
					    vq->v_type != vp->v_type)
						continue;
					if (vx)
						break;
					vx = vq;
				}
				if (vx == NULL)
					panic("missing alias");
				if (vq == NULL)
					vx->v_flag &= ~VALIASED;
				vp->v_flag &= ~VALIASED;
			}
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean, vp %p", vp);
		if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
		    TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(maj, minl, minh, type)
	int maj, minl, minh;
	enum vtype type;
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

 loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp &&
		    (vq->v_flag & VXLOCK) == 0) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static const char * const typename[] =
    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	struct vnode *vp;
{
	char buf[96];

	if (label != NULL)
		printf("%s: ", label);
	printf("tag %d type %s, usecount %d, writecount %ld, refcount %ld,",
	    vp->v_tag, typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VEXECMAP)
		strcat(buf, "|VEXECMAP");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}
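/*
 * Example (illustrative sketch; the driver majors and unit count are
 * hypothetical): a device driver's detach routine uses vdevgone() to
 * revoke any vnodes still referring to its device nodes:
 *
 *	vdevgone(xxx_bmajor, 0, unit, VBLK);
 *	vdevgone(xxx_cmajor, 0, unit, VCHR);
 */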
#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp))
				vprint(NULL, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	struct vfsconf vfc;
	extern const char * const mountcompatnames[];
	extern int nmountcompatnames;
#endif
	struct vfsops *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */

	/* Not generic: goes to file system. */
	if (name[0] != VFS_GENERIC) {
		static const struct ctlname vfsnames[VFS_MAXID+1]=CTL_VFS_NAMES;
		const char *vfsname;

		if (name[0] < 0 || name[0] > VFS_MAXID
		    || (vfsname = vfsnames[name[0]].ctl_name) == NULL)
			return (EOPNOTSUPP);

		vfsp = vfs_getopsbyname(vfsname);
		if (vfsp == NULL || vfsp->vfs_sysctl == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}

	/* The rest are generic vfs sysctls. */
	switch (name[1]) {
	case VFS_USERMOUNT:
		return sysctl_int(oldp, oldlenp, newp, newlen, &dovfsusermount);
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	case VFS_MAXTYPENUM:
		/*
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
	case VFS_CONF:
		/*
		 * Special: a node, next is a file system name.
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		if (name[2] >= nmountcompatnames || name[2] < 0 ||
		    mountcompatnames[name[2]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		vfc.vfc_vfsops = vfsp;
		strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
		vfc.vfc_typenum = name[2];
		vfc.vfc_refcount = vfsp->vfs_refcount;
		vfc.vfc_flags = 0;
		vfc.vfc_mountroot = vfsp->vfs_mountroot;
		vfc.vfc_next = NULL;
		return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
		    sizeof(struct vfsconf)));
#endif
	default:
		break;
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
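/*
 * Example (illustrative sketch of a userland caller): the
 * VFS_USERMOUNT knob handled above is reachable from userland as
 * vfs.generic.usermount, e.g.
 *
 *	int mib[3] = { CTL_VFS, VFS_GENERIC, VFS_USERMOUNT };
 *	int val = 1;
 *
 *	sysctl(mib, 3, NULL, NULL, &val, sizeof(val));
 *
 * which is equivalent to `sysctl -w vfs.generic.usermount=1'.
 */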
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof(struct vnode *)
#define VNODESZ	sizeof(struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		savebp = bp;
 again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}
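/*
 * Example (illustrative sketch of a userland consumer): like other
 * variable-size sysctls, the vnode dump is used in two passes; the
 * first call sizes the buffer (note the KINFO_VNODESLOP slack above
 * for vnodes created in between), the second fetches the data:
 *
 *	int mib[2] = { CTL_KERN, KERN_VNODE };
 *	size_t len;
 *	char *buf;
 *
 *	sysctl(mib, 2, NULL, &len, NULL, 0);	(sizing pass)
 *	buf = malloc(len);
 *	sysctl(mib, 2, buf, &len, NULL, 0);	(copyout pass)
 */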
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	struct netcred *np, *enp;
	struct radix_node_head *rnh;
	int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}

	if (argp->ex_addrlen > MLEN)
		return (EINVAL);

	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	memset((caddr_t)np, 0, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
	if (error)
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * It seems silly to initialize every address family
		 * when most are never used, so attach the radix tree
		 * on demand here instead.
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) {	/* already exists */
		if (rn == 0) {
			enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
			    smask, rnh);
			if (enp == 0) {
				error = EPERM;
				goto out;
			}
		} else
			enp = (struct netcred *)rn;

		/* Compare the full gid array, not just its first
		 * cr_ngroups bytes. */
		if (enp->netc_exflags != argp->ex_flags ||
		    enp->netc_anon.cr_uid != argp->ex_anon.cr_uid ||
		    enp->netc_anon.cr_gid != argp->ex_anon.cr_gid ||
		    enp->netc_anon.cr_ngroups != argp->ex_anon.cr_ngroups ||
		    memcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups,
			enp->netc_anon.cr_ngroups * sizeof(gid_t)))
			error = EPERM;
		else
			error = 0;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
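 *
 * Each per-address-family radix tree is emptied by handing
 * vfs_free_netcred() to its rnh_walktree() routine, which deletes and
 * frees every entry; the head itself is then freed and the ne_rtable[]
 * slot cleared so a later export can attach it again on demand.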
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	int i;
	struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i]) != NULL) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		if (mp->mnt_flag & MNT_EXPUBLIC) {
			vfs_setpublicfs(NULL, NULL, NULL);
			mp->mnt_flag &= ~MNT_EXPUBLIC;
		}
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (argp->ex_flags & MNT_EXPUBLIC) {
			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
				return (error);
			mp->mnt_flag |= MNT_EXPUBLIC;
		}
		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

/*
 * Set the publicly exported filesystem (WebNFS). Currently only one
 * public filesystem is possible per the spec (RFC 2054 and RFC 2055).
 */
int
vfs_setpublicfs(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;
	struct vnode *rvp;
	char *cp;

	/*
	 * mp == NULL -> invalidate the current info, the FS is
	 * no longer exported. May be called from either vfs_export
	 * or unmount, so check if it hasn't already been done.
	 */
	if (mp == NULL) {
		if (nfs_pub.np_valid) {
			nfs_pub.np_valid = 0;
			if (nfs_pub.np_index != NULL) {
				FREE(nfs_pub.np_index, M_TEMP);
				nfs_pub.np_index = NULL;
			}
		}
		return (0);
	}

	/*
	 * Only one allowed at a time.
	 */
	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
		return (EBUSY);

	/*
	 * Get real filehandle for root of exported FS.
	 */
	memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;

	if ((error = VFS_ROOT(mp, &rvp)))
		return (error);

	/*
	 * Release the root vnode on both paths; returning with it
	 * still locked and referenced would leak it on error.
	 */
	error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid);
	vput(rvp);
	if (error)
		return (error);

	/*
	 * If an index file was specified, pull it in.
	 */
	if (argp->ex_indexfile != NULL) {
		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
		    M_WAITOK);
		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
		    MAXNAMLEN, (size_t *)0);
		if (!error) {
			/*
			 * Check for illegal filenames.
			 */
			for (cp = nfs_pub.np_index; *cp; cp++) {
				if (*cp == '/') {
					error = EINVAL;
					break;
				}
			}
		}
		if (error) {
			FREE(nfs_pub.np_index, M_TEMP);
			return (error);
		}
	}

	nfs_pub.np_mount = mp;
	nfs_pub.np_valid = 1;
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	struct netcred *np;
	struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Look up in the export list first.
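		 *
		 * nam is expected to carry the client's struct
		 * sockaddr.  The match is done against the
		 * per-address-family radix tree that
		 * vfs_hang_addrlist() built; a hit on the tree's own
		 * root node (RNF_ROOT) is an artifact of the radix
		 * code rather than a real export entry, so it is
		 * discarded below.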
2315 */ 2316 if (nam != NULL) { 2317 saddr = mtod(nam, struct sockaddr *); 2318 rnh = nep->ne_rtable[saddr->sa_family]; 2319 if (rnh != NULL) { 2320 np = (struct netcred *) 2321 (*rnh->rnh_matchaddr)((caddr_t)saddr, 2322 rnh); 2323 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 2324 np = NULL; 2325 } 2326 } 2327 /* 2328 * If no address match, use the default if it exists. 2329 */ 2330 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 2331 np = &nep->ne_defexported; 2332 } 2333 return (np); 2334 } 2335 2336 /* 2337 * Do the usual access checking. 2338 * file_mode, uid and gid are from the vnode in question, 2339 * while acc_mode and cred are from the VOP_ACCESS parameter list 2340 */ 2341 int 2342 vaccess(type, file_mode, uid, gid, acc_mode, cred) 2343 enum vtype type; 2344 mode_t file_mode; 2345 uid_t uid; 2346 gid_t gid; 2347 mode_t acc_mode; 2348 struct ucred *cred; 2349 { 2350 mode_t mask; 2351 2352 /* 2353 * Super-user always gets read/write access, but execute access depends 2354 * on at least one execute bit being set. 2355 */ 2356 if (cred->cr_uid == 0) { 2357 if ((acc_mode & VEXEC) && type != VDIR && 2358 (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0) 2359 return (EACCES); 2360 return (0); 2361 } 2362 2363 mask = 0; 2364 2365 /* Otherwise, check the owner. */ 2366 if (cred->cr_uid == uid) { 2367 if (acc_mode & VEXEC) 2368 mask |= S_IXUSR; 2369 if (acc_mode & VREAD) 2370 mask |= S_IRUSR; 2371 if (acc_mode & VWRITE) 2372 mask |= S_IWUSR; 2373 return ((file_mode & mask) == mask ? 0 : EACCES); 2374 } 2375 2376 /* Otherwise, check the groups. */ 2377 if (cred->cr_gid == gid || groupmember(gid, cred)) { 2378 if (acc_mode & VEXEC) 2379 mask |= S_IXGRP; 2380 if (acc_mode & VREAD) 2381 mask |= S_IRGRP; 2382 if (acc_mode & VWRITE) 2383 mask |= S_IWGRP; 2384 return ((file_mode & mask) == mask ? 0 : EACCES); 2385 } 2386 2387 /* Otherwise, check everyone else. */ 2388 if (acc_mode & VEXEC) 2389 mask |= S_IXOTH; 2390 if (acc_mode & VREAD) 2391 mask |= S_IROTH; 2392 if (acc_mode & VWRITE) 2393 mask |= S_IWOTH; 2394 return ((file_mode & mask) == mask ? 0 : EACCES); 2395 } 2396 2397 /* 2398 * Unmount all file systems. 2399 * We traverse the list in reverse order under the assumption that doing so 2400 * will avoid needing to worry about dependencies. 2401 */ 2402 void 2403 vfs_unmountall(p) 2404 struct proc *p; 2405 { 2406 struct mount *mp, *nmp; 2407 int allerror, error; 2408 2409 for (allerror = 0, 2410 mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 2411 nmp = mp->mnt_list.cqe_prev; 2412 #ifdef DEBUG 2413 printf("unmounting %s (%s)...\n", 2414 mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname); 2415 #endif 2416 /* 2417 * XXX Freeze syncer. Must do this before locking the 2418 * mount point. See dounmount() for details. 2419 */ 2420 lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL); 2421 if (vfs_busy(mp, 0, 0)) { 2422 lockmgr(&syncer_lock, LK_RELEASE, NULL); 2423 continue; 2424 } 2425 if ((error = dounmount(mp, MNT_FORCE, p)) != 0) { 2426 printf("unmount of %s failed with error %d\n", 2427 mp->mnt_stat.f_mntonname, error); 2428 allerror = 1; 2429 } 2430 } 2431 if (allerror) 2432 printf("WARNING: some file systems would not unmount\n"); 2433 } 2434 2435 /* 2436 * Sync and unmount file systems before shutting down. 2437 */ 2438 void 2439 vfs_shutdown() 2440 { 2441 struct buf *bp; 2442 int iter, nbusy, nbusy_prev = 0, dcount, s; 2443 struct proc *p = curproc; 2444 2445 /* XXX we're certainly not running in proc0's context! 
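	 * If curproc is NULL (e.g. a shutdown initiated from interrupt
	 * context, or a panic taken before any process exists), fall
	 * back on proc0 so that sys_sync() and dounmount() below at
	 * least have a struct proc to work with, even though we are
	 * not truly running on proc0's behalf.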
*/ 2446 if (p == NULL) 2447 p = &proc0; 2448 2449 printf("syncing disks... "); 2450 2451 /* remove user process from run queue */ 2452 suspendsched(); 2453 (void) spl0(); 2454 2455 /* avoid coming back this way again if we panic. */ 2456 doing_shutdown = 1; 2457 2458 sys_sync(p, NULL, NULL); 2459 2460 /* Wait for sync to finish. */ 2461 dcount = 10000; 2462 for (iter = 0; iter < 20;) { 2463 nbusy = 0; 2464 for (bp = &buf[nbuf]; --bp >= buf; ) { 2465 if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY) 2466 nbusy++; 2467 /* 2468 * With soft updates, some buffers that are 2469 * written will be remarked as dirty until other 2470 * buffers are written. 2471 */ 2472 if (bp->b_vp && bp->b_vp->v_mount 2473 && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP) 2474 && (bp->b_flags & B_DELWRI)) { 2475 s = splbio(); 2476 bremfree(bp); 2477 bp->b_flags |= B_BUSY; 2478 splx(s); 2479 nbusy++; 2480 bawrite(bp); 2481 if (dcount-- <= 0) { 2482 printf("softdep "); 2483 goto fail; 2484 } 2485 } 2486 } 2487 if (nbusy == 0) 2488 break; 2489 if (nbusy_prev == 0) 2490 nbusy_prev = nbusy; 2491 printf("%d ", nbusy); 2492 tsleep(&nbusy, PRIBIO, "bflush", 2493 (iter == 0) ? 1 : hz / 25 * iter); 2494 if (nbusy >= nbusy_prev) /* we didn't flush anything */ 2495 iter++; 2496 else 2497 nbusy_prev = nbusy; 2498 } 2499 if (nbusy) { 2500 fail: 2501 #if defined(DEBUG) || defined(DEBUG_HALT_BUSY) 2502 printf("giving up\nPrinting vnodes for busy buffers\n"); 2503 for (bp = &buf[nbuf]; --bp >= buf; ) 2504 if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY) 2505 vprint(NULL, bp->b_vp); 2506 2507 #if defined(DDB) && defined(DEBUG_HALT_BUSY) 2508 Debugger(); 2509 #endif 2510 2511 #else /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */ 2512 printf("giving up\n"); 2513 #endif /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */ 2514 return; 2515 } else 2516 printf("done\n"); 2517 2518 /* 2519 * If we've panic'd, don't make the situation potentially 2520 * worse by unmounting the file systems. 2521 */ 2522 if (panicstr != NULL) 2523 return; 2524 2525 /* Release inodes held by texts before update. */ 2526 #ifdef notdef 2527 vnshutdown(); 2528 #endif 2529 /* Unmount file systems. */ 2530 vfs_unmountall(p); 2531 } 2532 2533 /* 2534 * Mount the root file system. If the operator didn't specify a 2535 * file system to use, try all possible file systems until one 2536 * succeeds. 2537 */ 2538 int 2539 vfs_mountroot() 2540 { 2541 extern int (*mountroot) __P((void)); 2542 struct vfsops *v; 2543 2544 if (root_device == NULL) 2545 panic("vfs_mountroot: root device unknown"); 2546 2547 switch (root_device->dv_class) { 2548 case DV_IFNET: 2549 if (rootdev != NODEV) 2550 panic("vfs_mountroot: rootdev set for DV_IFNET"); 2551 break; 2552 2553 case DV_DISK: 2554 if (rootdev == NODEV) 2555 panic("vfs_mountroot: rootdev not set for DV_DISK"); 2556 break; 2557 2558 default: 2559 printf("%s: inappropriate for root file system\n", 2560 root_device->dv_xname); 2561 return (ENODEV); 2562 } 2563 2564 /* 2565 * If user specified a file system, use it. 2566 */ 2567 if (mountroot != NULL) 2568 return ((*mountroot)()); 2569 2570 /* 2571 * Try each file system currently configured into the kernel. 
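 *
 * Every vfsops on vfs_list that provides a vfs_mountroot hook is
 * tried in list order, and the first hook returning 0 wins.  For
 * example, a kernel configured with both ffs and nfs would attempt
 * ffs_mountroot() and then nfs_mountroot() until one succeeds.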
2572 */ 2573 for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) { 2574 if (v->vfs_mountroot == NULL) 2575 continue; 2576 #ifdef DEBUG 2577 printf("mountroot: trying %s...\n", v->vfs_name); 2578 #endif 2579 if ((*v->vfs_mountroot)() == 0) { 2580 printf("root file system type: %s\n", v->vfs_name); 2581 break; 2582 } 2583 } 2584 2585 if (v == NULL) { 2586 printf("no file system for %s", root_device->dv_xname); 2587 if (root_device->dv_class == DV_DISK) 2588 printf(" (dev 0x%x)", rootdev); 2589 printf("\n"); 2590 return (EFTYPE); 2591 } 2592 return (0); 2593 } 2594 2595 /* 2596 * Given a file system name, look up the vfsops for that 2597 * file system, or return NULL if file system isn't present 2598 * in the kernel. 2599 */ 2600 struct vfsops * 2601 vfs_getopsbyname(name) 2602 const char *name; 2603 { 2604 struct vfsops *v; 2605 2606 for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) { 2607 if (strcmp(v->vfs_name, name) == 0) 2608 break; 2609 } 2610 2611 return (v); 2612 } 2613 2614 /* 2615 * Establish a file system and initialize it. 2616 */ 2617 int 2618 vfs_attach(vfs) 2619 struct vfsops *vfs; 2620 { 2621 struct vfsops *v; 2622 int error = 0; 2623 2624 2625 /* 2626 * Make sure this file system doesn't already exist. 2627 */ 2628 LIST_FOREACH(v, &vfs_list, vfs_list) { 2629 if (strcmp(vfs->vfs_name, v->vfs_name) == 0) { 2630 error = EEXIST; 2631 goto out; 2632 } 2633 } 2634 2635 /* 2636 * Initialize the vnode operations for this file system. 2637 */ 2638 vfs_opv_init(vfs->vfs_opv_descs); 2639 2640 /* 2641 * Now initialize the file system itself. 2642 */ 2643 (*vfs->vfs_init)(); 2644 2645 /* 2646 * ...and link it into the kernel's list. 2647 */ 2648 LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list); 2649 2650 /* 2651 * Sanity: make sure the reference count is 0. 2652 */ 2653 vfs->vfs_refcount = 0; 2654 2655 out: 2656 return (error); 2657 } 2658 2659 /* 2660 * Remove a file system from the kernel. 2661 */ 2662 int 2663 vfs_detach(vfs) 2664 struct vfsops *vfs; 2665 { 2666 struct vfsops *v; 2667 2668 /* 2669 * Make sure no one is using the filesystem. 2670 */ 2671 if (vfs->vfs_refcount != 0) 2672 return (EBUSY); 2673 2674 /* 2675 * ...and remove it from the kernel's list. 2676 */ 2677 LIST_FOREACH(v, &vfs_list, vfs_list) { 2678 if (v == vfs) { 2679 LIST_REMOVE(v, vfs_list); 2680 break; 2681 } 2682 } 2683 2684 if (v == NULL) 2685 return (ESRCH); 2686 2687 /* 2688 * Now run the file system-specific cleanups. 2689 */ 2690 (*vfs->vfs_done)(); 2691 2692 /* 2693 * Free the vnode operations vector. 
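 *
 * This undoes the vfs_opv_init() performed by vfs_attach(), so a
 * successful attach/detach pair leaves the kernel's vnode operation
 * tables exactly as they were found.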
2694 */ 2695 vfs_opv_free(vfs->vfs_opv_descs); 2696 return (0); 2697 } 2698 2699 void 2700 vfs_reinit(void) 2701 { 2702 struct vfsops *vfs; 2703 2704 LIST_FOREACH(vfs, &vfs_list, vfs_list) { 2705 if (vfs->vfs_reinit) { 2706 (*vfs->vfs_reinit)(); 2707 } 2708 } 2709 } 2710 2711 #ifdef DDB 2712 const char buf_flagbits[] = 2713 "\20\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI" 2714 "\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE" 2715 "\21ORDERED\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED" 2716 "\32XXX\33VFLUSH"; 2717 2718 void 2719 vfs_buf_print(bp, full, pr) 2720 struct buf *bp; 2721 int full; 2722 void (*pr) __P((const char *, ...)); 2723 { 2724 char buf[1024]; 2725 2726 (*pr)(" vp %p lblkno 0x%x blkno 0x%x dev 0x%x\n", 2727 bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev); 2728 2729 bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf)); 2730 (*pr)(" error %d flags 0x%s\n", bp->b_error, buf); 2731 2732 (*pr)(" bufsize 0x%x bcount 0x%x resid 0x%x\n", 2733 bp->b_bufsize, bp->b_bcount, bp->b_resid); 2734 (*pr)(" data %p saveaddr %p dep %p\n", 2735 bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep)); 2736 (*pr)(" iodone %p\n", bp->b_iodone); 2737 } 2738 2739 2740 const char vnode_flagbits[] = 2741 "\20\1ROOT\2TEXT\3SYSTEM\4ISTTY\11XLOCK\12XWANT\13BWAIT\14ALIASED" 2742 "\15DIROP\16LAYER\17ONWORKLIST\20DIRTY"; 2743 2744 const char *vnode_types[] = { 2745 "VNON", 2746 "VREG", 2747 "VDIR", 2748 "VBLK", 2749 "VCHR", 2750 "VLNK", 2751 "VSOCK", 2752 "VFIFO", 2753 "VBAD", 2754 }; 2755 2756 const char *vnode_tags[] = { 2757 "VT_NON", 2758 "VT_UFS", 2759 "VT_NFS", 2760 "VT_MFS", 2761 "VT_MSDOSFS", 2762 "VT_LFS", 2763 "VT_LOFS", 2764 "VT_FDESC", 2765 "VT_PORTAL", 2766 "VT_NULL", 2767 "VT_UMAP", 2768 "VT_KERNFS", 2769 "VT_PROCFS", 2770 "VT_AFS", 2771 "VT_ISOFS", 2772 "VT_UNION", 2773 "VT_ADOSFS", 2774 "VT_EXT2FS", 2775 "VT_CODA", 2776 "VT_FILECORE", 2777 "VT_NTFS", 2778 "VT_VFS", 2779 "VT_OVERLAY" 2780 }; 2781 2782 void 2783 vfs_vnode_print(vp, full, pr) 2784 struct vnode *vp; 2785 int full; 2786 void (*pr) __P((const char *, ...)); 2787 { 2788 char buf[256]; 2789 const char *vtype, *vtag; 2790 2791 uvm_object_printit(&vp->v_uobj, full, pr); 2792 bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf)); 2793 (*pr)("\nVNODE flags %s\n", buf); 2794 (*pr)("mp %p numoutput %d size 0x%llx\n", 2795 vp->v_mount, vp->v_numoutput, vp->v_size); 2796 2797 (*pr)("data %p usecount %d writecount %d holdcnt %d numoutput %d\n", 2798 vp->v_data, vp->v_usecount, vp->v_writecount, 2799 vp->v_holdcnt, vp->v_numoutput); 2800 2801 vtype = (vp->v_type >= 0 && 2802 vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ? 2803 vnode_types[vp->v_type] : "UNKNOWN"; 2804 vtag = (vp->v_tag >= 0 && 2805 vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ? 2806 vnode_tags[vp->v_tag] : "UNKNOWN"; 2807 2808 (*pr)("type %s(%d) tag %s(%d) id 0x%x mount %p typedata %p\n", 2809 vtype, vp->v_type, vtag, vp->v_tag, 2810 vp->v_id, vp->v_mount, vp->v_mountedhere); 2811 2812 if (full) { 2813 struct buf *bp; 2814 2815 (*pr)("clean bufs:\n"); 2816 LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) { 2817 (*pr)(" bp %p\n", bp); 2818 vfs_buf_print(bp, full, pr); 2819 } 2820 2821 (*pr)("dirty bufs:\n"); 2822 LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) { 2823 (*pr)(" bp %p\n", bp); 2824 vfs_buf_print(bp, full, pr); 2825 } 2826 } 2827 } 2828 #endif 2829
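
/*
 * Illustrative sketch only (not part of the original file, hence kept
 * under "notdef" like the vnshutdown() stub above): how a file system
 * built into the kernel would typically register itself with
 * vfs_attach() and later withdraw with vfs_detach().  The name
 * "examplefs" and its vfsops are hypothetical.
 */
#ifdef notdef
extern struct vfsops examplefs_vfsops;	/* hypothetical file system */

void
examplefs_bootstrap()
{

	/*
	 * Publish the file system.  vfs_attach() runs vfs_opv_init()
	 * and the file system's own init hook, and fails with EEXIST
	 * if a file system of the same name is already attached.
	 */
	if (vfs_attach(&examplefs_vfsops) != 0)
		printf("examplefs: vfs_attach failed\n");
}

void
examplefs_shutdown()
{

	/*
	 * Withdraw the file system.  vfs_detach() returns EBUSY while
	 * mounts still reference it and ESRCH if it was never attached.
	 */
	if (vfs_detach(&examplefs_vfsops) != 0)
		printf("examplefs: vfs_detach failed\n");
}
#endif /* notdef */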