/*	$NetBSD: vfs_subr.c,v 1.204 2003/08/07 16:32:03 agc Exp $	*/

/*-
 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.204 2003/08/07 16:32:03 agc Exp $");

#include "opt_inet.h"
#include "opt_ddb.h"
#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/event.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/dirent.h>
#include <sys/filedesc.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/syncfs/syncfs.h>

#include <netinet/in.h>

#include <uvm/uvm.h>
#include <uvm/uvm_ddb.h>

#include <sys/sysctl.h>

const enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
const int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
/* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */
struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct nfs_public nfs_pub;		/* publicly exported FS */

struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;

/* XXX - gross; single global lock to protect v_numoutput */
struct simplelock global_v_numoutput_slock = SIMPLELOCK_INITIALIZER;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;		/* root device */

struct pool vnode_pool;			/* memory pool for vnodes */

MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");

/*
 * Local declarations.
 */
void	insmntque __P((struct vnode *, struct mount *));
int	getdevvp __P((dev_t, struct vnode **, enum vtype));
void	vgoneall __P((struct vnode *));

void	vclean(struct vnode *, int, struct proc *);

static int vfs_hang_addrlist __P((struct mount *, struct netexport *,
    struct export_args *));
static int vfs_free_netcred __P((struct radix_node *, void *));
static void vfs_free_addrlist __P((struct netexport *));
static struct vnode *getcleanvnode __P((struct proc *));

#ifdef DEBUG
void printlockedvnodes __P((void));
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
	    &pool_allocator_nointr);

	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

int
vfs_drainvnodes(long target, struct proc *p)
{

	simple_lock(&vnode_free_list_slock);
	while (numvnodes > target) {
		struct vnode *vp;

		vp = getcleanvnode(p);
		if (vp == NULL)
			return EBUSY; /* give up */
		pool_put(&vnode_pool, vp);
		simple_lock(&vnode_free_list_slock);
		numvnodes--;
	}
	simple_unlock(&vnode_free_list_slock);

	return 0;
}

/*
 * Grab a vnode from the free list and clean it.
 */
struct vnode *
getcleanvnode(p)
	struct proc *p;
{
	struct vnode *vp;
	struct freelst *listhd;

	LOCK_ASSERT(simple_lock_held(&vnode_free_list_slock));
	if ((vp = TAILQ_FIRST(listhd = &vnode_free_list)) == NULL)
		vp = TAILQ_FIRST(listhd = &vnode_hold_list);
	for (; vp != NULL; vp = TAILQ_NEXT(vp, v_freelist)) {
		if (simple_lock_try(&vp->v_interlock)) {
			if ((vp->v_flag & VLAYER) == 0) {
				break;
			}
			if (VOP_ISLOCKED(vp) == 0)
				break;
			else
				simple_unlock(&vp->v_interlock);
		}
	}

	if (vp == NULLVP) {
		simple_unlock(&vnode_free_list_slock);
		return NULLVP;
	}

	if (vp->v_usecount)
		panic("free vnode isn't, vp %p", vp);
	TAILQ_REMOVE(listhd, vp, v_freelist);
	/* see comment on why 0xdeadb is set at end of vgone (below) */
	vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
	simple_unlock(&vnode_free_list_slock);
	vp->v_lease = NULL;

	if (vp->v_type != VBAD)
		vgonel(vp, p);
	else
		simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
	if (vp->v_data || vp->v_uobj.uo_npages ||
	    TAILQ_FIRST(&vp->v_uobj.memq))
		panic("cleaned vnode isn't, vp %p", vp);
	if (vp->v_numoutput)
		panic("clean vnode has pending I/O's, vp %p", vp);
#endif
	KASSERT((vp->v_flag & VONWORKLST) == 0);

	return vp;
}

/*
 * Mark a mount point as busy.  Used to synchronize access and to delay
 * unmounting.  Interlock is not released on failure.
 */
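/*
 * (Illustrative caller pattern, not from the original source: walkers
 * of the mount list elsewhere in this file do
 *
 *	if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock) == 0) {
 *		...examine mp...
 *		vfs_unbusy(mp);
 *	}
 *
 * so that a mount being unmounted is skipped rather than slept on.)
 */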
int
vfs_busy(mp, flags, interlkp)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
{
	int lkflags;

	while (mp->mnt_flag & MNT_UNMOUNT) {
		int gone;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
		    && mp->mnt_unmounter == curproc)
			return (EDEADLK);
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 *
		 * XXX MP: add spinlock protecting mnt_wcnt here once you
		 * can atomically unlock-and-sleep.
		 */
		mp->mnt_wcnt++;
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		mp->mnt_wcnt--;
		gone = mp->mnt_flag & MNT_GONE;

		if (mp->mnt_wcnt == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp)
	struct mount *mp;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfs_refcount++;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
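/*
 * (Illustrative note, not from the original source: the fsid typically
 * originates outside this file, e.g. the NFS server maps the fsid
 * embedded in a client's file handle back to its mount point through
 * this lookup.)
 */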
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	struct mount *mp;

	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	mp->mnt_stat.f_fsid.val[0] = makedev(mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(mtype & 0xff, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(type)
	const char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	struct vattr *vap;
{

	vap->va_type = VNON;

	/*
	 * Assign individually so that it is safe even if size and
	 * sign of each member are varied.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec =
	    vap->va_birthtime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec =
	    vap->va_birthtime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) __P((void *));
	struct vnode **vpp;
{
	extern struct uvm_pagerops uvm_vnodeops;
	struct uvm_object *uobj;
	struct proc *p = curproc;	/* XXX */
	static int toggle;
	struct vnode *vp;
	int error = 0, tryalloc;

 try_again:
	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error.  If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock).
		 */
		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
		if (error && error != EDEADLK)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one.  The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list.  Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list.  If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list.  The toggle ensures that half the time we
	 * will use a vnode from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size.  We are reluctant to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */
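	/*
	 * (Illustrative walk-through of the policy below, not part of
	 * the original comment: with desiredvnodes == 1000, numvnodes
	 * == 1500, vnode_free_list empty and vnode_hold_list non-empty,
	 * successive calls alternate between allocating a fresh vnode
	 * and recycling one from vnode_hold_list; once numvnodes
	 * exceeds 2000 the toggle is forced off and we always recycle.)
	 */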

	vp = NULL;

	simple_lock(&vnode_free_list_slock);

	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	tryalloc = numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));

	if (tryalloc &&
	    (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
		simple_unlock(&vnode_free_list_slock);
		memset(vp, 0, sizeof(*vp));
		simple_lock_init(&vp->v_interlock);
		uobj = &vp->v_uobj;
		uobj->pgops = &uvm_vnodeops;
		uobj->uo_npages = 0;
		TAILQ_INIT(&uobj->memq);
		numvnodes++;
	} else {
		vp = getcleanvnode(p);
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			if (mp && error != EDEADLK)
				vfs_unbusy(mp);
			if (tryalloc) {
				printf("WARNING: unable to allocate new "
				    "vnode, retrying...\n");
				(void) tsleep(&lbolt, PRIBIO, "newvn", hz);
				goto try_again;
			}
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		vp->v_flag = 0;
		vp->v_socket = NULL;
#ifdef VERIFIED_EXEC
		vp->fp_status = FINGERPRINT_INVALID;
#endif
	}
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uobj.vmobjlock);

	/*
	 * initialize uvm_object within vnode.
	 */

	uobj = &vp->v_uobj;
	KASSERT(uobj->pgops == &uvm_vnodeops);
	KASSERT(uobj->uo_npages == 0);
	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
	vp->v_size = VSIZENOTSET;

	if (mp && error != EDEADLK)
		vfs_unbusy(mp);
	return (0);
}

/*
 * This is really just the reverse of getnewvnode().  Needed for
 * VFS_VGET functions that may need to push back a vnode in case
 * of a locking race.
 */
void
ungetnewvnode(vp)
	struct vnode *vp;
{
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 1)
		panic("ungetnewvnode: busy vnode");
#endif
	vp->v_usecount--;
	insmntque(vp, NULL);
	vp->v_type = VBAD;

	simple_lock(&vp->v_interlock);
	/*
	 * Insert at head of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}

/*
 * Move a vnode from one mount queue to another.
 */
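/*
 * (Illustrative note, not from the original source: passing mp == NULL
 * simply removes the vnode from its current mount's vnode list, e.g.
 * insmntque(vp, (struct mount *)0) is how vgonel() detaches a dying
 * vnode from its filesystem.)
 */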
void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_flag & MNT_UNMOUNT) &&
	    !(mp->mnt_flag & MNT_SOFTDEP) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	struct buf *bp;
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		/* XXX global lock hack
		 * can't use v_interlock here since this is called
		 * in interrupt context from biodone().
		 */
		simple_lock(&global_v_numoutput_slock);
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput, vp %p", vp);
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
		simple_unlock(&global_v_numoutput_slock);
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	struct buf *bp, *nbp;
	int s, error;
	int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
	    (flags & V_SAVE ? PGO_CLEANIT : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, 0, 0, flushflags);
	if (error) {
		return error;
	}

	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
		if (error)
			return (error);
#ifdef DIAGNOSTIC
		s = splbio();
		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
			panic("vinvalbuf: dirty bufs, vp %p", vp);
		splx(s);
#endif
	}

	s = splbio();

 restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep((caddr_t)bp,
			    slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vinvalbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep((caddr_t)bp,
			    slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vinvalbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			bp->b_flags |= B_BUSY | B_VFLUSH;
			simple_unlock(&bp->b_interlock);
			VOP_BWRITE(bp);
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	splx(s);

	return (0);
}

/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vtruncbuf(vp, lbn, slpflag, slptimeo)
	struct vnode *vp;
	daddr_t lbn;
	int slpflag, slptimeo;
{
	struct buf *bp, *nbp;
	int s, error;
	voff_t off;

	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
	if (error) {
		return error;
	}

	s = splbio();

 restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vtruncbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vtruncbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	splx(s);

	return (0);
}

void
vflushbuf(vp, sync)
	struct vnode *vp;
	int sync;
{
	struct buf *bp, *nbp;
	int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
	int s;

	simple_lock(&vp->v_interlock);
	(void) VOP_PUTPAGES(vp, 0, 0, flags);

 loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if ((bp->b_flags & B_BUSY)) {
			simple_unlock(&bp->b_interlock);
			continue;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_flags |= B_BUSY | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
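		/*
		 * (Illustrative note, not from the original source: for
		 * indirect blocks bp->b_vp is the device vnode rather
		 * than the file vnode vp, so a synchronous flush must
		 * push them with bwrite(); everything else can take the
		 * asynchronous bawrite() path.)
		 */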
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	simple_lock(&global_v_numoutput_slock);
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		ltsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0,
		    &global_v_numoutput_slock);
	}
	simple_unlock(&global_v_numoutput_slock);
	splx(s);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	struct vnode *vp;
	struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free, bp %p", bp);
	VHOLD(vp);
	s = splbio();
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == NULL)
		panic("brelvp: vp NULL, bp %p", bp);

	s = splbio();
	vp = bp->b_vp;
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);

	if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}

	bp->b_vp = NULL;
	HOLDRELE(vp);
	splx(s);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 *
 * This function must be called at splbio().
 */
void
reassignbuf(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &newvp->v_cleanblkhd;
		if (TAILQ_EMPTY(&newvp->v_uobj.memq) &&
		    (newvp->v_flag & VONWORKLST) &&
		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	} else {
		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			switch (newvp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
				break;
			}
			if (!newvp->v_mount ||
			    (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
				vn_syncer_add_to_worklist(newvp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
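/*
 * (Illustrative call, not from the original source:
 *	cdevvp(makedev(maj, 0), &vp);
 * yields a VCHR vnode for the given device number, or sets *vpp to
 * NULLVP and returns an error if no vnode could be obtained.)
 */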
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device).  If such an alias exists, deallocate
 * the existing contents and return the aliased vnode.  The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
 loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_NOWAIT);
		/* XXX Erg. */
		if (nvp->v_specinfo == NULL) {
			simple_unlock(&spechash_slock);
			uvm_wait("checkalias");
			goto loop;
		}

		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it.  If the vnode lock bit is set the
 * vnode is being eliminated in vgone.  In that case, we cannot
 * grab the vnode, so the process is awakened when the transition is
 * completed, and an error returned to indicate that the vnode is no
 * longer usable (possibly having been changed to a new file system type).
 */
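/*
 * (Illustrative caller pattern, not from the original source:
 *	if (vget(vp, LK_EXCLUSIVE) == 0) {
 *		...use the referenced, locked vnode...
 *		vput(vp);
 *	}
 * A zero return leaves the vnode referenced, and locked when a lock
 * type from LK_TYPE_MASK such as LK_EXCLUSIVE was requested.)
 */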
int
vget(vp, flags)
	struct vnode *vp;
	int flags;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */

	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		if (flags & LK_NOWAIT) {
			simple_unlock(&vp->v_interlock);
			return EBUSY;
		}
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vget", vp);
		panic("vget: usecount overflow, vp %p", vp);
	}
#endif
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active.  We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (vp->v_usecount > 0) {
				simple_unlock(&vp->v_interlock);
				return (error);
			}
			/*
			 * insert at tail of LRU list
			 */
			simple_lock(&vnode_free_list_slock);
			if (vp->v_holdcnt > 0)
				TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
				    v_freelist);
			else
				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
				    v_freelist);
			simple_unlock(&vnode_free_list_slock);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);
	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, p);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
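/*
 * (Illustrative note, not from the original source: vrele() is the
 * unlocked counterpart of vput() above; use vput() when the caller
 * already holds the vnode lock, vrele() when it does not.)
 */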
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt vp %p", vp);
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
		VOP_INACTIVE(vp, p);
}

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	struct vnode *vp;
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	simple_lock(&vp->v_interlock);
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt vp %p", vp);
	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */

	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required, vp %p", vp);
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vref", vp);
		panic("vref: usecount overflow, vp %p", vp);
	}
#endif
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If FORCECLOSE is not specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error).  If FORCECLOSE is specified, detach any active vnodes
 * that are found.
 *
 * If WRITECLOSE is set, only flush out regular file vnodes open for
 * writing.
 *
 * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
 loop:
	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = LIST_NEXT(vp, v_mntvnodes);
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device.  For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
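/*
 * (Illustrative summary of vclean(), not from the original source:
 * the vnode is marked VXLOCK to fend off other users, its vnode lock
 * is drained, cached pages and buffers are flushed, the vnode is
 * closed and deactivated if it was active, VOP_RECLAIM() detaches the
 * file system private data, and finally sleepers waiting in vget()
 * are woken up.)
 */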
void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */

	if ((active = vp->v_usecount) != 0) {
		vp->v_usecount++;
#ifdef DIAGNOSTIC
		if (vp->v_usecount == 0) {
			vprint("vclean", vp);
			panic("vclean: usecount overflow");
		}
#endif
	}

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock, vp %p", vp);
	vp->v_flag |= VXLOCK;
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);

	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out.  The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);

	/*
	 * Clean out any cached data associated with the vnode.
	 */
	if (flags & DOCLOSE) {
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
		KASSERT((vp->v_flag & VONWORKLST) == 0);
	}
	LOCK_ASSERT(!simple_lock_held(&vp->v_interlock));

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.  Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim, vp %p", vp);
	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			/*
			 * Insert at tail of LRU list.
			 */

			simple_unlock(&vp->v_interlock);
			simple_lock(&vnode_free_list_slock);
#ifdef DIAGNOSTIC
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean, vp %p", vp);
#endif
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			simple_unlock(&vnode_free_list_slock);
		} else
			simple_unlock(&vp->v_interlock);
	}

	KASSERT(vp->v_uobj.uo_npages == 0);
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	simple_lock(&vp->v_interlock);
	VN_KNOTE(vp, NOTE_REVOKE);	/* FreeBSD has this in vn_pollgone() */
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		simple_unlock(&vp->v_interlock);
		wakeup((caddr_t)vp);
	} else
		simple_unlock(&vp->v_interlock);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */

	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */

	vclean(vp, DOCLOSE, p);
	KASSERT((vp->v_flag & VONWORKLST) == 0);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */

	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);

	/*
	 * If special device, remove it from the special device alias list,
	 * if it is on one.
	 */

	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (vp->v_hashchain != NULL) {
			if (*vp->v_hashchain == vp) {
				*vp->v_hashchain = vp->v_specnext;
			} else {
				for (vq = *vp->v_hashchain; vq;
				    vq = vq->v_specnext) {
					if (vq->v_specnext != vp)
						continue;
					vq->v_specnext = vp->v_specnext;
					break;
				}
				if (vq == NULL)
					panic("missing bdev");
			}
			if (vp->v_flag & VALIASED) {
				vx = NULL;
				for (vq = *vp->v_hashchain; vq;
				    vq = vq->v_specnext) {
					if (vq->v_rdev != vp->v_rdev ||
					    vq->v_type != vp->v_type)
						continue;
					if (vx)
						break;
					vx = vq;
				}
				if (vx == NULL)
					panic("missing alias");
				if (vq == NULL)
					vx->v_flag &= ~VALIASED;
				vp->v_flag &= ~VALIASED;
			}
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}

	/*
	 * The test of the back pointer and the reference count of
	 * zero is because it will be removed from the free list by
	 * getcleanvnode, but will not have its reference count
	 * incremented until after calling vgone.  If the reference
	 * count were incremented first, vgone would (incorrectly)
	 * try to close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */

	vp->v_type = VBAD;
	if (vp->v_usecount == 0) {
		boolean_t dofree;

		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean, vp %p", vp);
		/*
		 * If it isn't on the freelist, we're called by getcleanvnode
		 * and the vnode is being re-used.  Otherwise, we'll free it.
		 */
		dofree = vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb;
		if (dofree) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			numvnodes--;
		}
		simple_unlock(&vnode_free_list_slock);
		if (dofree)
			pool_put(&vnode_pool, vp);
	}
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(maj, minl, minh, type)
	int maj, minl, minh;
	enum vtype type;
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

 loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp &&
		    (vq->v_flag & VXLOCK) == 0) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
const char * const vnode_types[] = {
	"VNON",
	"VREG",
	"VDIR",
	"VBLK",
	"VCHR",
	"VLNK",
	"VSOCK",
	"VFIFO",
	"VBAD"
};

void
vprint(label, vp)
	char *label;
	struct vnode *vp;
{
	char buf[96];

	if (label != NULL)
		printf("%s: ", label);
	printf("tag %d type %s, usecount %d, writecount %ld, refcount %ld,",
	    vp->v_tag, vnode_types[vp->v_type],
	    vp->v_usecount, vp->v_writecount, vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strlcat(buf, "|VROOT", sizeof(buf));
	if (vp->v_flag & VTEXT)
		strlcat(buf, "|VTEXT", sizeof(buf));
	if (vp->v_flag & VEXECMAP)
		strlcat(buf, "|VEXECMAP", sizeof(buf));
	if (vp->v_flag & VSYSTEM)
		strlcat(buf, "|VSYSTEM", sizeof(buf));
	if (vp->v_flag & VXLOCK)
		strlcat(buf, "|VXLOCK", sizeof(buf));
	if (vp->v_flag & VXWANT)
		strlcat(buf, "|VXWANT", sizeof(buf));
	if (vp->v_flag & VBWAIT)
		strlcat(buf, "|VBWAIT", sizeof(buf));
	if (vp->v_flag & VALIASED)
		strlcat(buf, "|VALIASED", sizeof(buf));
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	    mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp))
				vprint(NULL, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	struct vfsconf vfc;
	extern const char * const mountcompatnames[];
	extern int nmountcompatnames;
#endif
	struct vfsops *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */

	/* Not generic: goes to file system. */
	if (name[0] != VFS_GENERIC) {
		static const struct ctlname vfsnames[VFS_MAXID+1] =
		    CTL_VFS_NAMES;
		const char *vfsname;

		if (name[0] < 0 || name[0] > VFS_MAXID
		    || (vfsname = vfsnames[name[0]].ctl_name) == NULL)
			return (EOPNOTSUPP);

		vfsp = vfs_getopsbyname(vfsname);
		if (vfsp == NULL || vfsp->vfs_sysctl == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}

	/* The rest are generic vfs sysctls. */
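	/*
	 * (Illustrative note, not from the original source: at this
	 * point name[0] == VFS_GENERIC, so e.g. a userland request for
	 * the MIB { CTL_VFS, VFS_GENERIC, VFS_USERMOUNT } arrives here
	 * with name[1] == VFS_USERMOUNT.)
	 */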
	switch (name[1]) {
	case VFS_USERMOUNT:
		return sysctl_int(oldp, oldlenp, newp, newlen,
		    &dovfsusermount);
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	case VFS_MAXTYPENUM:
		/*
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
	case VFS_CONF:
		/*
		 * Special: a node, next is a file system name.
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		if (name[2] >= nmountcompatnames || name[2] < 0 ||
		    mountcompatnames[name[2]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		vfc.vfc_vfsops = vfsp;
		strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
		vfc.vfc_typenum = name[2];
		vfc.vfc_refcount = vfsp->vfs_refcount;
		vfc.vfc_flags = 0;
		vfc.vfc_mountroot = vfsp->vfs_mountroot;
		vfc.vfc_next = NULL;
		return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
		    sizeof(struct vfsconf)));
#endif
	default:
		break;
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof(struct vnode *)
#define VNODESZ	sizeof(struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	    mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		savebp = bp;
 again:
		simple_lock(&mntvnode_slock);
		for (vp = LIST_FIRST(&mp->mnt_vnodelist);
		    vp != NULL;
		    vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = LIST_NEXT(vp, v_mntvnodes);
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ,
			    VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
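/*
 * (Illustrative note, not from the original source: mount code uses
 * this check to refuse mounting a device that is already mounted;
 * aliases of the device vnode are inspected as well, since another
 * vnode for the same dev_t may carry the active v_specmountpoint.)
 */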
2146 */ 2147 int 2148 vfs_mountedon(vp) 2149 struct vnode *vp; 2150 { 2151 struct vnode *vq; 2152 int error = 0; 2153 2154 if (vp->v_specmountpoint != NULL) 2155 return (EBUSY); 2156 if (vp->v_flag & VALIASED) { 2157 simple_lock(&spechash_slock); 2158 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 2159 if (vq->v_rdev != vp->v_rdev || 2160 vq->v_type != vp->v_type) 2161 continue; 2162 if (vq->v_specmountpoint != NULL) { 2163 error = EBUSY; 2164 break; 2165 } 2166 } 2167 simple_unlock(&spechash_slock); 2168 } 2169 return (error); 2170 } 2171 2172 static int 2173 sacheck(struct sockaddr *sa) 2174 { 2175 switch (sa->sa_family) { 2176 #ifdef INET 2177 case AF_INET: { 2178 struct sockaddr_in *sin = (struct sockaddr_in *)sa; 2179 char *p = (char *)sin->sin_zero; 2180 size_t i; 2181 2182 if (sin->sin_len != sizeof(*sin)) 2183 return -1; 2184 if (sin->sin_port != 0) 2185 return -1; 2186 for (i = 0; i < sizeof(sin->sin_zero); i++) 2187 if (*p++ != '\0') 2188 return -1; 2189 return 0; 2190 } 2191 #endif 2192 #ifdef INET6 2193 case AF_INET6: { 2194 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa; 2195 2196 if (sin6->sin6_len != sizeof(*sin6)) 2197 return -1; 2198 if (sin6->sin6_port != 0) 2199 return -1; 2200 return 0; 2201 } 2202 #endif 2203 default: 2204 return -1; 2205 } 2206 } 2207 2208 /* 2209 * Build hash lists of net addresses and hang them off the mount point. 2210 * Called by vfs_export() to set up the lists of export addresses. 2211 */ 2212 static int 2213 vfs_hang_addrlist(mp, nep, argp) 2214 struct mount *mp; 2215 struct netexport *nep; 2216 struct export_args *argp; 2217 { 2218 struct netcred *np, *enp; 2219 struct radix_node_head *rnh; 2220 int i; 2221 struct sockaddr *saddr, *smask = NULL; 2222 struct domain *dom; 2223 int error; 2224 2225 if (argp->ex_addrlen == 0) { 2226 if (mp->mnt_flag & MNT_DEFEXPORTED) 2227 return (EPERM); 2228 np = &nep->ne_defexported; 2229 np->netc_exflags = argp->ex_flags; 2230 crcvt(&np->netc_anon, &argp->ex_anon); 2231 np->netc_anon.cr_ref = 1; 2232 mp->mnt_flag |= MNT_DEFEXPORTED; 2233 return (0); 2234 } 2235 2236 if (argp->ex_addrlen > MLEN || argp->ex_masklen > MLEN) 2237 return (EINVAL); 2238 2239 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 2240 np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK); 2241 memset((caddr_t)np, 0, i); 2242 saddr = (struct sockaddr *)(np + 1); 2243 error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen); 2244 if (error) 2245 goto out; 2246 if (saddr->sa_len > argp->ex_addrlen) 2247 saddr->sa_len = argp->ex_addrlen; 2248 if (sacheck(saddr) == -1) { /* don't leak np on a malformed address */ 2249 error = EINVAL; goto out; } 2250 if (argp->ex_masklen) { 2251 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen); 2252 error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen); 2253 if (error) 2254 goto out; 2255 if (smask->sa_len > argp->ex_masklen) 2256 smask->sa_len = argp->ex_masklen; 2257 if (smask->sa_family != saddr->sa_family) { 2258 error = EINVAL; goto out; } 2259 if (sacheck(smask) == -1) { 2260 error = EINVAL; goto out; } 2261 } 2262 i = saddr->sa_family; 2263 if ((rnh = nep->ne_rtable[i]) == NULL) { 2264 /* 2265 * It seems silly to initialize every address family when most 2266 * are never used; do it on demand here. 2267 */ 2268 for (dom = domains; dom; dom = dom->dom_next) 2269 if (dom->dom_family == i && dom->dom_rtattach) { 2270 dom->dom_rtattach((void **)&nep->ne_rtable[i], 2271 dom->dom_rtoffset); 2272 break; 2273 } 2274 if ((rnh = nep->ne_rtable[i]) == NULL) { 2275 error = ENOBUFS; 2276 goto out; 2277 } 2278 } 2279 2280 enp = (struct netcred
*)(*rnh->rnh_addaddr)(saddr, smask, rnh, 2281 np->netc_rnodes); 2282 if (enp != np) { 2283 if (enp == NULL) { 2284 enp = (struct netcred *)(*rnh->rnh_lookup)(saddr, 2285 smask, rnh); 2286 if (enp == NULL) { 2287 error = EPERM; 2288 goto out; 2289 } 2290 } else 2291 enp->netc_refcnt++; 2292 2293 goto check; 2294 } else 2295 enp->netc_refcnt = 1; 2296 2297 np->netc_exflags = argp->ex_flags; 2298 crcvt(&np->netc_anon, &argp->ex_anon); 2299 np->netc_anon.cr_ref = 1; 2300 return 0; 2301 check: 2302 if (enp->netc_exflags != argp->ex_flags || 2303 crcmp(&enp->netc_anon, &argp->ex_anon) != 0) 2304 error = EPERM; 2305 else 2306 error = 0; 2307 out: 2308 free(np, M_NETADDR); 2309 return error; 2310 } 2311 2312 /* ARGSUSED */ 2313 static int 2314 vfs_free_netcred(rn, w) 2315 struct radix_node *rn; 2316 void *w; 2317 { 2318 struct radix_node_head *rnh = (struct radix_node_head *)w; 2319 struct netcred *np = (struct netcred *)(void *)rn; 2320 2321 (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); 2322 if (--(np->netc_refcnt) <= 0) 2323 free(np, M_NETADDR); 2324 return (0); 2325 } 2326 2327 /* 2328 * Free the net address hash lists that are hanging off the mount points. 2329 */ 2330 static void 2331 vfs_free_addrlist(nep) 2332 struct netexport *nep; 2333 { 2334 int i; 2335 struct radix_node_head *rnh; 2336 2337 for (i = 0; i <= AF_MAX; i++) 2338 if ((rnh = nep->ne_rtable[i]) != NULL) { 2339 (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh); 2340 free((caddr_t)rnh, M_RTABLE); 2341 nep->ne_rtable[i] = 0; 2342 } 2343 } 2344 2345 int 2346 vfs_export(mp, nep, argp) 2347 struct mount *mp; 2348 struct netexport *nep; 2349 struct export_args *argp; 2350 { 2351 int error; 2352 2353 if (argp->ex_flags & MNT_DELEXPORT) { 2354 if (mp->mnt_flag & MNT_EXPUBLIC) { 2355 vfs_setpublicfs(NULL, NULL, NULL); 2356 mp->mnt_flag &= ~MNT_EXPUBLIC; 2357 } 2358 vfs_free_addrlist(nep); 2359 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 2360 } 2361 if (argp->ex_flags & MNT_EXPORTED) { 2362 if (argp->ex_flags & MNT_EXPUBLIC) { 2363 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 2364 return (error); 2365 mp->mnt_flag |= MNT_EXPUBLIC; 2366 } 2367 if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0) 2368 return (error); 2369 mp->mnt_flag |= MNT_EXPORTED; 2370 } 2371 return (0); 2372 } 2373 2374 /* 2375 * Set the publicly exported filesystem (WebNFS). Currently, only 2376 * one public filesystem is possible in the spec (RFC 2054 and 2055) 2377 */ 2378 int 2379 vfs_setpublicfs(mp, nep, argp) 2380 struct mount *mp; 2381 struct netexport *nep; 2382 struct export_args *argp; 2383 { 2384 int error; 2385 struct vnode *rvp; 2386 char *cp; 2387 2388 /* 2389 * mp == NULL -> invalidate the current info, the FS is 2390 * no longer exported. May be called from either vfs_export 2391 * or unmount, so check if it hasn't already been done. 2392 */ 2393 if (mp == NULL) { 2394 if (nfs_pub.np_valid) { 2395 nfs_pub.np_valid = 0; 2396 if (nfs_pub.np_index != NULL) { 2397 FREE(nfs_pub.np_index, M_TEMP); 2398 nfs_pub.np_index = NULL; 2399 } 2400 } 2401 return (0); 2402 } 2403 2404 /* 2405 * Only one allowed at a time. 2406 */ 2407 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 2408 return (EBUSY); 2409 2410 /* 2411 * Get real filehandle for root of exported FS. 
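 * The handle is simply the file system id of the mount plus the file
 * id of its root vnode; both pieces are filled in below, the latter by
 * VFS_VPTOFH().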
2412 */ 2413 memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle)); 2414 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 2415 2416 if ((error = VFS_ROOT(mp, &rvp))) 2417 return (error); 2418 2419 error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid); 2420 vput(rvp); /* release the root vnode even if VFS_VPTOFH() failed */ 2421 if (error) 2422 return (error); 2423 2424 /* 2425 * If an indexfile was specified, pull it in. 2426 */ 2427 if (argp->ex_indexfile != NULL) { 2428 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 2429 M_WAITOK); 2430 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 2431 MAXNAMLEN, (size_t *)0); 2432 if (!error) { 2433 /* 2434 * Check for illegal filenames. 2435 */ 2436 for (cp = nfs_pub.np_index; *cp; cp++) { 2437 if (*cp == '/') { 2438 error = EINVAL; 2439 break; 2440 } 2441 } 2442 } 2443 if (error) { 2444 FREE(nfs_pub.np_index, M_TEMP); 2445 return (error); 2446 } 2447 } 2448 2449 nfs_pub.np_mount = mp; 2450 nfs_pub.np_valid = 1; 2451 return (0); 2452 } 2453 2454 struct netcred * 2455 vfs_export_lookup(mp, nep, nam) 2456 struct mount *mp; 2457 struct netexport *nep; 2458 struct mbuf *nam; 2459 { 2460 struct netcred *np; 2461 struct radix_node_head *rnh; 2462 struct sockaddr *saddr; 2463 2464 np = NULL; 2465 if (mp->mnt_flag & MNT_EXPORTED) { 2466 /* 2467 * Look the address up in the export list first. 2468 */ 2469 if (nam != NULL) { 2470 saddr = mtod(nam, struct sockaddr *); 2471 rnh = nep->ne_rtable[saddr->sa_family]; 2472 if (rnh != NULL) { 2473 np = (struct netcred *) 2474 (*rnh->rnh_matchaddr)((caddr_t)saddr, 2475 rnh); 2476 if (np && (np->netc_rnodes->rn_flags & RNF_ROOT)) 2477 np = NULL; 2478 } 2479 } 2480 /* 2481 * If no address matched, use the default entry if it exists. 2482 */ 2483 if (np == NULL && (mp->mnt_flag & MNT_DEFEXPORTED)) 2484 np = &nep->ne_defexported; 2485 } 2486 return (np); 2487 } 2488 2489 /* 2490 * Do the usual access checking: file_mode, uid and gid come from the 2491 * vnode in question; acc_mode and cred come from the VOP_ACCESS() call. 2492 * (A short worked example appears in the comment below.) 2493 */ 2494 int 2495 vaccess(type, file_mode, uid, gid, acc_mode, cred) 2496 enum vtype type; 2497 mode_t file_mode; 2498 uid_t uid; 2499 gid_t gid; 2500 mode_t acc_mode; 2501 struct ucred *cred; 2502 { 2503 mode_t mask; 2504 2505 /* 2506 * Super-user always gets read/write access, but execute access depends 2507 * on at least one execute bit being set. 2508 */ 2509 if (cred->cr_uid == 0) { 2510 if ((acc_mode & VEXEC) && type != VDIR && 2511 (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0) 2512 return (EACCES); 2513 return (0); 2514 } 2515 2516 mask = 0; 2517 2518 /* Otherwise, check the owner. */ 2519 if (cred->cr_uid == uid) { 2520 if (acc_mode & VEXEC) 2521 mask |= S_IXUSR; 2522 if (acc_mode & VREAD) 2523 mask |= S_IRUSR; 2524 if (acc_mode & VWRITE) 2525 mask |= S_IWUSR; 2526 return ((file_mode & mask) == mask ? 0 : EACCES); 2527 } 2528 2529 /* Otherwise, check the groups. */ 2530 if (cred->cr_gid == gid || groupmember(gid, cred)) { 2531 if (acc_mode & VEXEC) 2532 mask |= S_IXGRP; 2533 if (acc_mode & VREAD) 2534 mask |= S_IRGRP; 2535 if (acc_mode & VWRITE) 2536 mask |= S_IWGRP; 2537 return ((file_mode & mask) == mask ? 0 : EACCES); 2538 } 2539 2540 /* Otherwise, check everyone else. */ 2541 if (acc_mode & VEXEC) 2542 mask |= S_IXOTH; 2543 if (acc_mode & VREAD) 2544 mask |= S_IROTH; 2545 if (acc_mode & VWRITE) 2546 mask |= S_IWOTH; 2547 return ((file_mode & mask) == mask ? 0 : EACCES); 2548 } 2549 2550 /* 2551 * Unmount all file systems.
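 *
 * (A worked example for vaccess() above, as promised there: for a
 * file with mode 0640 owned by uid 100 / gid 10, a VREAD|VWRITE
 * request from uid 100 builds mask = S_IRUSR|S_IWUSR = 0600, and
 * 0640 & 0600 == 0600, so access is granted; the same request from
 * another member of gid 10 builds mask = S_IRGRP|S_IWGRP = 0060, and
 * 0640 & 0060 == 0040 != 0060, so EACCES is returned.)
 *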
2552 * We traverse the list in reverse order under the assumption that doing so 2553 * will avoid needing to worry about dependencies. 2554 */ 2555 void 2556 vfs_unmountall(p) 2557 struct proc *p; 2558 { 2559 struct mount *mp, *nmp; 2560 int allerror, error; 2561 2562 for (allerror = 0, 2563 mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 2564 nmp = mp->mnt_list.cqe_prev; 2565 #ifdef DEBUG 2566 printf("unmounting %s (%s)...\n", 2567 mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname); 2568 #endif 2569 /* 2570 * XXX Freeze syncer. Must do this before locking the 2571 * mount point. See dounmount() for details. 2572 */ 2573 lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL); 2574 if (vfs_busy(mp, 0, 0)) { 2575 lockmgr(&syncer_lock, LK_RELEASE, NULL); 2576 continue; 2577 } 2578 if ((error = dounmount(mp, MNT_FORCE, p)) != 0) { 2579 printf("unmount of %s failed with error %d\n", 2580 mp->mnt_stat.f_mntonname, error); 2581 allerror = 1; 2582 } 2583 } 2584 if (allerror) 2585 printf("WARNING: some file systems would not unmount\n"); 2586 } 2587 2588 /* 2589 * Sync and unmount file systems before shutting down. 2590 */ 2591 void 2592 vfs_shutdown() 2593 { 2594 struct buf *bp; 2595 int iter, nbusy, nbusy_prev = 0, dcount, s; 2596 struct lwp *l = curlwp; 2597 struct proc *p; 2598 2599 /* XXX we're certainly not running in proc0's context! */ 2600 if (l == NULL || (p = l->l_proc) == NULL) 2601 p = &proc0; 2602 2603 printf("syncing disks... "); 2604 2605 /* remove user process from run queue */ 2606 suspendsched(); 2607 (void) spl0(); 2608 2609 /* avoid coming back this way again if we panic. */ 2610 doing_shutdown = 1; 2611 2612 sys_sync(l, NULL, NULL); 2613 2614 /* Wait for sync to finish. */ 2615 dcount = 10000; 2616 for (iter = 0; iter < 20;) { 2617 nbusy = 0; 2618 for (bp = &buf[nbuf]; --bp >= buf; ) { 2619 if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY) 2620 nbusy++; 2621 /* 2622 * With soft updates, some buffers that are 2623 * written will be remarked as dirty until other 2624 * buffers are written. 2625 */ 2626 if (bp->b_vp && bp->b_vp->v_mount 2627 && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP) 2628 && (bp->b_flags & B_DELWRI)) { 2629 s = splbio(); 2630 bremfree(bp); 2631 bp->b_flags |= B_BUSY; 2632 splx(s); 2633 nbusy++; 2634 bawrite(bp); 2635 if (dcount-- <= 0) { 2636 printf("softdep "); 2637 goto fail; 2638 } 2639 } 2640 } 2641 if (nbusy == 0) 2642 break; 2643 if (nbusy_prev == 0) 2644 nbusy_prev = nbusy; 2645 printf("%d ", nbusy); 2646 tsleep(&nbusy, PRIBIO, "bflush", 2647 (iter == 0) ? 1 : hz / 25 * iter); 2648 if (nbusy >= nbusy_prev) /* we didn't flush anything */ 2649 iter++; 2650 else 2651 nbusy_prev = nbusy; 2652 } 2653 if (nbusy) { 2654 fail: 2655 #if defined(DEBUG) || defined(DEBUG_HALT_BUSY) 2656 printf("giving up\nPrinting vnodes for busy buffers\n"); 2657 for (bp = &buf[nbuf]; --bp >= buf; ) 2658 if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY) 2659 vprint(NULL, bp->b_vp); 2660 2661 #if defined(DDB) && defined(DEBUG_HALT_BUSY) 2662 Debugger(); 2663 #endif 2664 2665 #else /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */ 2666 printf("giving up\n"); 2667 #endif /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */ 2668 return; 2669 } else 2670 printf("done\n"); 2671 2672 /* 2673 * If we've panic'd, don't make the situation potentially 2674 * worse by unmounting the file systems. 2675 */ 2676 if (panicstr != NULL) 2677 return; 2678 2679 /* Release inodes held by texts before update. */ 2680 #ifdef notdef 2681 vnshutdown(); 2682 #endif 2683 /* Unmount file systems. 
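 * Dirty buffers were flushed above, so the forced unmounts done by
 * vfs_unmountall() should amount to little more than detaching the
 * file systems; any failure is reported there but is not fatal at
 * this point.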
*/ 2684 vfs_unmountall(p); 2685 } 2686 2687 /* 2688 * Mount the root file system. If the operator didn't specify a 2689 * file system to use, try all possible file systems until one 2690 * succeeds. 2691 */ 2692 int 2693 vfs_mountroot() 2694 { 2695 struct vfsops *v; 2696 2697 if (root_device == NULL) 2698 panic("vfs_mountroot: root device unknown"); 2699 2700 switch (root_device->dv_class) { 2701 case DV_IFNET: 2702 if (rootdev != NODEV) 2703 panic("vfs_mountroot: rootdev set for DV_IFNET " 2704 "(0x%08x -> %d,%d)", rootdev, 2705 major(rootdev), minor(rootdev)); 2706 break; 2707 2708 case DV_DISK: 2709 if (rootdev == NODEV) 2710 panic("vfs_mountroot: rootdev not set for DV_DISK"); 2711 break; 2712 2713 default: 2714 printf("%s: inappropriate for root file system\n", 2715 root_device->dv_xname); 2716 return (ENODEV); 2717 } 2718 2719 /* 2720 * If user specified a file system, use it. 2721 */ 2722 if (mountroot != NULL) 2723 return ((*mountroot)()); 2724 2725 /* 2726 * Try each file system currently configured into the kernel. 2727 */ 2728 for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) { 2729 if (v->vfs_mountroot == NULL) 2730 continue; 2731 #ifdef DEBUG 2732 aprint_normal("mountroot: trying %s...\n", v->vfs_name); 2733 #endif 2734 if ((*v->vfs_mountroot)() == 0) { 2735 aprint_normal("root file system type: %s\n", 2736 v->vfs_name); 2737 break; 2738 } 2739 } 2740 2741 if (v == NULL) { 2742 printf("no file system for %s", root_device->dv_xname); 2743 if (root_device->dv_class == DV_DISK) 2744 printf(" (dev 0x%x)", rootdev); 2745 printf("\n"); 2746 return (EFTYPE); 2747 } 2748 return (0); 2749 } 2750 2751 /* 2752 * Given a file system name, look up the vfsops for that 2753 * file system, or return NULL if file system isn't present 2754 * in the kernel. 2755 */ 2756 struct vfsops * 2757 vfs_getopsbyname(name) 2758 const char *name; 2759 { 2760 struct vfsops *v; 2761 2762 for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) { 2763 if (strcmp(v->vfs_name, name) == 0) 2764 break; 2765 } 2766 2767 return (v); 2768 } 2769 2770 /* 2771 * Establish a file system and initialize it. 2772 */ 2773 int 2774 vfs_attach(vfs) 2775 struct vfsops *vfs; 2776 { 2777 struct vfsops *v; 2778 int error = 0; 2779 2780 2781 /* 2782 * Make sure this file system doesn't already exist. 2783 */ 2784 LIST_FOREACH(v, &vfs_list, vfs_list) { 2785 if (strcmp(vfs->vfs_name, v->vfs_name) == 0) { 2786 error = EEXIST; 2787 goto out; 2788 } 2789 } 2790 2791 /* 2792 * Initialize the vnode operations for this file system. 2793 */ 2794 vfs_opv_init(vfs->vfs_opv_descs); 2795 2796 /* 2797 * Now initialize the file system itself. 2798 */ 2799 (*vfs->vfs_init)(); 2800 2801 /* 2802 * ...and link it into the kernel's list. 2803 */ 2804 LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list); 2805 2806 /* 2807 * Sanity: make sure the reference count is 0. 2808 */ 2809 vfs->vfs_refcount = 0; 2810 2811 out: 2812 return (error); 2813 } 2814 2815 /* 2816 * Remove a file system from the kernel. 2817 */ 2818 int 2819 vfs_detach(vfs) 2820 struct vfsops *vfs; 2821 { 2822 struct vfsops *v; 2823 2824 /* 2825 * Make sure no one is using the filesystem. 2826 */ 2827 if (vfs->vfs_refcount != 0) 2828 return (EBUSY); 2829 2830 /* 2831 * ...and remove it from the kernel's list. 2832 */ 2833 LIST_FOREACH(v, &vfs_list, vfs_list) { 2834 if (v == vfs) { 2835 LIST_REMOVE(v, vfs_list); 2836 break; 2837 } 2838 } 2839 2840 if (v == NULL) 2841 return (ESRCH); 2842 2843 /* 2844 * Now run the file system-specific cleanups. 
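 * The vfs_done hook is expected to undo whatever the file system's
 * vfs_init hook set up at vfs_attach() time, e.g. destroying any
 * pools or hash tables it allocated.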
2845 */ 2846 (*vfs->vfs_done)(); 2847 2848 /* 2849 * Free the vnode operations vector. 2850 */ 2851 vfs_opv_free(vfs->vfs_opv_descs); 2852 return (0); 2853 } 2854 2855 void 2856 vfs_reinit(void) 2857 { 2858 struct vfsops *vfs; 2859 2860 LIST_FOREACH(vfs, &vfs_list, vfs_list) { 2861 if (vfs->vfs_reinit) { 2862 (*vfs->vfs_reinit)(); 2863 } 2864 } 2865 } 2866 2867 void 2868 copy_statfs_info(struct statfs *sbp, const struct mount *mp) 2869 { 2870 const struct statfs *mbp; 2871 2872 if (sbp == (mbp = &mp->mnt_stat)) 2873 return; 2874 2875 sbp->f_oflags = mbp->f_oflags; 2876 sbp->f_type = mbp->f_type; 2877 (void)memcpy(&sbp->f_fsid, &mbp->f_fsid, sizeof(sbp->f_fsid)); 2878 sbp->f_owner = mbp->f_owner; 2879 sbp->f_flags = mbp->f_flags; 2880 sbp->f_syncwrites = mbp->f_syncwrites; 2881 sbp->f_asyncwrites = mbp->f_asyncwrites; 2882 sbp->f_spare[0] = mbp->f_spare[0]; 2883 (void)memcpy(sbp->f_fstypename, mbp->f_fstypename, 2884 sizeof(sbp->f_fstypename)); 2885 (void)memcpy(sbp->f_mntonname, mbp->f_mntonname, 2886 sizeof(sbp->f_mntonname)); 2887 (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname, 2888 sizeof(sbp->f_mntfromname)); 2889 } 2890 2891 int 2892 set_statfs_info(const char *onp, int ukon, const char *fromp, int ukfrom, 2893 struct mount *mp, struct proc *p) 2894 { 2895 int error; 2896 size_t size; 2897 struct statfs *sfs = &mp->mnt_stat; 2898 int (*fun)(const void *, void *, size_t, size_t *); 2899 2900 (void)strncpy(mp->mnt_stat.f_fstypename, mp->mnt_op->vfs_name, 2901 sizeof(mp->mnt_stat.f_fstypename)); 2902 2903 if (onp) { 2904 struct cwdinfo *cwdi = p->p_cwdi; 2905 fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr; 2906 if (cwdi->cwdi_rdir != NULL) { 2907 size_t len; 2908 char *bp; 2909 char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 2910 2911 if (!path) 2912 return ENOMEM; 2913 2914 bp = path + MAXPATHLEN; 2915 *--bp = '\0'; 2916 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, 2917 path, MAXPATHLEN / 2, 0, p); 2918 if (error) { 2919 free(path, M_TEMP); 2920 return error; 2921 } 2922 2923 len = strlen(bp); 2924 if (len > sizeof(sfs->f_mntonname) - 1) 2925 len = sizeof(sfs->f_mntonname) - 1; 2926 (void)strncpy(sfs->f_mntonname, bp, len); 2927 free(path, M_TEMP); 2928 2929 if (len < sizeof(sfs->f_mntonname) - 1) { 2930 error = (*fun)(onp, &sfs->f_mntonname[len], 2931 sizeof(sfs->f_mntonname) - len - 1, &size); 2932 if (error) 2933 return error; 2934 size += len; 2935 } else { 2936 size = len; 2937 } 2938 } else { 2939 error = (*fun)(onp, &sfs->f_mntonname, 2940 sizeof(sfs->f_mntonname) - 1, &size); 2941 if (error) 2942 return error; 2943 } 2944 (void)memset(sfs->f_mntonname + size, 0, 2945 sizeof(sfs->f_mntonname) - size); 2946 } 2947 2948 if (fromp) { 2949 fun = (ukfrom == UIO_SYSSPACE) ? 
copystr : copyinstr; 2950 error = (*fun)(fromp, sfs->f_mntfromname, 2951 sizeof(sfs->f_mntfromname) - 1, &size); 2952 if (error) 2953 return error; 2954 (void)memset(sfs->f_mntfromname + size, 0, 2955 sizeof(sfs->f_mntfromname) - size); 2956 } 2957 return 0; 2958 } 2959 2960 #ifdef DDB 2961 const char buf_flagbits[] = 2962 "\20\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI" 2963 "\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE" 2964 "\21ORDERED\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED" 2965 "\32XXX\33VFLUSH"; 2966 2967 void 2968 vfs_buf_print(bp, full, pr) 2969 struct buf *bp; 2970 int full; 2971 void (*pr) __P((const char *, ...)); 2972 { 2973 char buf[1024]; 2974 2975 (*pr)(" vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" dev 0x%x\n", 2976 bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev); 2977 2978 bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf)); 2979 (*pr)(" error %d flags 0x%s\n", bp->b_error, buf); 2980 2981 (*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n", 2982 bp->b_bufsize, bp->b_bcount, bp->b_resid); 2983 (*pr)(" data %p saveaddr %p dep %p\n", 2984 bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep)); 2985 (*pr)(" iodone %p\n", bp->b_iodone); 2986 } 2987 2988 2989 const char vnode_flagbits[] = 2990 "\20\1ROOT\2TEXT\3SYSTEM\4ISTTY\5EXECMAP" 2991 "\11XLOCK\12XWANT\13BWAIT\14ALIASED" 2992 "\15DIROP\16LAYER\17ONWORKLIST\20DIRTY"; 2993 2994 const char * const vnode_tags[] = { 2995 "VT_NON", 2996 "VT_UFS", 2997 "VT_NFS", 2998 "VT_MFS", 2999 "VT_MSDOSFS", 3000 "VT_LFS", 3001 "VT_LOFS", 3002 "VT_FDESC", 3003 "VT_PORTAL", 3004 "VT_NULL", 3005 "VT_UMAP", 3006 "VT_KERNFS", 3007 "VT_PROCFS", 3008 "VT_AFS", 3009 "VT_ISOFS", 3010 "VT_UNION", 3011 "VT_ADOSFS", 3012 "VT_EXT2FS", 3013 "VT_CODA", 3014 "VT_FILECORE", 3015 "VT_NTFS", 3016 "VT_VFS", 3017 "VT_OVERLAY", 3018 "VT_SMBFS" 3019 }; 3020 3021 void 3022 vfs_vnode_print(vp, full, pr) 3023 struct vnode *vp; 3024 int full; 3025 void (*pr) __P((const char *, ...)); 3026 { 3027 char buf[256]; 3028 const char *vtype, *vtag; 3029 3030 uvm_object_printit(&vp->v_uobj, full, pr); 3031 bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf)); 3032 (*pr)("\nVNODE flags %s\n", buf); 3033 (*pr)("mp %p numoutput %d size 0x%llx\n", 3034 vp->v_mount, vp->v_numoutput, vp->v_size); 3035 3036 (*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n", 3037 vp->v_data, vp->v_usecount, vp->v_writecount, 3038 vp->v_holdcnt, vp->v_numoutput); 3039 3040 vtype = (vp->v_type >= 0 && 3041 vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ? 3042 vnode_types[vp->v_type] : "UNKNOWN"; 3043 vtag = (vp->v_tag >= 0 && 3044 vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ? 3045 vnode_tags[vp->v_tag] : "UNKNOWN"; 3046 3047 (*pr)("type %s(%d) tag %s(%d) mount %p typedata %p\n", 3048 vtype, vp->v_type, vtag, vp->v_tag, 3049 vp->v_mount, vp->v_mountedhere); 3050 3051 if (full) { 3052 struct buf *bp; 3053 3054 (*pr)("clean bufs:\n"); 3055 LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) { 3056 (*pr)(" bp %p\n", bp); 3057 vfs_buf_print(bp, full, pr); 3058 } 3059 3060 (*pr)("dirty bufs:\n"); 3061 LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) { 3062 (*pr)(" bp %p\n", bp); 3063 vfs_buf_print(bp, full, pr); 3064 } 3065 } 3066 } 3067 #endif 3068
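
/*
 * A note on the bit-format strings passed to bitmask_snprintf(9)
 * above: the first character encodes the numeral base used to print
 * the raw value (\20 == 16, i.e. hexadecimal) and each subsequent \N
 * introduces the name of bit N, counting from 1 at the least
 * significant bit.  Given vnode_flagbits, for example, a v_flag with
 * bits 1 and 3 set (VROOT|VSYSTEM) would be rendered along the lines
 * of "0x5<ROOT,SYSTEM>".
 */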
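
/*
 * Usage sketch for vfs_attach()/vfs_detach() above, as a file system
 * would call them (hypothetical "myfs" file system, not part of this
 * file):
 *
 *	extern struct vfsops myfs_vfsops;
 *
 *	void
 *	myfs_init(void)
 *	{
 *		int error;
 *
 *		if ((error = vfs_attach(&myfs_vfsops)) != 0)
 *			printf("myfs: vfs_attach failed (%d)\n", error);
 *	}
 *
 *	void
 *	myfs_done(void)
 *	{
 *
 *		if (vfs_detach(&myfs_vfsops) != 0)
 *			printf("myfs: file system is busy\n");
 *	}
 */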