/*	$NetBSD: vfs_subr.c,v 1.201 2003/06/29 22:31:33 fvdl Exp $	*/

/*-
 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.201 2003/06/29 22:31:33 fvdl Exp $");

#include "opt_inet.h"
#include "opt_ddb.h"
#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/event.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/sa.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/dirent.h>
#include <sys/filedesc.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/syncfs/syncfs.h>

#include <netinet/in.h>

#include <uvm/uvm.h>
#include <uvm/uvm_ddb.h>

#include <sys/sysctl.h>

const enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
const int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */
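
/*
 * Usage sketch: the two tables above are normally consulted through the
 * IFTOVT() and VTTOIF() macros from sys/vnode.h, which map between the
 * S_IFMT bits of a file mode and the vnode type enumeration:
 *
 *	enum vtype t = IFTOVT(S_IFDIR | 0755);	(yields VDIR)
 *	int ifmt = VTTOIF(VREG);		(yields S_IFREG)
 */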

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
/* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */
struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct nfs_public nfs_pub;			/* publicly exported FS */

struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;

/* XXX - gross; single global lock to protect v_numoutput */
struct simplelock global_v_numoutput_slock = SIMPLELOCK_INITIALIZER;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;			/* root device */

struct pool vnode_pool;				/* memory pool for vnodes */

MALLOC_DEFINE(M_VNODE, "vnodes", "Dynamically allocated vnodes");

/*
 * Local declarations.
 */
void insmntque __P((struct vnode *, struct mount *));
int getdevvp __P((dev_t, struct vnode **, enum vtype));
void vgoneall __P((struct vnode *));

void vclean(struct vnode *, int, struct proc *);

static int vfs_hang_addrlist __P((struct mount *, struct netexport *,
    struct export_args *));
static int vfs_free_netcred __P((struct radix_node *, void *));
static void vfs_free_addrlist __P((struct netexport *));

#ifdef DEBUG
void printlockedvnodes __P((void));
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
	    &pool_allocator_nointr);

	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
{
	int lkflags;

	while (mp->mnt_flag & MNT_UNMOUNT) {
		int gone;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
		    && mp->mnt_unmounter == curproc)
			return (EDEADLK);
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 *
		 * XXX MP: add spinlock protecting mnt_wcnt here once you
		 * can atomically unlock-and-sleep.
		 */
		mp->mnt_wcnt++;
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		mp->mnt_wcnt--;
		gone = mp->mnt_flag & MNT_GONE;

		if (mp->mnt_wcnt == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp)
	struct mount *mp;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfs_refcount++;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	struct mount *mp;

	simple_lock(&mountlist_slock);
	CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	mp->mnt_stat.f_fsid.val[0] = makedev(mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(mtype & 0xff, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (!CIRCLEQ_EMPTY(&mountlist)) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(type)
	const char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}
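
/*
 * Worked example (sketch): makefstype("ffs") folds the name as
 *
 *	rv = (0x000 << 2) ^ 'f' (0x66) = 0x066
 *	rv = (0x066 << 2) ^ 'f' (0x66) = 0x1fe
 *	rv = (0x1fe << 2) ^ 's' (0x73) = 0x78b
 *
 * The result is only "unique" in a best-effort sense; distinct names can
 * collide, which is one reason vfs_getnewfsid() above still probes the
 * candidate fsid with vfs_getvfs() before committing to it.
 */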

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	struct vattr *vap;
{

	vap->va_type = VNON;

	/*
	 * Assign individually so that it is safe even if size and
	 * sign of each member are varied.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec =
	    vap->va_birthtime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec =
	    vap->va_birthtime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) __P((void *));
	struct vnode **vpp;
{
	extern struct uvm_pagerops uvm_vnodeops;
	struct uvm_object *uobj;
	struct proc *p = curproc;	/* XXX */
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int error = 0, tryalloc;

try_again:
	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error.  If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock).
		 */
		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
		if (error && error != EDEADLK)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list. If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reticent to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */

	vp = NULL;

	simple_lock(&vnode_free_list_slock);

	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	tryalloc = numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));

	if (tryalloc &&
	    (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
		simple_unlock(&vnode_free_list_slock);
		memset(vp, 0, sizeof(*vp));
		simple_lock_init(&vp->v_interlock);
		uobj = &vp->v_uobj;
		uobj->pgops = &uvm_vnodeops;
		uobj->uo_npages = 0;
		TAILQ_INIT(&uobj->memq);
		numvnodes++;
	} else {
		if ((vp = TAILQ_FIRST(listhd = &vnode_free_list)) == NULL)
			vp = TAILQ_FIRST(listhd = &vnode_hold_list);
		for (; vp != NULL; vp = TAILQ_NEXT(vp, v_freelist)) {
			if (simple_lock_try(&vp->v_interlock)) {
				if ((vp->v_flag & VLAYER) == 0) {
					break;
				}
				if (VOP_ISLOCKED(vp) == 0)
					break;
				else
					simple_unlock(&vp->v_interlock);
			}
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			if (mp && error != EDEADLK)
				vfs_unbusy(mp);
			if (tryalloc) {
				printf("WARNING: unable to allocate new "
				    "vnode, retrying...\n");
				(void) tsleep(&lbolt, PRIBIO, "newvn", hz);
				goto try_again;
			}
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't, vp %p", vp);
		TAILQ_REMOVE(listhd, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;

		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data || vp->v_uobj.uo_npages ||
		    TAILQ_FIRST(&vp->v_uobj.memq))
			panic("cleaned vnode isn't, vp %p", vp);
		if (vp->v_numoutput)
			panic("clean vnode has pending I/O's, vp %p", vp);
#endif
		KASSERT((vp->v_flag & VONWORKLST) == 0);
		vp->v_flag = 0;
		vp->v_socket = NULL;
#ifdef VERIFIED_EXEC
		vp->fp_status = FINGERPRINT_INVALID;
#endif
	}
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uobj.vmobjlock);

	/*
	 * initialize uvm_object within vnode.
	 */

	uobj = &vp->v_uobj;
	KASSERT(uobj->pgops == &uvm_vnodeops);
	KASSERT(uobj->uo_npages == 0);
	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
	vp->v_size = VSIZENOTSET;

	if (mp && error != EDEADLK)
		vfs_unbusy(mp);
	return (0);
}
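
/*
 * Caller sketch (hypothetical; each file system's VFS_VGET differs in
 * detail): allocate a fresh vnode, then hand it back with
 * ungetnewvnode() below if another thread won the race to instantiate
 * the same file:
 *
 *	struct vnode *vp;
 *	int error;
 *
 *	if ((error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) != 0)
 *		return (error);
 *	if (the inode already got a vnode from a racing thread) {
 *		ungetnewvnode(vp);
 *		vp = the existing vnode;
 *	}
 */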

/*
 * This is really just the reverse of getnewvnode(). Needed for
 * VFS_VGET functions who may need to push back a vnode in case
 * of a locking race.
 */
void
ungetnewvnode(vp)
	struct vnode *vp;
{
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 1)
		panic("ungetnewvnode: busy vnode");
#endif
	vp->v_usecount--;
	insmntque(vp, NULL);
	vp->v_type = VBAD;

	simple_lock(&vp->v_interlock);
	/*
	 * Insert at head of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_flag & MNT_UNMOUNT) &&
	    !(mp->mnt_flag & MNT_SOFTDEP) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	struct buf *bp;
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		/* XXX global lock hack
		 * can't use v_interlock here since this is called
		 * in interrupt context from biodone().
		 */
		simple_lock(&global_v_numoutput_slock);
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput, vp %p", vp);
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
		simple_unlock(&global_v_numoutput_slock);
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	struct buf *bp, *nbp;
	int s, error;
	int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
	    (flags & V_SAVE ? PGO_CLEANIT : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, 0, 0, flushflags);
	if (error) {
		return error;
	}

	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
		if (error)
			return (error);
#ifdef DIAGNOSTIC
		s = splbio();
		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
			panic("vinvalbuf: dirty bufs, vp %p", vp);
		splx(s);
#endif
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep((caddr_t)bp,
			    slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vinvalbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep((caddr_t)bp,
			    slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vinvalbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			bp->b_flags |= B_BUSY | B_VFLUSH;
			simple_unlock(&bp->b_interlock);
			VOP_BWRITE(bp);
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	splx(s);

	return (0);
}
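
/*
 * Usage sketch (hypothetical callers): V_SAVE asks for dirty data to be
 * written out before the buffers are tossed, so an orderly detach and a
 * revoke-style path differ only in that flag:
 *
 *	error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0);	(flush, then toss)
 *	error = vinvalbuf(vp, 0, NOCRED, p, 0, 0);	(just toss)
 */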

/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 */
int
vtruncbuf(vp, lbn, slpflag, slptimeo)
	struct vnode *vp;
	daddr_t lbn;
	int slpflag, slptimeo;
{
	struct buf *bp, *nbp;
	int s, error;
	voff_t off;

	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
	if (error) {
		return error;
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vtruncbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		simple_lock(&bp->b_interlock);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = ltsleep(bp, slpflag | (PRIBIO + 1) | PNORELOCK,
			    "vtruncbuf", slptimeo, &bp->b_interlock);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		brelse(bp);
	}

	splx(s);

	return (0);
}

void
vflushbuf(vp, sync)
	struct vnode *vp;
	int sync;
{
	struct buf *bp, *nbp;
	int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
	int s;

	simple_lock(&vp->v_interlock);
	(void) VOP_PUTPAGES(vp, 0, 0, flags);

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		simple_lock(&bp->b_interlock);
		if ((bp->b_flags & B_BUSY)) {
			simple_unlock(&bp->b_interlock);
			continue;
		}
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_flags |= B_BUSY | B_VFLUSH;
		simple_unlock(&bp->b_interlock);
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	simple_lock(&global_v_numoutput_slock);
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		ltsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0,
		    &global_v_numoutput_slock);
	}
	simple_unlock(&global_v_numoutput_slock);
	splx(s);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	struct vnode *vp;
	struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free, bp %p", bp);
	VHOLD(vp);
	s = splbio();
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}
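
/*
 * Note on the hold count: every buffer attached with bgetvp() takes a
 * VHOLD() reference, and brelvp() below drops it again.  A vnode with
 * v_holdcnt > 0 therefore sits on vnode_hold_list rather than
 * vnode_free_list, and getnewvnode() above recycles it only reluctantly,
 * since doing so loses the identity of its cached buffers.
 */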

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == NULL)
		panic("brelvp: vp NULL, bp %p", bp);

	s = splbio();
	vp = bp->b_vp;
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);

	if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}

	bp->b_vp = NULL;
	HOLDRELE(vp);
	splx(s);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 *
 * This function must be called at splbio().
 */
void
reassignbuf(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (LIST_NEXT(bp, b_vnbufs) != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &newvp->v_cleanblkhd;
		if (TAILQ_EMPTY(&newvp->v_uobj.memq) &&
		    (newvp->v_flag & VONWORKLST) &&
		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	} else {
		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			switch (newvp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
				break;
			}
			if (!newvp->v_mount ||
			    (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
				vn_syncer_add_to_worklist(newvp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
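
/*
 * Example (sketch): machine-dependent root setup typically wraps the
 * boot device in a vnode with bdevvp() before handing it to the file
 * system's mountroot routine, along the lines of
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("cannot set up root vnode");
 *
 * rootdev and rootvp are the conventional kernel globals for this.
 */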

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_NOWAIT);
		/* XXX Erg. */
		if (nvp->v_specinfo == NULL) {
			simple_unlock(&spechash_slock);
			uvm_wait("checkalias");
			goto loop;
		}

		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. If the vnode lock bit is set the
 * vnode is being eliminated in vgone. In that case, we can not
 * grab the vnode, so the process is awakened when the transition is
 * completed, and an error returned to indicate that the vnode is no
 * longer usable (possibly having been changed to a new file system type).
 */
int
vget(vp, flags)
	struct vnode *vp;
	int flags;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */

	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		if (flags & LK_NOWAIT) {
			simple_unlock(&vp->v_interlock);
			return EBUSY;
		}
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vget", vp);
		panic("vget: usecount overflow, vp %p", vp);
	}
#endif
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active. We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (vp->v_usecount > 0) {
				simple_unlock(&vp->v_interlock);
				return (error);
			}
			/*
			 * insert at tail of LRU list
			 */
			simple_lock(&vnode_free_list_slock);
			if (vp->v_holdcnt > 0)
				TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
				    v_freelist);
			else
				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
				    v_freelist);
			simple_unlock(&vnode_free_list_slock);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);
	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, p);
}
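
/*
 * The common pairing (sketch): vget() with LK_EXCLUSIVE takes both a
 * reference and the vnode lock, vput() releases both in one call, and
 * vrele() below drops only the reference:
 *
 *	if (vget(vp, LK_EXCLUSIVE) != 0)
 *		... the vnode was reclaimed; look it up again ...
 *	... use vp ...
 *	vput(vp);
 */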

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt vp %p", vp);
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
		VOP_INACTIVE(vp, p);
}

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	struct vnode *vp;
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list. The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	simple_lock(&vp->v_interlock);
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt vp %p", vp);
	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list. The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */

	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required, vp %p", vp);
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vref", vp);
		panic("vref: usecount overflow, vp %p", vp);
	}
#endif
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If FORCECLOSE is not specified, there should not be any active ones;
 * return error if any are found (nb: this is a user error, not a
 * system error). If FORCECLOSE is specified, detach any active vnodes
 * that are found.
 *
 * If WRITECLOSE is set, only flush out regular file vnodes open for
 * writing.
 *
 * SKIPSYSTEM causes any vnodes marked V_SYSTEM to be skipped.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = LIST_NEXT(vp, v_mntvnodes);
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}
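
/*
 * Usage sketch (hypothetical caller): unmount code typically flushes
 * everything but the root it still holds, forcibly when MNT_FORCE was
 * given:
 *
 *	if (mntflags & MNT_FORCE)
 *		flags |= FORCECLOSE;
 *	error = vflush(mp, rvp, flags);
 *
 * where rvp stands in for the filesystem root passed as skipvp.
 */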

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */

	if ((active = vp->v_usecount) != 0) {
		vp->v_usecount++;
#ifdef DIAGNOSTIC
		if (vp->v_usecount == 0) {
			vprint("vclean", vp);
			panic("vclean: usecount overflow");
		}
#endif
	}

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock, vp %p", vp);
	vp->v_flag |= VXLOCK;
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);

	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);

	/*
	 * Clean out any cached data associated with the vnode.
	 */
	if (flags & DOCLOSE) {
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
		KASSERT((vp->v_flag & VONWORKLST) == 0);
	}
	LOCK_ASSERT(!simple_lock_held(&vp->v_interlock));

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim, vp %p", vp);
	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			/*
			 * Insert at tail of LRU list.
			 */

			simple_unlock(&vp->v_interlock);
			simple_lock(&vnode_free_list_slock);
#ifdef DIAGNOSTIC
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean, vp %p", vp);
#endif
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			simple_unlock(&vnode_free_list_slock);
		} else
			simple_unlock(&vp->v_interlock);
	}

	KASSERT(vp->v_uobj.uo_npages == 0);
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	simple_lock(&vp->v_interlock);
	VN_KNOTE(vp, NOTE_REVOKE);	/* FreeBSD has this in vn_pollgone() */
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		simple_unlock(&vp->v_interlock);
		wakeup((caddr_t)vp);
	} else
		simple_unlock(&vp->v_interlock);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */

	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */

	vclean(vp, DOCLOSE, p);
	KASSERT((vp->v_flag & VONWORKLST) == 0);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */

	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);

	/*
	 * If special device, remove it from special device alias list,
	 * if it is on one.
	 */

	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (vp->v_hashchain != NULL) {
			if (*vp->v_hashchain == vp) {
				*vp->v_hashchain = vp->v_specnext;
			} else {
				for (vq = *vp->v_hashchain; vq;
				    vq = vq->v_specnext) {
					if (vq->v_specnext != vp)
						continue;
					vq->v_specnext = vp->v_specnext;
					break;
				}
				if (vq == NULL)
					panic("missing bdev");
			}
			if (vp->v_flag & VALIASED) {
				vx = NULL;
				for (vq = *vp->v_hashchain; vq;
				    vq = vq->v_specnext) {
					if (vq->v_rdev != vp->v_rdev ||
					    vq->v_type != vp->v_type)
						continue;
					if (vx)
						break;
					vx = vq;
				}
				if (vx == NULL)
					panic("missing alias");
				if (vq == NULL)
					vx->v_flag &= ~VALIASED;
				vp->v_flag &= ~VALIASED;
			}
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}

	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */

	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean, vp %p", vp);
		if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
		    TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(maj, minl, minh, type)
	int maj, minl, minh;
	enum vtype type;
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp &&
		    (vq->v_flag & VXLOCK) == 0) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}
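
/*
 * Example (sketch): a device driver's detach routine uses vdevgone()
 * above to revoke any vnodes still referring to its units before the
 * driver goes away, e.g.
 *
 *	vdevgone(bmaj, 0, minor_last, VBLK);
 *	vdevgone(cmaj, 0, minor_last, VCHR);
 *
 * with bmaj, cmaj and minor_last standing in for the driver's real
 * block and character majors and its highest unit.
 */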

/*
 * Print out a description of a vnode.
 */
const char * const vnode_types[] = {
	"VNON",
	"VREG",
	"VDIR",
	"VBLK",
	"VCHR",
	"VLNK",
	"VSOCK",
	"VFIFO",
	"VBAD"
};

void
vprint(label, vp)
	char *label;
	struct vnode *vp;
{
	char buf[96];

	if (label != NULL)
		printf("%s: ", label);
	printf("tag %d type %s, usecount %d, writecount %ld, refcount %ld,",
	    vp->v_tag, vnode_types[vp->v_type],
	    vp->v_usecount, vp->v_writecount, vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strlcat(buf, "|VROOT", sizeof(buf));
	if (vp->v_flag & VTEXT)
		strlcat(buf, "|VTEXT", sizeof(buf));
	if (vp->v_flag & VEXECMAP)
		strlcat(buf, "|VEXECMAP", sizeof(buf));
	if (vp->v_flag & VSYSTEM)
		strlcat(buf, "|VSYSTEM", sizeof(buf));
	if (vp->v_flag & VXLOCK)
		strlcat(buf, "|VXLOCK", sizeof(buf));
	if (vp->v_flag & VXWANT)
		strlcat(buf, "|VXWANT", sizeof(buf));
	if (vp->v_flag & VBWAIT)
		strlcat(buf, "|VBWAIT", sizeof(buf));
	if (vp->v_flag & VALIASED)
		strlcat(buf, "|VALIASED", sizeof(buf));
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	    mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp))
				vprint(NULL, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	struct vfsconf vfc;
	extern const char * const mountcompatnames[];
	extern int nmountcompatnames;
#endif
	struct vfsops *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */

	/* Not generic: goes to file system. */
	if (name[0] != VFS_GENERIC) {
		static const struct ctlname vfsnames[VFS_MAXID+1]=CTL_VFS_NAMES;
		const char *vfsname;

		if (name[0] < 0 || name[0] > VFS_MAXID
		    || (vfsname = vfsnames[name[0]].ctl_name) == NULL)
			return (EOPNOTSUPP);

		vfsp = vfs_getopsbyname(vfsname);
		if (vfsp == NULL || vfsp->vfs_sysctl == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}

	/* The rest are generic vfs sysctls. */
	switch (name[1]) {
	case VFS_USERMOUNT:
		return sysctl_int(oldp, oldlenp, newp, newlen, &dovfsusermount);
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	case VFS_MAXTYPENUM:
		/*
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
	case VFS_CONF:
		/*
		 * Special: a node, next is a file system name.
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		if (name[2] >= nmountcompatnames || name[2] < 0 ||
		    mountcompatnames[name[2]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		vfc.vfc_vfsops = vfsp;
		strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
		vfc.vfc_typenum = name[2];
		vfc.vfc_refcount = vfsp->vfs_refcount;
		vfc.vfc_flags = 0;
		vfc.vfc_mountroot = vfsp->vfs_mountroot;
		vfc.vfc_next = NULL;
		return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
		    sizeof(struct vfsconf)));
#endif
	default:
		break;
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof(struct vnode *)
#define VNODESZ	sizeof(struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
	    mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = CIRCLEQ_NEXT(mp, mnt_list);
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = LIST_FIRST(&mp->mnt_vnodelist);
		    vp != NULL;
		    vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = LIST_NEXT(vp, v_mntvnodes);
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = CIRCLEQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}
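
/*
 * Userland view (sketch): the generic node handled by vfs_sysctl()
 * above answers queries such as
 *
 *	int mib[3] = { CTL_VFS, VFS_GENERIC, VFS_USERMOUNT };
 *	int val;
 *	size_t len = sizeof(val);
 *	sysctl(mib, 3, &val, &len, NULL, 0);
 *
 * which is what sysctl(8) prints for "vfs.generic.usermount".
 */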

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

static int
sacheck(struct sockaddr *sa)
{
	switch (sa->sa_family) {
#ifdef INET
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)sa;
		char *p = (char *)sin->sin_zero;
		size_t i;

		if (sin->sin_len != sizeof(*sin))
			return -1;
		if (sin->sin_port != 0)
			return -1;
		for (i = 0; i < sizeof(sin->sin_zero); i++)
			if (*p++ != '\0')
				return -1;
		return 0;
	}
#endif
#ifdef INET6
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;

		if (sin6->sin6_len != sizeof(*sin6))
			return -1;
		if (sin6->sin6_port != 0)
			return -1;
		return 0;
	}
#endif
	default:
		return -1;
	}
}
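
/*
 * Sketch (illustrative only): an AF_INET export address that passes
 * sacheck() above must carry the exact structure length, a zero port,
 * and an all-zero sin_zero pad.  The memset() below clears both
 * sin_port and sin_zero in one go:
 *
 *	struct sockaddr_in sin;
 *
 *	memset(&sin, 0, sizeof(sin));
 *	sin.sin_family = AF_INET;
 *	sin.sin_len = sizeof(sin);
 *	sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
 *	KASSERT(sacheck((struct sockaddr *)&sin) == 0);
 */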

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	struct netcred *np, *enp;
	struct radix_node_head *rnh;
	int i;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		crcvt(&np->netc_anon, &argp->ex_anon);
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}

	if (argp->ex_addrlen > MLEN || argp->ex_masklen > MLEN)
		return (EINVAL);

	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	memset((caddr_t)np, 0, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
	if (error)
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (sacheck(saddr) == -1) {
		/* Don't leak np on a malformed address. */
		error = EINVAL;
		goto out;
	}
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
		if (smask->sa_family != saddr->sa_family) {
			error = EINVAL;
			goto out;
		}
		if (sacheck(smask) == -1) {
			error = EINVAL;
			goto out;
		}
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * It seems silly to initialize every address family
		 * when most are not used; do so on demand here.
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}

	enp = (struct netcred *)(*rnh->rnh_addaddr)(saddr, smask, rnh,
	    np->netc_rnodes);
	if (enp != np) {
		if (enp == NULL) {
			enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
			    smask, rnh);
			if (enp == NULL) {
				error = EPERM;
				goto out;
			}
		} else
			enp->netc_refcnt++;

		goto check;
	} else
		enp->netc_refcnt = 1;

	np->netc_exflags = argp->ex_flags;
	crcvt(&np->netc_anon, &argp->ex_anon);
	np->netc_anon.cr_ref = 1;
	return 0;
check:
	if (enp->netc_exflags != argp->ex_flags ||
	    crcmp(&enp->netc_anon, &argp->ex_anon) != 0)
		error = EPERM;
	else
		error = 0;
out:
	free(np, M_NETADDR);
	return error;
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	struct radix_node_head *rnh = (struct radix_node_head *)w;
	struct netcred *np = (struct netcred *)(void *)rn;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	if (--(np->netc_refcnt) <= 0)
		free(np, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	int i;
	struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i]) != NULL) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		if (mp->mnt_flag & MNT_EXPUBLIC) {
			vfs_setpublicfs(NULL, NULL, NULL);
			mp->mnt_flag &= ~MNT_EXPUBLIC;
		}
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (argp->ex_flags & MNT_EXPUBLIC) {
			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
				return (error);
			mp->mnt_flag |= MNT_EXPUBLIC;
		}
		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}
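
/*
 * Usage sketch (illustrative; the ump/args field names are
 * hypothetical): a file system's mount routine forwards the
 * export_args from an MNT_UPDATE mount to vfs_export(), e.g.
 *
 *	if (args.fspec == NULL)
 *		return (vfs_export(mp, &ump->um_export, &args.export));
 *
 * MNT_DELEXPORT in ex_flags tears the lists down via
 * vfs_free_addrlist(); MNT_EXPORTED rebuilds them via
 * vfs_hang_addrlist(), optionally registering a WebNFS public
 * export with MNT_EXPUBLIC.
 */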

/*
 * Set the publicly exported filesystem (WebNFS).  Currently, only
 * one public filesystem is possible in the spec (RFC 2054 and 2055).
 */
int
vfs_setpublicfs(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;
	struct vnode *rvp;
	char *cp;

	/*
	 * mp == NULL -> invalidate the current info; the FS is
	 * no longer exported.  May be called from either vfs_export
	 * or unmount, so check if it hasn't already been done.
	 */
	if (mp == NULL) {
		if (nfs_pub.np_valid) {
			nfs_pub.np_valid = 0;
			if (nfs_pub.np_index != NULL) {
				FREE(nfs_pub.np_index, M_TEMP);
				nfs_pub.np_index = NULL;
			}
		}
		return (0);
	}

	/*
	 * Only one allowed at a time.
	 */
	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
		return (EBUSY);

	/*
	 * Get real filehandle for root of exported FS.
	 */
	memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;

	if ((error = VFS_ROOT(mp, &rvp)))
		return (error);

	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
		return (error);

	vput(rvp);

	/*
	 * If an indexfile was specified, pull it in.
	 */
	if (argp->ex_indexfile != NULL) {
		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
		    M_WAITOK);
		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
		    MAXNAMLEN, (size_t *)0);
		if (!error) {
			/*
			 * Check for illegal filenames.
			 */
			for (cp = nfs_pub.np_index; *cp; cp++) {
				if (*cp == '/') {
					error = EINVAL;
					break;
				}
			}
		}
		if (error) {
			FREE(nfs_pub.np_index, M_TEMP);
			return (error);
		}
	}

	nfs_pub.np_mount = mp;
	nfs_pub.np_valid = 1;
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	struct netcred *np;
	struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
				    (*rnh->rnh_matchaddr)((caddr_t)saddr,
				    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Do the usual access checking.
 * file_mode, uid and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list.
 */
int
vaccess(type, file_mode, uid, gid, acc_mode, cred)
	enum vtype type;
	mode_t file_mode;
	uid_t uid;
	gid_t gid;
	mode_t acc_mode;
	struct ucred *cred;
{
	mode_t mask;

	/*
	 * Super-user always gets read/write access, but execute access depends
	 * on at least one execute bit being set.
	 */
	if (cred->cr_uid == 0) {
		if ((acc_mode & VEXEC) && type != VDIR &&
		    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
			return (EACCES);
		return (0);
	}

	mask = 0;

	/* Otherwise, check the owner. */
	if (cred->cr_uid == uid) {
		if (acc_mode & VEXEC)
			mask |= S_IXUSR;
		if (acc_mode & VREAD)
			mask |= S_IRUSR;
		if (acc_mode & VWRITE)
			mask |= S_IWUSR;
		return ((file_mode & mask) == mask ? 0 : EACCES);
	}

	/* Otherwise, check the groups. */
	if (cred->cr_gid == gid || groupmember(gid, cred)) {
		if (acc_mode & VEXEC)
			mask |= S_IXGRP;
		if (acc_mode & VREAD)
			mask |= S_IRGRP;
		if (acc_mode & VWRITE)
			mask |= S_IWGRP;
		return ((file_mode & mask) == mask ? 0 : EACCES);
	}

	/* Otherwise, check everyone else. */
	if (acc_mode & VEXEC)
		mask |= S_IXOTH;
	if (acc_mode & VREAD)
		mask |= S_IROTH;
	if (acc_mode & VWRITE)
		mask |= S_IWOTH;
	return ((file_mode & mask) == mask ? 0 : EACCES);
}
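
/*
 * Worked example (illustrative only): a regular file with mode 0640,
 * checked for VREAD|VWRITE by a non-root credential matching the
 * owner uid, takes the owner branch above:
 *
 *	mask = S_IRUSR|S_IWUSR;			(0600)
 *	(0640 & 0600) == 0600			-> access granted
 *
 * The same request from a mere group member computes
 * mask = S_IRGRP|S_IWGRP (0060), and 0640 & 0060 yields 0040 != 0060,
 * so EACCES is returned: the group has read but not write permission.
 */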

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall(p)
	struct proc *p;
{
	struct mount *mp, *nmp;
	int allerror, error;

	for (allerror = 0,
	     mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
		printf("unmounting %s (%s)...\n",
		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
		/*
		 * XXX Freeze syncer.  Must do this before locking the
		 * mount point.  See dounmount() for details.
		 */
		lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
		if (vfs_busy(mp, 0, 0)) {
			lockmgr(&syncer_lock, LK_RELEASE, NULL);
			continue;
		}
		if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}
	if (allerror)
		printf("WARNING: some file systems would not unmount\n");
}
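
/*
 * Note (sketch): the hand-rolled reverse walk in vfs_unmountall()
 * is the CIRCLEQ reverse-traversal idiom, fetching the predecessor
 * before dounmount() can unlink mp from the list:
 *
 *	for (mp = CIRCLEQ_LAST(&mountlist); mp != (void *)&mountlist;
 *	     mp = nmp) {
 *		nmp = CIRCLEQ_PREV(mp, mnt_list);
 *		...
 *	}
 */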

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
	struct buf *bp;
	int iter, nbusy, nbusy_prev = 0, dcount, s;
	struct lwp *l = curlwp;
	struct proc *p;

	/* XXX we're certainly not running in proc0's context! */
	if (l == NULL || (p = l->l_proc) == NULL)
		p = &proc0;

	printf("syncing disks... ");

	/* remove user processes from the run queue */
	suspendsched();
	(void) spl0();

	/* avoid coming back this way again if we panic. */
	doing_shutdown = 1;

	sys_sync(l, NULL, NULL);

	/* Wait for sync to finish. */
	dcount = 10000;
	for (iter = 0; iter < 20;) {
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; ) {
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				nbusy++;
			/*
			 * With soft updates, some buffers that are
			 * written will be remarked as dirty until other
			 * buffers are written.
			 */
			if (bp->b_vp && bp->b_vp->v_mount
			    && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP)
			    && (bp->b_flags & B_DELWRI)) {
				s = splbio();
				bremfree(bp);
				bp->b_flags |= B_BUSY;
				splx(s);
				nbusy++;
				bawrite(bp);
				if (dcount-- <= 0) {
					printf("softdep ");
					goto fail;
				}
			}
		}
		if (nbusy == 0)
			break;
		if (nbusy_prev == 0)
			nbusy_prev = nbusy;
		printf("%d ", nbusy);
		tsleep(&nbusy, PRIBIO, "bflush",
		    (iter == 0) ? 1 : hz / 25 * iter);
		if (nbusy >= nbusy_prev) /* we didn't flush anything */
			iter++;
		else
			nbusy_prev = nbusy;
	}
	if (nbusy) {
fail:
#if defined(DEBUG) || defined(DEBUG_HALT_BUSY)
		printf("giving up\nPrinting vnodes for busy buffers\n");
		for (bp = &buf[nbuf]; --bp >= buf; )
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				vprint(NULL, bp->b_vp);

#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif

#else /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
		printf("giving up\n");
#endif /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
		return;
	} else
		printf("done\n");

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems. */
	vfs_unmountall(p);
}

/*
 * Mount the root file system.  If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot()
{
	struct vfsops *v;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	switch (root_device->dv_class) {
	case DV_IFNET:
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET "
			    "(0x%08x -> %d,%d)", rootdev,
			    major(rootdev), minor(rootdev));
		break;

	case DV_DISK:
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    root_device->dv_xname);
		return (ENODEV);
	}

	/*
	 * If user specified a file system, use it.
	 */
	if (mountroot != NULL)
		return ((*mountroot)());

	/*
	 * Try each file system currently configured into the kernel.
	 */
	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (v->vfs_mountroot == NULL)
			continue;
#ifdef DEBUG
		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
#endif
		if ((*v->vfs_mountroot)() == 0) {
			aprint_normal("root file system type: %s\n",
			    v->vfs_name);
			break;
		}
	}

	if (v == NULL) {
		printf("no file system for %s", root_device->dv_xname);
		if (root_device->dv_class == DV_DISK)
			printf(" (dev 0x%x)", rootdev);
		printf("\n");
		return (EFTYPE);
	}
	return (0);
}

/*
 * Given a file system name, look up the vfsops for that
 * file system, or return NULL if the file system isn't present
 * in the kernel.
 */
struct vfsops *
vfs_getopsbyname(name)
	const char *name;
{
	struct vfsops *v;

	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (strcmp(v->vfs_name, name) == 0)
			break;
	}

	return (v);
}

/*
 * Establish a file system and initialize it.
 */
int
vfs_attach(vfs)
	struct vfsops *vfs;
{
	struct vfsops *v;
	int error = 0;

	/*
	 * Make sure this file system doesn't already exist.
	 */
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
			error = EEXIST;
			goto out;
		}
	}

	/*
	 * Initialize the vnode operations for this file system.
	 */
	vfs_opv_init(vfs->vfs_opv_descs);

	/*
	 * Now initialize the file system itself.
	 */
	(*vfs->vfs_init)();

	/*
	 * ...and link it into the kernel's list.
	 */
	LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);

	/*
	 * Sanity: make sure the reference count is 0.
	 */
	vfs->vfs_refcount = 0;

out:
	return (error);
}
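
/*
 * Usage sketch (illustrative; "examplefs" is hypothetical): a file
 * system exports a vfsops table and registers it with vfs_attach(),
 * typically at boot or from an LKM load hook:
 *
 *	extern struct vfsops examplefs_vfsops;
 *	int error;
 *
 *	error = vfs_attach(&examplefs_vfsops);
 *	if (error == EEXIST)
 *		printf("examplefs: file system already attached\n");
 *
 * vfs_detach() below undoes the registration, failing with EBUSY
 * while vfs_refcount is non-zero (i.e. while anything is mounted).
 */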

/*
 * Remove a file system from the kernel.
 */
int
vfs_detach(vfs)
	struct vfsops *vfs;
{
	struct vfsops *v;

	/*
	 * Make sure no one is using the filesystem.
	 */
	if (vfs->vfs_refcount != 0)
		return (EBUSY);

	/*
	 * ...and remove it from the kernel's list.
	 */
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (v == vfs) {
			LIST_REMOVE(v, vfs_list);
			break;
		}
	}

	if (v == NULL)
		return (ESRCH);

	/*
	 * Now run the file system-specific cleanups.
	 */
	(*vfs->vfs_done)();

	/*
	 * Free the vnode operations vector.
	 */
	vfs_opv_free(vfs->vfs_opv_descs);
	return (0);
}

void
vfs_reinit(void)
{
	struct vfsops *vfs;

	LIST_FOREACH(vfs, &vfs_list, vfs_list) {
		if (vfs->vfs_reinit) {
			(*vfs->vfs_reinit)();
		}
	}
}

void
copy_statfs_info(struct statfs *sbp, const struct mount *mp)
{
	const struct statfs *mbp;

	if (sbp == (mbp = &mp->mnt_stat))
		return;

	sbp->f_oflags = mbp->f_oflags;
	sbp->f_type = mbp->f_type;
	(void)memcpy(&sbp->f_fsid, &mbp->f_fsid, sizeof(sbp->f_fsid));
	sbp->f_owner = mbp->f_owner;
	sbp->f_flags = mbp->f_flags;
	sbp->f_syncwrites = mbp->f_syncwrites;
	sbp->f_asyncwrites = mbp->f_asyncwrites;
	sbp->f_spare[0] = mbp->f_spare[0];
	(void)memcpy(sbp->f_fstypename, mbp->f_fstypename,
	    sizeof(sbp->f_fstypename));
	(void)memcpy(sbp->f_mntonname, mbp->f_mntonname,
	    sizeof(sbp->f_mntonname));
	(void)memcpy(sbp->f_mntfromname, mbp->f_mntfromname,
	    sizeof(sbp->f_mntfromname));
}
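
/*
 * Usage sketch (illustrative; examplefs_statfs is hypothetical): a
 * VFS_STATFS implementation fills in its fs-specific counters and
 * then lets copy_statfs_info() supply the generic mount fields:
 *
 *	int
 *	examplefs_statfs(struct mount *mp, struct statfs *sbp,
 *	    struct proc *p)
 *	{
 *		sbp->f_bsize = ...;
 *		sbp->f_blocks = ...;
 *		copy_statfs_info(sbp, mp);
 *		return (0);
 *	}
 *
 * The sbp == &mp->mnt_stat early return above makes the call a no-op
 * when the caller is filling in the mount's own statfs structure.
 */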

int
set_statfs_info(const char *onp, int ukon, const char *fromp, int ukfrom,
    struct mount *mp, struct proc *p)
{
	int error;
	size_t size;
	struct statfs *sfs = &mp->mnt_stat;
	int (*fun)(const void *, void *, size_t, size_t *);

	(void)strncpy(mp->mnt_stat.f_fstypename, mp->mnt_op->vfs_name,
	    sizeof(mp->mnt_stat.f_fstypename));

	if (onp) {
		struct cwdinfo *cwdi = p->p_cwdi;
		fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr;
		if (cwdi->cwdi_rdir != NULL) {
			size_t len;
			char *bp;
			char *path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);

			if (!path)
				return ENOMEM;

			bp = path + MAXPATHLEN;
			*--bp = '\0';
			error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp,
			    path, MAXPATHLEN / 2, 0, p);
			if (error) {
				free(path, M_TEMP);
				return error;
			}

			len = strlen(bp);
			if (len > sizeof(sfs->f_mntonname) - 1)
				len = sizeof(sfs->f_mntonname) - 1;
			(void)strncpy(sfs->f_mntonname, bp, len);
			free(path, M_TEMP);

			if (len < sizeof(sfs->f_mntonname) - 1) {
				error = (*fun)(onp, &sfs->f_mntonname[len],
				    sizeof(sfs->f_mntonname) - len - 1, &size);
				if (error)
					return error;
				size += len;
			} else {
				size = len;
			}
		} else {
			error = (*fun)(onp, &sfs->f_mntonname,
			    sizeof(sfs->f_mntonname) - 1, &size);
			if (error)
				return error;
		}
		(void)memset(sfs->f_mntonname + size, 0,
		    sizeof(sfs->f_mntonname) - size);
	}

	if (fromp) {
		fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr;
		error = (*fun)(fromp, sfs->f_mntfromname,
		    sizeof(sfs->f_mntfromname) - 1, &size);
		if (error)
			return error;
		(void)memset(sfs->f_mntfromname + size, 0,
		    sizeof(sfs->f_mntfromname) - size);
	}
	return 0;
}

#ifdef DDB
const char buf_flagbits[] =
	"\20\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI"
	"\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE"
	"\21ORDERED\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED"
	"\32XXX\33VFLUSH";

void
vfs_buf_print(bp, full, pr)
	struct buf *bp;
	int full;
	void (*pr) __P((const char *, ...));
{
	char buf[1024];

	(*pr)(" vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" dev 0x%x\n",
	    bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);

	bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf));
	(*pr)(" error %d flags 0x%s\n", bp->b_error, buf);

	(*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
	    bp->b_bufsize, bp->b_bcount, bp->b_resid);
	(*pr)(" data %p saveaddr %p dep %p\n",
	    bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
	(*pr)(" iodone %p\n", bp->b_iodone);
}

const char vnode_flagbits[] =
	"\20\1ROOT\2TEXT\3SYSTEM\4ISTTY\5EXECMAP"
	"\11XLOCK\12XWANT\13BWAIT\14ALIASED"
	"\15DIROP\16LAYER\17ONWORKLIST\20DIRTY";

const char * const vnode_tags[] = {
	"VT_NON",
	"VT_UFS",
	"VT_NFS",
	"VT_MFS",
	"VT_MSDOSFS",
	"VT_LFS",
	"VT_LOFS",
	"VT_FDESC",
	"VT_PORTAL",
	"VT_NULL",
	"VT_UMAP",
	"VT_KERNFS",
	"VT_PROCFS",
	"VT_AFS",
	"VT_ISOFS",
	"VT_UNION",
	"VT_ADOSFS",
	"VT_EXT2FS",
	"VT_CODA",
	"VT_FILECORE",
	"VT_NTFS",
	"VT_VFS",
	"VT_OVERLAY",
	"VT_SMBFS"
};

void
vfs_vnode_print(vp, full, pr)
	struct vnode *vp;
	int full;
	void (*pr) __P((const char *, ...));
{
	char buf[256];
	const char *vtype, *vtag;

	uvm_object_printit(&vp->v_uobj, full, pr);
	bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
	(*pr)("\nVNODE flags %s\n", buf);
	(*pr)("mp %p numoutput %d size 0x%llx\n",
	    vp->v_mount, vp->v_numoutput, vp->v_size);

	(*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
	    vp->v_data, vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt, vp->v_numoutput);

	vtype = (vp->v_type >= 0 &&
	    vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ?
	    vnode_types[vp->v_type] : "UNKNOWN";
	vtag = (vp->v_tag >= 0 &&
	    vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ?
	    vnode_tags[vp->v_tag] : "UNKNOWN";

	(*pr)("type %s(%d) tag %s(%d) id 0x%lx mount %p typedata %p\n",
	    vtype, vp->v_type, vtag, vp->v_tag,
	    vp->v_id, vp->v_mount, vp->v_mountedhere);

	if (full) {
		struct buf *bp;

		(*pr)("clean bufs:\n");
		LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}

		(*pr)("dirty bufs:\n");
		LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}
	}
}
#endif
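
/*
 * Note (sketch): the flag-bit strings above use the historic kernel
 * "%b" encoding understood by bitmask_snprintf(): the leading \20
 * (octal 020 = 16) selects hexadecimal output, and each following
 * bit number/name pair labels that bit, counted from 1.  A buffer
 * with B_AGE (bit 1) and B_BUSY (bit 5) set would thus be printed
 * by vfs_buf_print() as:
 *
 *	error 0 flags 0x11<AGE,BUSY>
 */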