1 /* $NetBSD: vfs_subr.c,v 1.118 2000/03/03 05:21:03 mycroft Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1989, 1993 42 * The Regents of the University of California. All rights reserved. 43 * (c) UNIX System Laboratories, Inc. 44 * All or some portions of this file are derived from material licensed 45 * to the University of California by American Telephone and Telegraph 46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 47 * the permission of UNIX System Laboratories, Inc. 48 * 49 * Redistribution and use in source and binary forms, with or without 50 * modification, are permitted provided that the following conditions 51 * are met: 52 * 1. Redistributions of source code must retain the above copyright 53 * notice, this list of conditions and the following disclaimer. 54 * 2. Redistributions in binary form must reproduce the above copyright 55 * notice, this list of conditions and the following disclaimer in the 56 * documentation and/or other materials provided with the distribution. 57 * 3. All advertising materials mentioning features or use of this software 58 * must display the following acknowledgement: 59 * This product includes software developed by the University of 60 * California, Berkeley and its contributors. 61 * 4. Neither the name of the University nor the names of its contributors 62 * may be used to endorse or promote products derived from this software 63 * without specific prior written permission. 
64 * 65 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 66 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 68 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 69 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 70 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 71 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 72 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 73 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 74 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 75 * SUCH DAMAGE. 76 * 77 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 78 */ 79 80 /* 81 * External virtual filesystem routines 82 */ 83 84 #include "opt_compat_netbsd.h" 85 #include "opt_compat_43.h" 86 87 #include <sys/param.h> 88 #include <sys/systm.h> 89 #include <sys/proc.h> 90 #include <sys/mount.h> 91 #include <sys/time.h> 92 #include <sys/fcntl.h> 93 #include <sys/vnode.h> 94 #include <sys/stat.h> 95 #include <sys/namei.h> 96 #include <sys/ucred.h> 97 #include <sys/buf.h> 98 #include <sys/errno.h> 99 #include <sys/malloc.h> 100 #include <sys/domain.h> 101 #include <sys/mbuf.h> 102 #include <sys/syscallargs.h> 103 #include <sys/device.h> 104 #include <sys/dirent.h> 105 106 #include <vm/vm.h> 107 #include <sys/sysctl.h> 108 109 #include <miscfs/specfs/specdev.h> 110 #include <miscfs/genfs/genfs.h> 111 #include <miscfs/syncfs/syncfs.h> 112 113 #include <uvm/uvm_extern.h> 114 115 enum vtype iftovt_tab[16] = { 116 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 117 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 118 }; 119 int vttoif_tab[9] = { 120 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 121 S_IFSOCK, S_IFIFO, S_IFMT, 122 }; 123 124 int doforce = 1; /* 1 => permit forcible unmounting */ 125 int prtactive = 0; /* 1 => print out reclaim of active vnodes */ 126 127 extern int dovfsusermount; /* 1 => permit any user to mount filesystems */ 128 129 /* 130 * Insq/Remq for the vnode usage lists. 131 */ 132 #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) 133 #define bufremvn(bp) { \ 134 LIST_REMOVE(bp, b_vnbufs); \ 135 (bp)->b_vnbufs.le_next = NOLIST; \ 136 } 137 /* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */ 138 struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list); 139 struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list); 140 141 struct mntlist mountlist = /* mounted filesystem list */ 142 CIRCLEQ_HEAD_INITIALIZER(mountlist); 143 struct vfs_list_head vfs_list = /* vfs list */ 144 LIST_HEAD_INITIALIZER(vfs_list); 145 146 struct nfs_public nfs_pub; /* publicly exported FS */ 147 148 struct simplelock mountlist_slock; 149 static struct simplelock mntid_slock; 150 struct simplelock mntvnode_slock; 151 struct simplelock vnode_free_list_slock; 152 struct simplelock spechash_slock; 153 154 /* 155 * These define the root filesystem and device. 156 */ 157 struct mount *rootfs; 158 struct vnode *rootvnode; 159 struct device *root_device; /* root device */ 160 161 struct pool vnode_pool; /* memory pool for vnodes */ 162 163 /* 164 * Local declarations. 
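 *
 * A note on the iftovt_tab/vttoif_tab tables above: they back the
 * IFTOVT() and VTTOIF() macros, translating the file-type bits of a
 * mode to an internal vnode type and back.  A minimal sketch of the
 * mapping, assuming the standard layout in which S_IFMT occupies
 * bits 12-15 of the mode:
 *
 *	enum vtype vt = iftovt_tab[(mode & S_IFMT) >> 12];
 *	int ifmt = vttoif_tab[(int)vt];
 *
 * so S_IFREG (0100000) indexes slot 8 and yields VREG, and VREG maps
 * back to S_IFREG.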
165 */ 166 void insmntque __P((struct vnode *, struct mount *)); 167 int getdevvp __P((dev_t, struct vnode **, enum vtype)); 168 void vgoneall __P((struct vnode *)); 169 170 static int vfs_hang_addrlist __P((struct mount *, struct netexport *, 171 struct export_args *)); 172 static int vfs_free_netcred __P((struct radix_node *, void *)); 173 static void vfs_free_addrlist __P((struct netexport *)); 174 175 #ifdef DEBUG 176 void printlockedvnodes __P((void)); 177 #endif 178 179 /* 180 * Initialize the vnode management data structures. 181 */ 182 void 183 vntblinit() 184 { 185 186 simple_lock_init(&mntvnode_slock); 187 simple_lock_init(&mntid_slock); 188 simple_lock_init(&spechash_slock); 189 simple_lock_init(&vnode_free_list_slock); 190 191 pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl", 192 0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE); 193 194 /* 195 * Initialize the filesystem syncer. 196 */ 197 vn_initialize_syncerd(); 198 } 199 200 /* 201 * Mark a mount point as busy. Used to synchronize access and to delay 202 * unmounting. Interlock is not released on failure. 203 */ 204 int 205 vfs_busy(mp, flags, interlkp) 206 struct mount *mp; 207 int flags; 208 struct simplelock *interlkp; 209 { 210 int lkflags; 211 212 while (mp->mnt_flag & MNT_UNMOUNT) { 213 int gone; 214 215 if (flags & LK_NOWAIT) 216 return (ENOENT); 217 if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL 218 && mp->mnt_unmounter == curproc) 219 return (EDEADLK); 220 if (interlkp) 221 simple_unlock(interlkp); 222 /* 223 * Since all busy locks are shared except the exclusive 224 * lock granted when unmounting, the only place that a 225 * wakeup needs to be done is at the release of the 226 * exclusive lock at the end of dounmount. 227 * 228 * XXX MP: add spinlock protecting mnt_wcnt here once you 229 * can atomically unlock-and-sleep. 230 */ 231 mp->mnt_wcnt++; 232 tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); 233 mp->mnt_wcnt--; 234 gone = mp->mnt_flag & MNT_GONE; 235 236 if (mp->mnt_wcnt == 0) 237 wakeup(&mp->mnt_wcnt); 238 if (interlkp) 239 simple_lock(interlkp); 240 if (gone) 241 return (ENOENT); 242 } 243 lkflags = LK_SHARED; 244 if (interlkp) 245 lkflags |= LK_INTERLOCK; 246 if (lockmgr(&mp->mnt_lock, lkflags, interlkp)) 247 panic("vfs_busy: unexpected lock failure"); 248 return (0); 249 } 250 251 /* 252 * Free a busy filesystem. 253 */ 254 void 255 vfs_unbusy(mp) 256 struct mount *mp; 257 { 258 259 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL); 260 } 261 262 /* 263 * Lookup a filesystem type, and if found allocate and initialize 264 * a mount structure for it. 265 * 266 * Devname is usually updated by mount(8) after booting. 
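 *
 * As context for vfs_busy()/vfs_unbusy() above: callers that walk a
 * mount point's data bracket the walk with the busy lock so that an
 * unmount cannot tear the structures down underneath them.  A sketch
 * of the usual pattern (the list walkers later in this file do the
 * same):
 *
 *	if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock) == 0) {
 *		... examine mp->mnt_vnodelist ...
 *		vfs_unbusy(mp);
 *	}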
267 */ 268 int 269 vfs_rootmountalloc(fstypename, devname, mpp) 270 char *fstypename; 271 char *devname; 272 struct mount **mpp; 273 { 274 struct vfsops *vfsp = NULL; 275 struct mount *mp; 276 277 for (vfsp = LIST_FIRST(&vfs_list); vfsp != NULL; 278 vfsp = LIST_NEXT(vfsp, vfs_list)) 279 if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN)) 280 break; 281 282 if (vfsp == NULL) 283 return (ENODEV); 284 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 285 memset((char *)mp, 0, (u_long)sizeof(struct mount)); 286 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); 287 (void)vfs_busy(mp, LK_NOWAIT, 0); 288 LIST_INIT(&mp->mnt_vnodelist); 289 mp->mnt_op = vfsp; 290 mp->mnt_flag = MNT_RDONLY; 291 mp->mnt_vnodecovered = NULLVP; 292 vfsp->vfs_refcount++; 293 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN); 294 mp->mnt_stat.f_mntonname[0] = '/'; 295 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 296 *mpp = mp; 297 return (0); 298 } 299 300 /* 301 * Lookup a mount point by filesystem identifier. 302 */ 303 struct mount * 304 vfs_getvfs(fsid) 305 fsid_t *fsid; 306 { 307 register struct mount *mp; 308 309 simple_lock(&mountlist_slock); 310 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 311 mp = mp->mnt_list.cqe_next) { 312 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 313 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { 314 simple_unlock(&mountlist_slock); 315 return (mp); 316 } 317 } 318 simple_unlock(&mountlist_slock); 319 return ((struct mount *)0); 320 } 321 322 /* 323 * Get a new unique fsid 324 */ 325 void 326 vfs_getnewfsid(mp, fstypename) 327 struct mount *mp; 328 char *fstypename; 329 { 330 static u_short xxxfs_mntid; 331 fsid_t tfsid; 332 int mtype; 333 334 simple_lock(&mntid_slock); 335 mtype = makefstype(fstypename); 336 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); 337 mp->mnt_stat.f_fsid.val[1] = mtype; 338 if (xxxfs_mntid == 0) 339 ++xxxfs_mntid; 340 tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid); 341 tfsid.val[1] = mtype; 342 if (mountlist.cqh_first != (void *)&mountlist) { 343 while (vfs_getvfs(&tfsid)) { 344 tfsid.val[0]++; 345 xxxfs_mntid++; 346 } 347 } 348 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 349 simple_unlock(&mntid_slock); 350 } 351 352 /* 353 * Make a 'unique' number from a mount type name. 354 */ 355 long 356 makefstype(type) 357 char *type; 358 { 359 long rv; 360 361 for (rv = 0; *type; type++) { 362 rv <<= 2; 363 rv ^= *type; 364 } 365 return rv; 366 } 367 368 369 /* 370 * Set vnode attributes to VNOVAL 371 */ 372 void 373 vattr_null(vap) 374 register struct vattr *vap; 375 { 376 377 vap->va_type = VNON; 378 379 /* 380 * Assign individually so that it is safe even if size and 381 * sign of each member are varied. 382 */ 383 vap->va_mode = VNOVAL; 384 vap->va_nlink = VNOVAL; 385 vap->va_uid = VNOVAL; 386 vap->va_gid = VNOVAL; 387 vap->va_fsid = VNOVAL; 388 vap->va_fileid = VNOVAL; 389 vap->va_size = VNOVAL; 390 vap->va_blocksize = VNOVAL; 391 vap->va_atime.tv_sec = 392 vap->va_mtime.tv_sec = 393 vap->va_ctime.tv_sec = VNOVAL; 394 vap->va_atime.tv_nsec = 395 vap->va_mtime.tv_nsec = 396 vap->va_ctime.tv_nsec = VNOVAL; 397 vap->va_gen = VNOVAL; 398 vap->va_flags = VNOVAL; 399 vap->va_rdev = VNOVAL; 400 vap->va_bytes = VNOVAL; 401 vap->va_vaflags = 0; 402 } 403 404 /* 405 * Routines having to do with the management of the vnode table. 406 */ 407 extern int (**dead_vnodeop_p) __P((void *)); 408 long numvnodes; 409 410 /* 411 * Return the next vnode from the free list. 
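 *
 * A worked example for makefstype() above: each character shifts the
 * accumulator left two bits and is XORed in, so for "ffs", with
 * 'f' == 102 and 's' == 115:
 *
 *	rv = (0 << 2) ^ 'f';	rv == 102
 *	rv = (rv << 2) ^ 'f';	rv == 510
 *	rv = (rv << 2) ^ 's';	rv == 1931
 *
 * The result is "unique" only in the sense that distinct short type
 * names rarely collide.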
412 */
413 int
414 getnewvnode(tag, mp, vops, vpp)
415 	enum vtagtype tag;
416 	struct mount *mp;
417 	int (**vops) __P((void *));
418 	struct vnode **vpp;
419 {
420 	struct proc *p = curproc;	/* XXX */
421 	struct freelst *listhd;
422 	static int toggle;
423 	struct vnode *vp;
424 	int error = 0;
425 #ifdef DIAGNOSTIC
426 	int s;
427 #endif
428 	if (mp) {
429 		/*
430 		 * Mark filesystem busy while we're creating a vnode.
431 		 * If unmount is in progress, this will wait; if the
432 		 * unmount succeeds (only if umount -f), this will
433 		 * return an error. If the unmount fails, we'll keep
434 		 * going afterwards.
435 		 * (This puts the per-mount vnode list logically under
436 		 * the protection of the vfs_busy lock).
437 		 */
438 		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
439 		if (error && error != EDEADLK)
440 			return error;
441 	}
442 
443 	/*
444 	 * We must choose whether to allocate a new vnode or recycle an
445 	 * existing one. The criterion for allocating a new one is that
446 	 * the total number of vnodes is less than the number desired or
447 	 * there are no vnodes on either free list. Generally we only
448 	 * want to recycle vnodes that have no buffers associated with
449 	 * them, so we look first on the vnode_free_list. If it is empty,
450 	 * we next consider vnodes with referencing buffers on the
451 	 * vnode_hold_list. The toggle ensures that half the time we
452 	 * will use a vnode from the vnode_hold_list, and half the time
453 	 * we will allocate a new one unless the list has grown to twice
454 	 * the desired size. We are reluctant to recycle vnodes from the
455 	 * vnode_hold_list because we will lose the identity of all their
456 	 * referencing buffers.
457 	 */
458 	toggle ^= 1;
459 	if (numvnodes > 2 * desiredvnodes)
460 		toggle = 0;
461 
462 	simple_lock(&vnode_free_list_slock);
463 	if (numvnodes < desiredvnodes ||
464 	    (TAILQ_FIRST(listhd = &vnode_free_list) == NULL &&
465 	    (TAILQ_FIRST(listhd = &vnode_hold_list) == NULL || toggle))) {
466 		simple_unlock(&vnode_free_list_slock);
467 		vp = pool_get(&vnode_pool, PR_WAITOK);
468 		memset((char *)vp, 0, sizeof(*vp));
469 		simple_lock_init(&vp->v_interlock);
470 		numvnodes++;
471 	} else {
472 		for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
473 		    vp = TAILQ_NEXT(vp, v_freelist)) {
474 			if (simple_lock_try(&vp->v_interlock)) {
475 				if ((vp->v_flag & VLAYER) == 0) {
476 					break;
477 				}
478 				if (VOP_ISLOCKED(vp) == 0)
479 					break;
480 				else
481 					simple_unlock(&vp->v_interlock);
482 			}
483 		}
484 		/*
485 		 * Unless this is a bad time of the month, at most
486 		 * the first NCPUS items on the free list are
487 		 * locked, so this is close enough to being empty.
488 */ 489 if (vp == NULLVP) { 490 simple_unlock(&vnode_free_list_slock); 491 if (mp && error != EDEADLK) 492 vfs_unbusy(mp); 493 tablefull("vnode"); 494 *vpp = 0; 495 return (ENFILE); 496 } 497 if (vp->v_usecount) 498 panic("free vnode isn't"); 499 TAILQ_REMOVE(listhd, vp, v_freelist); 500 /* see comment on why 0xdeadb is set at end of vgone (below) */ 501 vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb; 502 simple_unlock(&vnode_free_list_slock); 503 vp->v_lease = NULL; 504 if (vp->v_type != VBAD) 505 vgonel(vp, p); 506 else 507 simple_unlock(&vp->v_interlock); 508 #ifdef DIAGNOSTIC 509 if (vp->v_data) 510 panic("cleaned vnode isn't"); 511 s = splbio(); 512 if (vp->v_numoutput) 513 panic("Clean vnode has pending I/O's"); 514 splx(s); 515 #endif 516 vp->v_flag = 0; 517 vp->v_lastr = 0; 518 vp->v_ralen = 0; 519 vp->v_maxra = 0; 520 vp->v_lastw = 0; 521 vp->v_lasta = 0; 522 vp->v_cstart = 0; 523 vp->v_clen = 0; 524 vp->v_socket = 0; 525 } 526 vp->v_type = VNON; 527 vp->v_vnlock = &vp->v_lock; 528 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); 529 cache_purge(vp); 530 vp->v_tag = tag; 531 vp->v_op = vops; 532 insmntque(vp, mp); 533 *vpp = vp; 534 vp->v_usecount = 1; 535 vp->v_data = 0; 536 simple_lock_init(&vp->v_uvm.u_obj.vmobjlock); 537 if (mp && error != EDEADLK) 538 vfs_unbusy(mp); 539 return (0); 540 } 541 542 /* 543 * Move a vnode from one mount queue to another. 544 */ 545 void 546 insmntque(vp, mp) 547 register struct vnode *vp; 548 register struct mount *mp; 549 { 550 551 #ifdef DIAGNOSTIC 552 if ((mp != NULL) && 553 (mp->mnt_flag & MNT_UNMOUNT) && 554 !(mp->mnt_flag & MNT_SOFTDEP) && 555 vp->v_tag != VT_VFS) { 556 panic("insmntque into dying filesystem"); 557 } 558 #endif 559 560 simple_lock(&mntvnode_slock); 561 /* 562 * Delete from old mount point vnode list, if on one. 563 */ 564 if (vp->v_mount != NULL) 565 LIST_REMOVE(vp, v_mntvnodes); 566 /* 567 * Insert into list of vnodes for the new mount point, if available. 568 */ 569 if ((vp->v_mount = mp) != NULL) 570 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 571 simple_unlock(&mntvnode_slock); 572 } 573 574 /* 575 * Update outstanding I/O count and do wakeup if requested. 576 */ 577 void 578 vwakeup(bp) 579 register struct buf *bp; 580 { 581 register struct vnode *vp; 582 583 bp->b_flags &= ~B_WRITEINPROG; 584 if ((vp = bp->b_vp) != NULL) { 585 if (--vp->v_numoutput < 0) 586 panic("vwakeup: neg numoutput"); 587 if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { 588 vp->v_flag &= ~VBWAIT; 589 wakeup((caddr_t)&vp->v_numoutput); 590 } 591 } 592 } 593 594 /* 595 * Flush out and invalidate all buffers associated with a vnode. 596 * Called with the underlying object locked. 
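 *
 * As background for the VBWAIT handshake used below and in
 * vflushbuf(): each pending write increments vp->v_numoutput, and
 * vwakeup() above decrements it as each I/O completes, waking any
 * sleeper once the count drains while VBWAIT is set.  The waiting
 * side, sketched under splbio() protection:
 *
 *	s = splbio();
 *	while (vp->v_numoutput) {
 *		vp->v_flag |= VBWAIT;
 *		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vbwait", 0);
 *	}
 *	splx(s);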
597 */ 598 int 599 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) 600 register struct vnode *vp; 601 int flags; 602 struct ucred *cred; 603 struct proc *p; 604 int slpflag, slptimeo; 605 { 606 register struct buf *bp; 607 struct buf *nbp, *blist; 608 int s, error; 609 610 if (flags & V_SAVE) { 611 s = splbio(); 612 while (vp->v_numoutput) { 613 vp->v_flag |= VBWAIT; 614 tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, 615 "vbwait", 0); 616 } 617 if (vp->v_dirtyblkhd.lh_first != NULL) { 618 splx(s); 619 if ((error = VOP_FSYNC(vp, cred, FSYNC_WAIT, p)) != 0) 620 return (error); 621 s = splbio(); 622 if (vp->v_numoutput > 0 || 623 vp->v_dirtyblkhd.lh_first != NULL) 624 panic("vinvalbuf: dirty bufs"); 625 } 626 splx(s); 627 } 628 629 s = splbio(); 630 631 for (;;) { 632 if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA)) 633 while (blist && blist->b_lblkno < 0) 634 blist = blist->b_vnbufs.le_next; 635 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && 636 (flags & V_SAVEMETA)) { 637 while (blist && blist->b_lblkno < 0) 638 blist = blist->b_vnbufs.le_next; 639 } 640 if (!blist) 641 break; 642 643 for (bp = blist; bp; bp = nbp) { 644 nbp = bp->b_vnbufs.le_next; 645 if (flags & V_SAVEMETA && bp->b_lblkno < 0) 646 continue; 647 if (bp->b_flags & B_BUSY) { 648 bp->b_flags |= B_WANTED; 649 error = tsleep((caddr_t)bp, 650 slpflag | (PRIBIO + 1), "vinvalbuf", 651 slptimeo); 652 if (error) { 653 splx(s); 654 return (error); 655 } 656 break; 657 } 658 bp->b_flags |= B_BUSY | B_VFLUSH; 659 /* 660 * XXX Since there are no node locks for NFS, I believe 661 * there is a slight chance that a delayed write will 662 * occur while sleeping just above, so check for it. 663 */ 664 if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { 665 VOP_BWRITE(bp); 666 #ifdef DEBUG 667 printf("buffer still DELWRI\n"); 668 #endif 669 /* VOP_FSYNC(vp, cred, FSYNC_WAIT, p); */ 670 continue; 671 } 672 bp->b_flags |= B_INVAL; 673 brelse(bp); 674 } 675 } 676 677 if (!(flags & V_SAVEMETA) && 678 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) 679 panic("vinvalbuf: flush failed"); 680 681 splx(s); 682 683 return (0); 684 } 685 686 void 687 vflushbuf(vp, sync) 688 register struct vnode *vp; 689 int sync; 690 { 691 register struct buf *bp, *nbp; 692 int s; 693 694 loop: 695 s = splbio(); 696 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { 697 nbp = bp->b_vnbufs.le_next; 698 if ((bp->b_flags & B_BUSY)) 699 continue; 700 if ((bp->b_flags & B_DELWRI) == 0) 701 panic("vflushbuf: not dirty"); 702 bp->b_flags |= B_BUSY | B_VFLUSH; 703 splx(s); 704 /* 705 * Wait for I/O associated with indirect blocks to complete, 706 * since there is no way to quickly wait for them below. 707 */ 708 if (bp->b_vp == vp || sync == 0) 709 (void) bawrite(bp); 710 else 711 (void) bwrite(bp); 712 goto loop; 713 } 714 if (sync == 0) { 715 splx(s); 716 return; 717 } 718 while (vp->v_numoutput) { 719 vp->v_flag |= VBWAIT; 720 tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0); 721 } 722 splx(s); 723 if (vp->v_dirtyblkhd.lh_first != NULL) { 724 vprint("vflushbuf: dirty", vp); 725 goto loop; 726 } 727 } 728 729 /* 730 * Associate a buffer with a vnode. 731 */ 732 void 733 bgetvp(vp, bp) 734 register struct vnode *vp; 735 register struct buf *bp; 736 { 737 int s; 738 739 if (bp->b_vp) 740 panic("bgetvp: not free"); 741 VHOLD(vp); 742 s = splbio(); 743 bp->b_vp = vp; 744 if (vp->v_type == VBLK || vp->v_type == VCHR) 745 bp->b_dev = vp->v_rdev; 746 else 747 bp->b_dev = NODEV; 748 /* 749 * Insert onto list for new vnode. 
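 *
 * (New buffers always start on the clean list; reassignbuf() moves a
 * buffer to the dirty list once it is marked B_DELWRI.  The VHOLD()
 * taken above is paired with the HOLDRELE() in brelvp(), so the vnode
 * cannot be recycled while buffers still point at it.)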
750 */ 751 bufinsvn(bp, &vp->v_cleanblkhd); 752 splx(s); 753 } 754 755 /* 756 * Disassociate a buffer from a vnode. 757 */ 758 void 759 brelvp(bp) 760 register struct buf *bp; 761 { 762 struct vnode *vp; 763 int s; 764 765 if (bp->b_vp == (struct vnode *) 0) 766 panic("brelvp: NULL"); 767 768 s = splbio(); 769 vp = bp->b_vp; 770 /* 771 * Delete from old vnode list, if on one. 772 */ 773 if (bp->b_vnbufs.le_next != NOLIST) 774 bufremvn(bp); 775 if ((vp->v_flag & VONWORKLST) && LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { 776 vp->v_flag &= ~VONWORKLST; 777 LIST_REMOVE(vp, v_synclist); 778 } 779 bp->b_vp = (struct vnode *) 0; 780 HOLDRELE(vp); 781 splx(s); 782 } 783 784 /* 785 * Reassign a buffer from one vnode to another. 786 * Used to assign file specific control information 787 * (indirect blocks) to the vnode to which they belong. 788 * 789 * This function must be called at splbio(). 790 */ 791 void 792 reassignbuf(bp, newvp) 793 struct buf *bp; 794 struct vnode *newvp; 795 { 796 struct buflists *listheadp; 797 int delay; 798 799 if (newvp == NULL) { 800 printf("reassignbuf: NULL"); 801 return; 802 } 803 804 /* 805 * Delete from old vnode list, if on one. 806 */ 807 if (bp->b_vnbufs.le_next != NOLIST) 808 bufremvn(bp); 809 /* 810 * If dirty, put on list of dirty buffers; 811 * otherwise insert onto list of clean buffers. 812 */ 813 if ((bp->b_flags & B_DELWRI) == 0) { 814 listheadp = &newvp->v_cleanblkhd; 815 if ((newvp->v_flag & VONWORKLST) && 816 LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) { 817 newvp->v_flag &= ~VONWORKLST; 818 LIST_REMOVE(newvp, v_synclist); 819 } 820 } else { 821 listheadp = &newvp->v_dirtyblkhd; 822 if ((newvp->v_flag & VONWORKLST) == 0) { 823 switch (newvp->v_type) { 824 case VDIR: 825 delay = dirdelay; 826 break; 827 case VBLK: 828 if (newvp->v_specmountpoint != NULL) { 829 delay = metadelay; 830 break; 831 } 832 /* fall through */ 833 default: 834 delay = filedelay; 835 break; 836 } 837 if (!newvp->v_mount || 838 (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0) 839 vn_syncer_add_to_worklist(newvp, delay); 840 } 841 } 842 bufinsvn(bp, listheadp); 843 } 844 845 /* 846 * Create a vnode for a block device. 847 * Used for root filesystem and swap areas. 848 * Also used for memory file system special devices. 849 */ 850 int 851 bdevvp(dev, vpp) 852 dev_t dev; 853 struct vnode **vpp; 854 { 855 856 return (getdevvp(dev, vpp, VBLK)); 857 } 858 859 /* 860 * Create a vnode for a character device. 861 * Used for kernfs and some console handling. 862 */ 863 int 864 cdevvp(dev, vpp) 865 dev_t dev; 866 struct vnode **vpp; 867 { 868 869 return (getdevvp(dev, vpp, VCHR)); 870 } 871 872 /* 873 * Create a vnode for a device. 874 * Used by bdevvp (block device) for root file system etc., 875 * and by cdevvp (character device) for console and kernfs. 876 */ 877 int 878 getdevvp(dev, vpp, type) 879 dev_t dev; 880 struct vnode **vpp; 881 enum vtype type; 882 { 883 register struct vnode *vp; 884 struct vnode *nvp; 885 int error; 886 887 if (dev == NODEV) { 888 *vpp = NULLVP; 889 return (0); 890 } 891 error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp); 892 if (error) { 893 *vpp = NULLVP; 894 return (error); 895 } 896 vp = nvp; 897 vp->v_type = type; 898 if ((nvp = checkalias(vp, dev, NULL)) != 0) { 899 vput(vp); 900 vp = nvp; 901 } 902 *vpp = vp; 903 return (0); 904 } 905 906 /* 907 * Check to see if the new vnode represents a special device 908 * for which we already have a vnode (either because of 909 * bdevvp() or because of a different vnode representing 910 * the same block device). 
If such an alias exists, deallocate 911 * the existing contents and return the aliased vnode. The 912 * caller is responsible for filling it with its new contents. 913 */ 914 struct vnode * 915 checkalias(nvp, nvp_rdev, mp) 916 register struct vnode *nvp; 917 dev_t nvp_rdev; 918 struct mount *mp; 919 { 920 struct proc *p = curproc; /* XXX */ 921 register struct vnode *vp; 922 struct vnode **vpp; 923 924 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 925 return (NULLVP); 926 927 vpp = &speclisth[SPECHASH(nvp_rdev)]; 928 loop: 929 simple_lock(&spechash_slock); 930 for (vp = *vpp; vp; vp = vp->v_specnext) { 931 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) 932 continue; 933 /* 934 * Alias, but not in use, so flush it out. 935 */ 936 simple_lock(&vp->v_interlock); 937 if (vp->v_usecount == 0) { 938 simple_unlock(&spechash_slock); 939 vgonel(vp, p); 940 goto loop; 941 } 942 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) { 943 simple_unlock(&spechash_slock); 944 goto loop; 945 } 946 break; 947 } 948 if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) { 949 MALLOC(nvp->v_specinfo, struct specinfo *, 950 sizeof(struct specinfo), M_VNODE, M_WAITOK); 951 nvp->v_rdev = nvp_rdev; 952 nvp->v_hashchain = vpp; 953 nvp->v_specnext = *vpp; 954 nvp->v_specmountpoint = NULL; 955 simple_unlock(&spechash_slock); 956 nvp->v_speclockf = NULL; 957 *vpp = nvp; 958 if (vp != NULLVP) { 959 nvp->v_flag |= VALIASED; 960 vp->v_flag |= VALIASED; 961 vput(vp); 962 } 963 return (NULLVP); 964 } 965 simple_unlock(&spechash_slock); 966 VOP_UNLOCK(vp, 0); 967 simple_lock(&vp->v_interlock); 968 vclean(vp, 0, p); 969 vp->v_op = nvp->v_op; 970 vp->v_tag = nvp->v_tag; 971 vp->v_vnlock = &vp->v_lock; 972 lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0); 973 nvp->v_type = VNON; 974 insmntque(vp, mp); 975 return (vp); 976 } 977 978 /* 979 * Grab a particular vnode from the free list, increment its 980 * reference count and lock it. If the vnode lock bit is set the 981 * vnode is being eliminated in vgone. In that case, we can not 982 * grab the vnode, so the process is awakened when the transition is 983 * completed, and an error returned to indicate that the vnode is no 984 * longer usable (possibly having been changed to a new file system type). 985 */ 986 int 987 vget(vp, flags) 988 struct vnode *vp; 989 int flags; 990 { 991 int error; 992 993 /* 994 * If the vnode is in the process of being cleaned out for 995 * another use, we wait for the cleaning to finish and then 996 * return failure. Cleaning is determined by checking that 997 * the VXLOCK flag is set. 998 */ 999 if ((flags & LK_INTERLOCK) == 0) 1000 simple_lock(&vp->v_interlock); 1001 if (vp->v_flag & VXLOCK) { 1002 vp->v_flag |= VXWANT; 1003 simple_unlock(&vp->v_interlock); 1004 tsleep((caddr_t)vp, PINOD, "vget", 0); 1005 return (ENOENT); 1006 } 1007 if (vp->v_usecount == 0) { 1008 simple_lock(&vnode_free_list_slock); 1009 if (vp->v_holdcnt > 0) 1010 TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist); 1011 else 1012 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1013 simple_unlock(&vnode_free_list_slock); 1014 } 1015 vp->v_usecount++; 1016 #ifdef DIAGNOSTIC 1017 if (vp->v_usecount == 0) { 1018 vprint("vget", vp); 1019 panic("vget: usecount overflow"); 1020 } 1021 #endif 1022 if (flags & LK_TYPE_MASK) { 1023 if ((error = vn_lock(vp, flags | LK_INTERLOCK))) { 1024 /* 1025 * must expand vrele here because we do not want 1026 * to call VOP_INACTIVE if the reference count 1027 * drops back to zero since it was never really 1028 * active. 
We must remove it from the free list 1029 * before sleeping so that multiple processes do 1030 * not try to recycle it. 1031 */ 1032 simple_lock(&vp->v_interlock); 1033 vp->v_usecount--; 1034 if (vp->v_usecount > 0) { 1035 simple_unlock(&vp->v_interlock); 1036 return (error); 1037 } 1038 /* 1039 * insert at tail of LRU list 1040 */ 1041 simple_lock(&vnode_free_list_slock); 1042 if (vp->v_holdcnt > 0) 1043 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, 1044 v_freelist); 1045 else 1046 TAILQ_INSERT_TAIL(&vnode_free_list, vp, 1047 v_freelist); 1048 simple_unlock(&vnode_free_list_slock); 1049 simple_unlock(&vp->v_interlock); 1050 } 1051 return (error); 1052 } 1053 simple_unlock(&vp->v_interlock); 1054 return (0); 1055 } 1056 1057 /* 1058 * vput(), just unlock and vrele() 1059 */ 1060 void 1061 vput(vp) 1062 struct vnode *vp; 1063 { 1064 struct proc *p = curproc; /* XXX */ 1065 1066 #ifdef DIAGNOSTIC 1067 if (vp == NULL) 1068 panic("vput: null vp"); 1069 #endif 1070 simple_lock(&vp->v_interlock); 1071 vp->v_usecount--; 1072 if (vp->v_usecount > 0) { 1073 simple_unlock(&vp->v_interlock); 1074 VOP_UNLOCK(vp, 0); 1075 return; 1076 } 1077 #ifdef DIAGNOSTIC 1078 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 1079 vprint("vput: bad ref count", vp); 1080 panic("vput: ref cnt"); 1081 } 1082 #endif 1083 /* 1084 * Insert at tail of LRU list. 1085 */ 1086 simple_lock(&vnode_free_list_slock); 1087 if (vp->v_holdcnt > 0) 1088 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); 1089 else 1090 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 1091 simple_unlock(&vnode_free_list_slock); 1092 simple_unlock(&vp->v_interlock); 1093 VOP_INACTIVE(vp, p); 1094 } 1095 1096 /* 1097 * Vnode release. 1098 * If count drops to zero, call inactive routine and return to freelist. 1099 */ 1100 void 1101 vrele(vp) 1102 struct vnode *vp; 1103 { 1104 struct proc *p = curproc; /* XXX */ 1105 1106 #ifdef DIAGNOSTIC 1107 if (vp == NULL) 1108 panic("vrele: null vp"); 1109 #endif 1110 simple_lock(&vp->v_interlock); 1111 vp->v_usecount--; 1112 if (vp->v_usecount > 0) { 1113 simple_unlock(&vp->v_interlock); 1114 return; 1115 } 1116 #ifdef DIAGNOSTIC 1117 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 1118 vprint("vrele: bad ref count", vp); 1119 panic("vrele: ref cnt"); 1120 } 1121 #endif 1122 /* 1123 * Insert at tail of LRU list. 1124 */ 1125 simple_lock(&vnode_free_list_slock); 1126 if (vp->v_holdcnt > 0) 1127 TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist); 1128 else 1129 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 1130 simple_unlock(&vnode_free_list_slock); 1131 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0) 1132 VOP_INACTIVE(vp, p); 1133 } 1134 1135 #ifdef DIAGNOSTIC 1136 /* 1137 * Page or buffer structure gets a reference. 1138 */ 1139 void 1140 vhold(vp) 1141 register struct vnode *vp; 1142 { 1143 1144 /* 1145 * If it is on the freelist and the hold count is currently 1146 * zero, move it to the hold list. The test of the back 1147 * pointer and the use reference count of zero is because 1148 * it will be removed from a free list by getnewvnode, 1149 * but will not have its reference count incremented until 1150 * after calling vgone. If the reference count were 1151 * incremented first, vgone would (incorrectly) try to 1152 * close the previous instance of the underlying object. 1153 * So, the back pointer is explicitly set to `0xdeadb' in 1154 * getnewvnode after removing it from a freelist to ensure 1155 * that we do not try to move it here. 
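 *
 * Hold references come from buffer and page structures rather than
 * from name lookups: bgetvp() takes VHOLD(vp) when a buffer is
 * attached and brelvp() drops the hold again.  A vnode with
 * v_usecount == 0 but v_holdcnt > 0 therefore sits on
 * vnode_hold_list, which getnewvnode() recycles from only
 * reluctantly:
 *
 *	VHOLD(vp);	(buffer attached; free vnode moves to hold list)
 *	HOLDRELE(vp);	(last buffer gone; vnode returns to free list)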
1156 */
1157 	simple_lock(&vp->v_interlock);
1158 	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1159 	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
1160 		simple_lock(&vnode_free_list_slock);
1161 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
1162 		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
1163 		simple_unlock(&vnode_free_list_slock);
1164 	}
1165 	vp->v_holdcnt++;
1166 	simple_unlock(&vp->v_interlock);
1167 }
1168 
1169 /*
1170  * Page or buffer structure frees a reference.
1171  */
1172 void
1173 holdrele(vp)
1174 	register struct vnode *vp;
1175 {
1176 
1177 	simple_lock(&vp->v_interlock);
1178 	if (vp->v_holdcnt <= 0)
1179 		panic("holdrele: holdcnt");
1180 	vp->v_holdcnt--;
1181 	/*
1182 	 * If it is on the holdlist and the hold count drops to
1183 	 * zero, move it to the free list. The test of the back
1184 	 * pointer and the use reference count of zero is because
1185 	 * it will be removed from a free list by getnewvnode,
1186 	 * but will not have its reference count incremented until
1187 	 * after calling vgone. If the reference count were
1188 	 * incremented first, vgone would (incorrectly) try to
1189 	 * close the previous instance of the underlying object.
1190 	 * So, the back pointer is explicitly set to `0xdeadb' in
1191 	 * getnewvnode after removing it from a freelist to ensure
1192 	 * that we do not try to move it here.
1193 	 */
1194 	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
1195 	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
1196 		simple_lock(&vnode_free_list_slock);
1197 		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
1198 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1199 		simple_unlock(&vnode_free_list_slock);
1200 	}
1201 	simple_unlock(&vp->v_interlock);
1202 }
1203 
1204 /*
1205  * Vnode reference.
1206  */
1207 void
1208 vref(vp)
1209 	struct vnode *vp;
1210 {
1211 
1212 	simple_lock(&vp->v_interlock);
1213 	if (vp->v_usecount <= 0)
1214 		panic("vref used where vget required");
1215 	vp->v_usecount++;
1216 #ifdef DIAGNOSTIC
1217 	if (vp->v_usecount == 0) {
1218 		vprint("vref", vp);
1219 		panic("vref: usecount overflow");
1220 	}
1221 #endif
1222 	simple_unlock(&vp->v_interlock);
1223 }
1224 #endif /* DIAGNOSTIC */
1225 
1226 /*
1227  * Remove any vnodes in the vnode table belonging to mount point mp.
1228  *
1229  * If MNT_NOFORCE is specified, there should not be any active ones;
1230  * an error is returned if any are found (nb: this is a user error,
1231  * not a system error). If MNT_FORCE is specified, detach any active
1232  * vnodes that are found.
1233  */
1234 #ifdef DEBUG
1235 int busyprt = 0;	/* print out busy vnodes */
1236 struct ctldebug debug1 = { "busyprt", &busyprt };
1237 #endif
1238 
1239 int
1240 vflush(mp, skipvp, flags)
1241 	struct mount *mp;
1242 	struct vnode *skipvp;
1243 	int flags;
1244 {
1245 	struct proc *p = curproc;	/* XXX */
1246 	register struct vnode *vp, *nvp;
1247 	int busy = 0;
1248 
1249 	simple_lock(&mntvnode_slock);
1250 loop:
1251 	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
1252 		if (vp->v_mount != mp)
1253 			goto loop;
1254 		nvp = vp->v_mntvnodes.le_next;
1255 		/*
1256 		 * Skip over a selected vnode.
1257 		 */
1258 		if (vp == skipvp)
1259 			continue;
1260 		simple_lock(&vp->v_interlock);
1261 		/*
1262 		 * Skip over vnodes marked VSYSTEM.
1263 		 */
1264 		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
1265 			simple_unlock(&vp->v_interlock);
1266 			continue;
1267 		}
1268 		/*
1269 		 * If WRITECLOSE is set, only flush out regular file
1270 		 * vnodes open for writing.
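 *
 * (vflush() as a whole is typically invoked from a filesystem's
 * unmount routine, along the lines of the hypothetical
 *
 *	error = vflush(mp, skipvp, flags | (force ? FORCECLOSE : 0));
 *
 * where "force" stands in for MNT_FORCE having been requested by the
 * caller.)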
1271 */ 1272 if ((flags & WRITECLOSE) && 1273 (vp->v_writecount == 0 || vp->v_type != VREG)) { 1274 simple_unlock(&vp->v_interlock); 1275 continue; 1276 } 1277 /* 1278 * With v_usecount == 0, all we need to do is clear 1279 * out the vnode data structures and we are done. 1280 */ 1281 if (vp->v_usecount == 0) { 1282 simple_unlock(&mntvnode_slock); 1283 vgonel(vp, p); 1284 simple_lock(&mntvnode_slock); 1285 continue; 1286 } 1287 /* 1288 * If FORCECLOSE is set, forcibly close the vnode. 1289 * For block or character devices, revert to an 1290 * anonymous device. For all other files, just kill them. 1291 */ 1292 if (flags & FORCECLOSE) { 1293 simple_unlock(&mntvnode_slock); 1294 if (vp->v_type != VBLK && vp->v_type != VCHR) { 1295 vgonel(vp, p); 1296 } else { 1297 vclean(vp, 0, p); 1298 vp->v_op = spec_vnodeop_p; 1299 insmntque(vp, (struct mount *)0); 1300 } 1301 simple_lock(&mntvnode_slock); 1302 continue; 1303 } 1304 #ifdef DEBUG 1305 if (busyprt) 1306 vprint("vflush: busy vnode", vp); 1307 #endif 1308 simple_unlock(&vp->v_interlock); 1309 busy++; 1310 } 1311 simple_unlock(&mntvnode_slock); 1312 if (busy) 1313 return (EBUSY); 1314 return (0); 1315 } 1316 1317 /* 1318 * Disassociate the underlying file system from a vnode. 1319 */ 1320 void 1321 vclean(vp, flags, p) 1322 register struct vnode *vp; 1323 int flags; 1324 struct proc *p; 1325 { 1326 int active; 1327 1328 /* 1329 * Check to see if the vnode is in use. 1330 * If so we have to reference it before we clean it out 1331 * so that its count cannot fall to zero and generate a 1332 * race against ourselves to recycle it. 1333 */ 1334 if ((active = vp->v_usecount) != 0) { 1335 /* We have the vnode interlock. */ 1336 vp->v_usecount++; 1337 #ifdef DIAGNOSTIC 1338 if (vp->v_usecount == 0) { 1339 vprint("vclean", vp); 1340 panic("vclean: usecount overflow"); 1341 } 1342 #endif 1343 } 1344 1345 /* 1346 * Prevent the vnode from being recycled or 1347 * brought into use while we clean it out. 1348 */ 1349 if (vp->v_flag & VXLOCK) 1350 panic("vclean: deadlock"); 1351 vp->v_flag |= VXLOCK; 1352 /* 1353 * Even if the count is zero, the VOP_INACTIVE routine may still 1354 * have the object locked while it cleans it out. The VOP_LOCK 1355 * ensures that the VOP_INACTIVE routine is done with its work. 1356 * For active vnodes, it ensures that no other activity can 1357 * occur while the underlying object is being cleaned out. 1358 */ 1359 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK); 1360 1361 /* 1362 * clean out any VM data associated with the vnode. 1363 */ 1364 uvm_vnp_terminate(vp); 1365 /* 1366 * Clean out any buffers associated with the vnode. 1367 */ 1368 if (flags & DOCLOSE) 1369 vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); 1370 1371 /* 1372 * If purging an active vnode, it must be closed and 1373 * deactivated before being reclaimed. Note that the 1374 * VOP_INACTIVE will unlock the vnode. 1375 */ 1376 if (active) { 1377 if (flags & DOCLOSE) 1378 VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL); 1379 VOP_INACTIVE(vp, p); 1380 } else { 1381 /* 1382 * Any other processes trying to obtain this lock must first 1383 * wait for VXLOCK to clear, then call the new lock operation. 1384 */ 1385 VOP_UNLOCK(vp, 0); 1386 } 1387 /* 1388 * Reclaim the vnode. 1389 */ 1390 if (VOP_RECLAIM(vp, p)) 1391 panic("vclean: cannot reclaim"); 1392 1393 if (active) { 1394 /* 1395 * Inline copy of vrele() since VOP_INACTIVE 1396 * has already been called. 
1397 */
1398 		simple_lock(&vp->v_interlock);
1399 		if (--vp->v_usecount <= 0) {
1400 #ifdef DIAGNOSTIC
1401 			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
1402 				vprint("vclean: bad ref count", vp);
1403 				panic("vclean: ref cnt");
1404 			}
1405 #endif
1406 			/*
1407 			 * Insert at tail of LRU list.
1408 			 */
1409 			simple_unlock(&vp->v_interlock);
1410 			simple_lock(&vnode_free_list_slock);
1411 #ifdef DIAGNOSTIC
1412 			if (vp->v_vnlock) {
1413 				if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
1414 					vprint("vclean: lock not drained", vp);
1415 			}
1416 			if (vp->v_holdcnt > 0)
1417 				panic("vclean: not clean");
1418 #endif
1419 			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
1420 			simple_unlock(&vnode_free_list_slock);
1421 		} else
1422 			simple_unlock(&vp->v_interlock);
1423 	}
1424 
1425 	cache_purge(vp);
1426 
1427 	/*
1428 	 * Done with purge, notify sleepers of the grim news.
1429 	 */
1430 	vp->v_op = dead_vnodeop_p;
1431 	vp->v_tag = VT_NON;
1432 	vp->v_flag &= ~VXLOCK;
1433 	if (vp->v_flag & VXWANT) {
1434 		vp->v_flag &= ~VXWANT;
1435 		wakeup((caddr_t)vp);
1436 	}
1437 }
1438 
1439 /*
1440  * Recycle an unused vnode to the front of the free list.
1441  * Release the passed interlock if the vnode will be recycled.
1442  */
1443 int
1444 vrecycle(vp, inter_lkp, p)
1445 	struct vnode *vp;
1446 	struct simplelock *inter_lkp;
1447 	struct proc *p;
1448 {
1449 
1450 	simple_lock(&vp->v_interlock);
1451 	if (vp->v_usecount == 0) {
1452 		if (inter_lkp)
1453 			simple_unlock(inter_lkp);
1454 		vgonel(vp, p);
1455 		return (1);
1456 	}
1457 	simple_unlock(&vp->v_interlock);
1458 	return (0);
1459 }
1460 
1461 /*
1462  * Eliminate all activity associated with a vnode
1463  * in preparation for reuse.
1464  */
1465 void
1466 vgone(vp)
1467 	struct vnode *vp;
1468 {
1469 	struct proc *p = curproc;	/* XXX */
1470 
1471 	simple_lock(&vp->v_interlock);
1472 	vgonel(vp, p);
1473 }
1474 
1475 /*
1476  * vgone, with the vp interlock held.
1477  */
1478 void
1479 vgonel(vp, p)
1480 	register struct vnode *vp;
1481 	struct proc *p;
1482 {
1483 	struct vnode *vq;
1484 	struct vnode *vx;
1485 
1486 	/*
1487 	 * If a vgone (or vclean) is already in progress,
1488 	 * wait until it is done and return.
1489 	 */
1490 	if (vp->v_flag & VXLOCK) {
1491 		vp->v_flag |= VXWANT;
1492 		simple_unlock(&vp->v_interlock);
1493 		tsleep((caddr_t)vp, PINOD, "vgone", 0);
1494 		return;
1495 	}
1496 	/*
1497 	 * Clean out the filesystem specific data.
1498 	 */
1499 	vclean(vp, DOCLOSE, p);
1500 	/*
1501 	 * Delete from old mount point vnode list, if on one.
1502 	 */
1503 	if (vp->v_mount != NULL)
1504 		insmntque(vp, (struct mount *)0);
1505 	/*
1506 	 * If special device, remove it from the special device alias
1507 	 * list, if it is on one.
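 *
 * (The alias list in question is the hash chain headed by
 * speclisth[SPECHASH(rdev)]: checkalias() links special vnodes in and
 * points v_hashchain back at the chain head, which is what lets the
 * code below unlink vp and clear VALIASED on a last surviving alias.)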
1508 */ 1509 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { 1510 simple_lock(&spechash_slock); 1511 if (vp->v_hashchain != NULL) { 1512 if (*vp->v_hashchain == vp) { 1513 *vp->v_hashchain = vp->v_specnext; 1514 } else { 1515 for (vq = *vp->v_hashchain; vq; 1516 vq = vq->v_specnext) { 1517 if (vq->v_specnext != vp) 1518 continue; 1519 vq->v_specnext = vp->v_specnext; 1520 break; 1521 } 1522 if (vq == NULL) 1523 panic("missing bdev"); 1524 } 1525 if (vp->v_flag & VALIASED) { 1526 vx = NULL; 1527 for (vq = *vp->v_hashchain; vq; 1528 vq = vq->v_specnext) { 1529 if (vq->v_rdev != vp->v_rdev || 1530 vq->v_type != vp->v_type) 1531 continue; 1532 if (vx) 1533 break; 1534 vx = vq; 1535 } 1536 if (vx == NULL) 1537 panic("missing alias"); 1538 if (vq == NULL) 1539 vx->v_flag &= ~VALIASED; 1540 vp->v_flag &= ~VALIASED; 1541 } 1542 } 1543 simple_unlock(&spechash_slock); 1544 FREE(vp->v_specinfo, M_VNODE); 1545 vp->v_specinfo = NULL; 1546 } 1547 /* 1548 * If it is on the freelist and not already at the head, 1549 * move it to the head of the list. The test of the back 1550 * pointer and the reference count of zero is because 1551 * it will be removed from the free list by getnewvnode, 1552 * but will not have its reference count incremented until 1553 * after calling vgone. If the reference count were 1554 * incremented first, vgone would (incorrectly) try to 1555 * close the previous instance of the underlying object. 1556 * So, the back pointer is explicitly set to `0xdeadb' in 1557 * getnewvnode after removing it from the freelist to ensure 1558 * that we do not try to move it here. 1559 */ 1560 if (vp->v_usecount == 0) { 1561 simple_lock(&vnode_free_list_slock); 1562 if (vp->v_holdcnt > 0) 1563 panic("vgonel: not clean"); 1564 if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb && 1565 TAILQ_FIRST(&vnode_free_list) != vp) { 1566 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1567 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 1568 } 1569 simple_unlock(&vnode_free_list_slock); 1570 } 1571 vp->v_type = VBAD; 1572 } 1573 1574 /* 1575 * Lookup a vnode by device number. 1576 */ 1577 int 1578 vfinddev(dev, type, vpp) 1579 dev_t dev; 1580 enum vtype type; 1581 struct vnode **vpp; 1582 { 1583 struct vnode *vp; 1584 int rc = 0; 1585 1586 simple_lock(&spechash_slock); 1587 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 1588 if (dev != vp->v_rdev || type != vp->v_type) 1589 continue; 1590 *vpp = vp; 1591 rc = 1; 1592 break; 1593 } 1594 simple_unlock(&spechash_slock); 1595 return (rc); 1596 } 1597 1598 /* 1599 * Revoke all the vnodes corresponding to the specified minor number 1600 * range (endpoints inclusive) of the specified major. 1601 */ 1602 void 1603 vdevgone(maj, minl, minh, type) 1604 int maj, minl, minh; 1605 enum vtype type; 1606 { 1607 struct vnode *vp; 1608 int mn; 1609 1610 for (mn = minl; mn <= minh; mn++) 1611 if (vfinddev(makedev(maj, mn), type, &vp)) 1612 VOP_REVOKE(vp, REVOKEALL); 1613 } 1614 1615 /* 1616 * Calculate the total number of references to a special device. 1617 */ 1618 int 1619 vcount(vp) 1620 register struct vnode *vp; 1621 { 1622 register struct vnode *vq, *vnext; 1623 int count; 1624 1625 loop: 1626 if ((vp->v_flag & VALIASED) == 0) 1627 return (vp->v_usecount); 1628 simple_lock(&spechash_slock); 1629 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { 1630 vnext = vq->v_specnext; 1631 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) 1632 continue; 1633 /* 1634 * Alias, but not in use, so flush it out. 
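 *
 * (Summing across the chain is what produces a meaningful open count
 * for a device reachable through several vnodes -- say, one created
 * by bdevvp() for the root device and another created by a later
 * name lookup; each alias contributes its own v_usecount.)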
1635 */ 1636 if (vq->v_usecount == 0 && vq != vp) { 1637 simple_unlock(&spechash_slock); 1638 vgone(vq); 1639 goto loop; 1640 } 1641 count += vq->v_usecount; 1642 } 1643 simple_unlock(&spechash_slock); 1644 return (count); 1645 } 1646 1647 /* 1648 * Print out a description of a vnode. 1649 */ 1650 static char *typename[] = 1651 { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" }; 1652 1653 void 1654 vprint(label, vp) 1655 char *label; 1656 register struct vnode *vp; 1657 { 1658 char buf[64]; 1659 1660 if (label != NULL) 1661 printf("%s: ", label); 1662 printf("tag %d type %s, usecount %ld, writecount %ld, refcount %ld,", 1663 vp->v_tag, typename[vp->v_type], vp->v_usecount, vp->v_writecount, 1664 vp->v_holdcnt); 1665 buf[0] = '\0'; 1666 if (vp->v_flag & VROOT) 1667 strcat(buf, "|VROOT"); 1668 if (vp->v_flag & VTEXT) 1669 strcat(buf, "|VTEXT"); 1670 if (vp->v_flag & VSYSTEM) 1671 strcat(buf, "|VSYSTEM"); 1672 if (vp->v_flag & VXLOCK) 1673 strcat(buf, "|VXLOCK"); 1674 if (vp->v_flag & VXWANT) 1675 strcat(buf, "|VXWANT"); 1676 if (vp->v_flag & VBWAIT) 1677 strcat(buf, "|VBWAIT"); 1678 if (vp->v_flag & VALIASED) 1679 strcat(buf, "|VALIASED"); 1680 if (buf[0] != '\0') 1681 printf(" flags (%s)", &buf[1]); 1682 if (vp->v_data == NULL) { 1683 printf("\n"); 1684 } else { 1685 printf("\n\t"); 1686 VOP_PRINT(vp); 1687 } 1688 } 1689 1690 #ifdef DEBUG 1691 /* 1692 * List all of the locked vnodes in the system. 1693 * Called when debugging the kernel. 1694 */ 1695 void 1696 printlockedvnodes() 1697 { 1698 struct mount *mp, *nmp; 1699 struct vnode *vp; 1700 1701 printf("Locked vnodes\n"); 1702 simple_lock(&mountlist_slock); 1703 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1704 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) { 1705 nmp = mp->mnt_list.cqe_next; 1706 continue; 1707 } 1708 for (vp = mp->mnt_vnodelist.lh_first; 1709 vp != NULL; 1710 vp = vp->v_mntvnodes.le_next) { 1711 if (VOP_ISLOCKED(vp)) 1712 vprint((char *)0, vp); 1713 } 1714 simple_lock(&mountlist_slock); 1715 nmp = mp->mnt_list.cqe_next; 1716 vfs_unbusy(mp); 1717 } 1718 simple_unlock(&mountlist_slock); 1719 } 1720 #endif 1721 1722 extern const char *mountcompatnames[]; 1723 extern const int nmountcompatnames; 1724 1725 /* 1726 * Top level filesystem related information gathering. 1727 */ 1728 int 1729 vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) 1730 int *name; 1731 u_int namelen; 1732 void *oldp; 1733 size_t *oldlenp; 1734 void *newp; 1735 size_t newlen; 1736 struct proc *p; 1737 { 1738 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44) 1739 struct vfsconf vfc; 1740 #endif 1741 struct vfsops *vfsp; 1742 1743 /* all sysctl names at this level are at least name and field */ 1744 if (namelen < 2) 1745 return (ENOTDIR); /* overloaded */ 1746 1747 /* Not generic: goes to file system. */ 1748 if (name[0] != VFS_GENERIC) { 1749 if (name[0] >= nmountcompatnames || name[0] < 0 || 1750 mountcompatnames[name[0]] == NULL) 1751 return (EOPNOTSUPP); 1752 vfsp = vfs_getopsbyname(mountcompatnames[name[0]]); 1753 if (vfsp == NULL || vfsp->vfs_sysctl == NULL) 1754 return (EOPNOTSUPP); 1755 return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1, 1756 oldp, oldlenp, newp, newlen, p)); 1757 } 1758 1759 /* The rest are generic vfs sysctls. 
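 *
 * From userland the generic node is reached through sysctl(3); a
 * minimal sketch, assuming the usual CTL_VFS, VFS_GENERIC and
 * VFS_USERMOUNT definitions from <sys/sysctl.h>:
 *
 *	int mib[3] = { CTL_VFS, VFS_GENERIC, VFS_USERMOUNT };
 *	int usermount;
 *	size_t len = sizeof(usermount);
 *
 *	if (sysctl(mib, 3, &usermount, &len, NULL, 0) == -1)
 *		err(1, "sysctl");
 *
 * which reads the dovfsusermount knob handled below.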
*/ 1760 switch (name[1]) { 1761 case VFS_USERMOUNT: 1762 return sysctl_int(oldp, oldlenp, newp, newlen, &dovfsusermount); 1763 #if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44) 1764 case VFS_MAXTYPENUM: 1765 /* 1766 * Provided for 4.4BSD-Lite2 compatibility. 1767 */ 1768 return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames)); 1769 case VFS_CONF: 1770 /* 1771 * Special: a node, next is a file system name. 1772 * Provided for 4.4BSD-Lite2 compatibility. 1773 */ 1774 if (namelen < 3) 1775 return (ENOTDIR); /* overloaded */ 1776 if (name[2] >= nmountcompatnames || name[2] < 0 || 1777 mountcompatnames[name[2]] == NULL) 1778 return (EOPNOTSUPP); 1779 vfsp = vfs_getopsbyname(mountcompatnames[name[2]]); 1780 if (vfsp == NULL) 1781 return (EOPNOTSUPP); 1782 vfc.vfc_vfsops = vfsp; 1783 strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN); 1784 vfc.vfc_typenum = name[2]; 1785 vfc.vfc_refcount = vfsp->vfs_refcount; 1786 vfc.vfc_flags = 0; 1787 vfc.vfc_mountroot = vfsp->vfs_mountroot; 1788 vfc.vfc_next = NULL; 1789 return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc, 1790 sizeof(struct vfsconf))); 1791 #endif 1792 default: 1793 break; 1794 } 1795 return (EOPNOTSUPP); 1796 } 1797 1798 int kinfo_vdebug = 1; 1799 int kinfo_vgetfailed; 1800 #define KINFO_VNODESLOP 10 1801 /* 1802 * Dump vnode list (via sysctl). 1803 * Copyout address of vnode followed by vnode. 1804 */ 1805 /* ARGSUSED */ 1806 int 1807 sysctl_vnode(where, sizep, p) 1808 char *where; 1809 size_t *sizep; 1810 struct proc *p; 1811 { 1812 struct mount *mp, *nmp; 1813 struct vnode *nvp, *vp; 1814 char *bp = where, *savebp; 1815 char *ewhere; 1816 int error; 1817 1818 #define VPTRSZ sizeof(struct vnode *) 1819 #define VNODESZ sizeof(struct vnode) 1820 if (where == NULL) { 1821 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 1822 return (0); 1823 } 1824 ewhere = where + *sizep; 1825 1826 simple_lock(&mountlist_slock); 1827 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1828 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) { 1829 nmp = mp->mnt_list.cqe_next; 1830 continue; 1831 } 1832 savebp = bp; 1833 again: 1834 simple_lock(&mntvnode_slock); 1835 for (vp = mp->mnt_vnodelist.lh_first; 1836 vp != NULL; 1837 vp = nvp) { 1838 /* 1839 * Check that the vp is still associated with 1840 * this filesystem. RACE: could have been 1841 * recycled onto the same filesystem. 1842 */ 1843 if (vp->v_mount != mp) { 1844 simple_unlock(&mntvnode_slock); 1845 if (kinfo_vdebug) 1846 printf("kinfo: vp changed\n"); 1847 bp = savebp; 1848 goto again; 1849 } 1850 nvp = vp->v_mntvnodes.le_next; 1851 if (bp + VPTRSZ + VNODESZ > ewhere) { 1852 simple_unlock(&mntvnode_slock); 1853 *sizep = bp - where; 1854 return (ENOMEM); 1855 } 1856 simple_unlock(&mntvnode_slock); 1857 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || 1858 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) 1859 return (error); 1860 bp += VPTRSZ + VNODESZ; 1861 simple_lock(&mntvnode_slock); 1862 } 1863 simple_unlock(&mntvnode_slock); 1864 simple_lock(&mountlist_slock); 1865 nmp = mp->mnt_list.cqe_next; 1866 vfs_unbusy(mp); 1867 } 1868 simple_unlock(&mountlist_slock); 1869 1870 *sizep = bp - where; 1871 return (0); 1872 } 1873 1874 /* 1875 * Check to see if a filesystem is mounted on a block device. 
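 *
 * Mount code typically applies this check to the device vnode before
 * mounting it; a sketch of the usual pattern in the disk-based
 * filesystems:
 *
 *	if ((error = vfs_mountedon(devvp)) != 0)
 *		return (error);
 *	if (vcount(devvp) > 1 && devvp != rootvp)
 *		return (EBUSY);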
1876 */ 1877 int 1878 vfs_mountedon(vp) 1879 struct vnode *vp; 1880 { 1881 struct vnode *vq; 1882 int error = 0; 1883 1884 if (vp->v_specmountpoint != NULL) 1885 return (EBUSY); 1886 if (vp->v_flag & VALIASED) { 1887 simple_lock(&spechash_slock); 1888 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1889 if (vq->v_rdev != vp->v_rdev || 1890 vq->v_type != vp->v_type) 1891 continue; 1892 if (vq->v_specmountpoint != NULL) { 1893 error = EBUSY; 1894 break; 1895 } 1896 } 1897 simple_unlock(&spechash_slock); 1898 } 1899 return (error); 1900 } 1901 1902 /* 1903 * Build hash lists of net addresses and hang them off the mount point. 1904 * Called by ufs_mount() to set up the lists of export addresses. 1905 */ 1906 static int 1907 vfs_hang_addrlist(mp, nep, argp) 1908 struct mount *mp; 1909 struct netexport *nep; 1910 struct export_args *argp; 1911 { 1912 register struct netcred *np, *enp; 1913 register struct radix_node_head *rnh; 1914 register int i; 1915 struct radix_node *rn; 1916 struct sockaddr *saddr, *smask = 0; 1917 struct domain *dom; 1918 int error; 1919 1920 if (argp->ex_addrlen == 0) { 1921 if (mp->mnt_flag & MNT_DEFEXPORTED) 1922 return (EPERM); 1923 np = &nep->ne_defexported; 1924 np->netc_exflags = argp->ex_flags; 1925 np->netc_anon = argp->ex_anon; 1926 np->netc_anon.cr_ref = 1; 1927 mp->mnt_flag |= MNT_DEFEXPORTED; 1928 return (0); 1929 } 1930 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 1931 np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK); 1932 memset((caddr_t)np, 0, i); 1933 saddr = (struct sockaddr *)(np + 1); 1934 error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen); 1935 if (error) 1936 goto out; 1937 if (saddr->sa_len > argp->ex_addrlen) 1938 saddr->sa_len = argp->ex_addrlen; 1939 if (argp->ex_masklen) { 1940 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen); 1941 error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen); 1942 if (error) 1943 goto out; 1944 if (smask->sa_len > argp->ex_masklen) 1945 smask->sa_len = argp->ex_masklen; 1946 } 1947 i = saddr->sa_family; 1948 if ((rnh = nep->ne_rtable[i]) == 0) { 1949 /* 1950 * Seems silly to initialize every AF when most are not 1951 * used, do so on demand here 1952 */ 1953 for (dom = domains; dom; dom = dom->dom_next) 1954 if (dom->dom_family == i && dom->dom_rtattach) { 1955 dom->dom_rtattach((void **)&nep->ne_rtable[i], 1956 dom->dom_rtoffset); 1957 break; 1958 } 1959 if ((rnh = nep->ne_rtable[i]) == 0) { 1960 error = ENOBUFS; 1961 goto out; 1962 } 1963 } 1964 rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh, 1965 np->netc_rnodes); 1966 if (rn == 0 || np != (struct netcred *)rn) { /* already exists */ 1967 if (rn == 0) { 1968 enp = (struct netcred *)(*rnh->rnh_lookup)(saddr, 1969 smask, rnh); 1970 if (enp == 0) { 1971 error = EPERM; 1972 goto out; 1973 } 1974 } else 1975 enp = (struct netcred *)rn; 1976 1977 if (enp->netc_exflags != argp->ex_flags || 1978 enp->netc_anon.cr_uid != argp->ex_anon.cr_uid || 1979 enp->netc_anon.cr_gid != argp->ex_anon.cr_gid || 1980 enp->netc_anon.cr_ngroups != argp->ex_anon.cr_ngroups || 1981 memcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups, 1982 enp->netc_anon.cr_ngroups)) 1983 error = EPERM; 1984 else 1985 error = 0; 1986 goto out; 1987 } 1988 np->netc_exflags = argp->ex_flags; 1989 np->netc_anon = argp->ex_anon; 1990 np->netc_anon.cr_ref = 1; 1991 return (0); 1992 out: 1993 free(np, M_NETADDR); 1994 return (error); 1995 } 1996 1997 /* ARGSUSED */ 1998 static int 1999 vfs_free_netcred(rn, w) 2000 struct 
radix_node *rn; 2001 void *w; 2002 { 2003 register struct radix_node_head *rnh = (struct radix_node_head *)w; 2004 2005 (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); 2006 free((caddr_t)rn, M_NETADDR); 2007 return (0); 2008 } 2009 2010 /* 2011 * Free the net address hash lists that are hanging off the mount points. 2012 */ 2013 static void 2014 vfs_free_addrlist(nep) 2015 struct netexport *nep; 2016 { 2017 register int i; 2018 register struct radix_node_head *rnh; 2019 2020 for (i = 0; i <= AF_MAX; i++) 2021 if ((rnh = nep->ne_rtable[i]) != NULL) { 2022 (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh); 2023 free((caddr_t)rnh, M_RTABLE); 2024 nep->ne_rtable[i] = 0; 2025 } 2026 } 2027 2028 int 2029 vfs_export(mp, nep, argp) 2030 struct mount *mp; 2031 struct netexport *nep; 2032 struct export_args *argp; 2033 { 2034 int error; 2035 2036 if (argp->ex_flags & MNT_DELEXPORT) { 2037 if (mp->mnt_flag & MNT_EXPUBLIC) { 2038 vfs_setpublicfs(NULL, NULL, NULL); 2039 mp->mnt_flag &= ~MNT_EXPUBLIC; 2040 } 2041 vfs_free_addrlist(nep); 2042 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 2043 } 2044 if (argp->ex_flags & MNT_EXPORTED) { 2045 if (argp->ex_flags & MNT_EXPUBLIC) { 2046 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 2047 return (error); 2048 mp->mnt_flag |= MNT_EXPUBLIC; 2049 } 2050 if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0) 2051 return (error); 2052 mp->mnt_flag |= MNT_EXPORTED; 2053 } 2054 return (0); 2055 } 2056 2057 /* 2058 * Set the publicly exported filesystem (WebNFS). Currently, only 2059 * one public filesystem is possible in the spec (RFC 2054 and 2055) 2060 */ 2061 int 2062 vfs_setpublicfs(mp, nep, argp) 2063 struct mount *mp; 2064 struct netexport *nep; 2065 struct export_args *argp; 2066 { 2067 int error; 2068 struct vnode *rvp; 2069 char *cp; 2070 2071 /* 2072 * mp == NULL -> invalidate the current info, the FS is 2073 * no longer exported. May be called from either vfs_export 2074 * or unmount, so check if it hasn't already been done. 2075 */ 2076 if (mp == NULL) { 2077 if (nfs_pub.np_valid) { 2078 nfs_pub.np_valid = 0; 2079 if (nfs_pub.np_index != NULL) { 2080 FREE(nfs_pub.np_index, M_TEMP); 2081 nfs_pub.np_index = NULL; 2082 } 2083 } 2084 return (0); 2085 } 2086 2087 /* 2088 * Only one allowed at a time. 2089 */ 2090 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 2091 return (EBUSY); 2092 2093 /* 2094 * Get real filehandle for root of exported FS. 2095 */ 2096 memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle)); 2097 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 2098 2099 if ((error = VFS_ROOT(mp, &rvp))) 2100 return (error); 2101 2102 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 2103 return (error); 2104 2105 vput(rvp); 2106 2107 /* 2108 * If an indexfile was specified, pull it in. 2109 */ 2110 if (argp->ex_indexfile != NULL) { 2111 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 2112 M_WAITOK); 2113 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 2114 MAXNAMLEN, (size_t *)0); 2115 if (!error) { 2116 /* 2117 * Check for illegal filenames. 
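 *
 * (The index file is looked up as a single name relative to the
 * exported root, so an embedded '/' could escape it; only a bare
 * filename is accepted.)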
/*
 * Set the publicly exported filesystem (WebNFS).  Currently, only
 * one public filesystem is possible in the spec (RFC 2054 and RFC 2055).
 */
int
vfs_setpublicfs(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;
	struct vnode *rvp;
	char *cp;

	/*
	 * mp == NULL -> invalidate the current info; the FS is
	 * no longer exported.  May be called from either vfs_export
	 * or unmount, so check if it hasn't already been done.
	 */
	if (mp == NULL) {
		if (nfs_pub.np_valid) {
			nfs_pub.np_valid = 0;
			if (nfs_pub.np_index != NULL) {
				FREE(nfs_pub.np_index, M_TEMP);
				nfs_pub.np_index = NULL;
			}
		}
		return (0);
	}

	/*
	 * Only one allowed at a time.
	 */
	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
		return (EBUSY);

	/*
	 * Get real filehandle for root of exported FS.
	 */
	memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;

	if ((error = VFS_ROOT(mp, &rvp)))
		return (error);

	/*
	 * Release the root vnode on the error path as well, so it
	 * isn't leaked when VFS_VPTOFH fails.
	 */
	error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid);
	vput(rvp);
	if (error)
		return (error);

	/*
	 * If an indexfile was specified, pull it in.
	 */
	if (argp->ex_indexfile != NULL) {
		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
		    M_WAITOK);
		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
		    MAXNAMLEN, (size_t *)0);
		if (!error) {
			/*
			 * Check for illegal filenames.
			 */
			for (cp = nfs_pub.np_index; *cp; cp++) {
				if (*cp == '/') {
					error = EINVAL;
					break;
				}
			}
		}
		if (error) {
			FREE(nfs_pub.np_index, M_TEMP);
			nfs_pub.np_index = NULL;	/* don't leave it dangling */
			return (error);
		}
	}

	nfs_pub.np_mount = mp;
	nfs_pub.np_valid = 1;
	return (0);
}

/*
 * Look up the export credentials for an incoming request: first by
 * source address in the per-AF radix tree, then fall back to the
 * default export, if one was set up.
 */
struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
					    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Do the usual access checking.
 * file_mode, uid and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list.
 */
int
vaccess(type, file_mode, uid, gid, acc_mode, cred)
	enum vtype type;
	mode_t file_mode;
	uid_t uid;
	gid_t gid;
	mode_t acc_mode;
	struct ucred *cred;
{
	mode_t mask;

	/*
	 * Super-user always gets read/write access, but execute access depends
	 * on at least one execute bit being set.
	 */
	if (cred->cr_uid == 0) {
		if ((acc_mode & VEXEC) && type != VDIR &&
		    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
			return (EACCES);
		return (0);
	}

	mask = 0;

	/* Otherwise, check the owner. */
	if (cred->cr_uid == uid) {
		if (acc_mode & VEXEC)
			mask |= S_IXUSR;
		if (acc_mode & VREAD)
			mask |= S_IRUSR;
		if (acc_mode & VWRITE)
			mask |= S_IWUSR;
		return ((file_mode & mask) == mask ? 0 : EACCES);
	}

	/* Otherwise, check the groups. */
	if (cred->cr_gid == gid || groupmember(gid, cred)) {
		if (acc_mode & VEXEC)
			mask |= S_IXGRP;
		if (acc_mode & VREAD)
			mask |= S_IRGRP;
		if (acc_mode & VWRITE)
			mask |= S_IWGRP;
		return ((file_mode & mask) == mask ? 0 : EACCES);
	}

	/* Otherwise, check everyone else. */
	if (acc_mode & VEXEC)
		mask |= S_IXOTH;
	if (acc_mode & VREAD)
		mask |= S_IROTH;
	if (acc_mode & VWRITE)
		mask |= S_IWOTH;
	return ((file_mode & mask) == mask ? 0 : EACCES);
}
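
/*
 * Usage sketch (illustrative): a VOP_ACCESS implementation normally
 * finishes by handing its mode bits to vaccess().  The inode field
 * names below follow the ufs convention and are assumptions of this
 * example:
 *
 *	return (vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid,
 *	    ap->a_mode, ap->a_cred));
 */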
/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall()
{
	register struct mount *mp, *nmp;
	int allerror, error;
	struct proc *p = curproc;	/* XXX */

	/*
	 * Unmounting a file system blocks the requesting process.
	 * However, it's possible for this routine to be called when
	 * curproc is NULL (e.g. panic situation, or via the debugger).
	 * If we get stuck in this situation, just abort, since any
	 * attempts to sleep will fault.
	 */
	if (p == NULL) {
		printf("vfs_unmountall: no context, aborting\n");
		return;
	}

	for (allerror = 0,
	    mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
		printf("unmounting %s (%s)...\n",
		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
		if (vfs_busy(mp, 0, 0))
			continue;
		if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}
	if (allerror)
		printf("WARNING: some file systems would not unmount\n");
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
	register struct buf *bp;
	int iter, nbusy, dcount, s;

	printf("syncing disks... ");

	/* XXX Should suspend scheduling. */
	(void) spl0();

	sys_sync(&proc0, (void *)0, (register_t *)0);

	/* Wait for sync to finish. */
	dcount = 10000;
	for (iter = 0; iter < 20; iter++) {
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; ) {
			if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
				nbusy++;
			/*
			 * With soft updates, some buffers that are
			 * written will be remarked as dirty until other
			 * buffers are written.
			 */
			if (bp->b_vp && bp->b_vp->v_mount
			    && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP)
			    && (bp->b_flags & B_DELWRI)) {
				s = splbio();
				bremfree(bp);
				bp->b_flags |= B_BUSY;
				splx(s);
				nbusy++;
				bawrite(bp);
				if (dcount-- <= 0) {
					printf("softdep ");
					goto fail;
				}
			}
		}
		if (nbusy == 0)
			break;
		printf("%d ", nbusy);
		/*
		 * Back off progressively; the total wait across all
		 * 20 passes is sum(iter = 0..19) of 40000 * iter
		 * microseconds, i.e. about 7.6 seconds worst case.
		 */
		DELAY(40000 * iter);
	}
	if (nbusy) {
fail:
#ifdef DEBUG
		printf("giving up\nPrinting vnodes for busy buffers\n");
		for (bp = &buf[nbuf]; --bp >= buf; )
			if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
				vprint(NULL, bp->b_vp);
#else
		printf("giving up\n");
#endif
		return;
	} else
		printf("done\n");

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems. */
	vfs_unmountall();
}
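
/*
 * Note (illustrative): vfs_shutdown() is expected to be called from
 * the machine-dependent reboot path before the system halts.  A
 * typical call site, assuming the usual cpu_reboot() convention,
 * looks like:
 *
 *	if ((howto & RB_NOSYNC) == 0)
 *		vfs_shutdown();
 */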
/*
 * Mount the root file system.  If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot()
{
	extern int (*mountroot) __P((void));
	struct vfsops *v;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	switch (root_device->dv_class) {
	case DV_IFNET:
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET");
		break;

	case DV_DISK:
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    root_device->dv_xname);
		return (ENODEV);
	}

	/*
	 * If the user specified a file system, use it.
	 */
	if (mountroot != NULL)
		return ((*mountroot)());

	/*
	 * Try each file system currently configured into the kernel.
	 */
	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (v->vfs_mountroot == NULL)
			continue;
#ifdef DEBUG
		printf("mountroot: trying %s...\n", v->vfs_name);
#endif
		if ((*v->vfs_mountroot)() == 0) {
			printf("root file system type: %s\n", v->vfs_name);
			break;
		}
	}

	if (v == NULL) {
		printf("no file system for %s", root_device->dv_xname);
		if (root_device->dv_class == DV_DISK)
			printf(" (dev 0x%x)", rootdev);
		printf("\n");
		return (EFTYPE);
	}
	return (0);
}

/*
 * Given a file system name, look up the vfsops for that
 * file system, or return NULL if the file system isn't present
 * in the kernel.
 */
struct vfsops *
vfs_getopsbyname(name)
	const char *name;
{
	struct vfsops *v;

	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (strcmp(v->vfs_name, name) == 0)
			break;
	}

	return (v);
}

/*
 * Establish a file system and initialize it.
 */
int
vfs_attach(vfs)
	struct vfsops *vfs;
{
	struct vfsops *v;
	int error = 0;

	/*
	 * Make sure this file system doesn't already exist.
	 */
	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
			error = EEXIST;
			goto out;
		}
	}

	/*
	 * Initialize the vnode operations for this file system.
	 */
	vfs_opv_init(vfs->vfs_opv_descs);

	/*
	 * Now initialize the file system itself.
	 */
	(*vfs->vfs_init)();

	/*
	 * ...and link it into the kernel's list.
	 */
	LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);

	/*
	 * Sanity: make sure the reference count is 0.
	 */
	vfs->vfs_refcount = 0;

out:
	return (error);
}

/*
 * Remove a file system from the kernel.
 */
int
vfs_detach(vfs)
	struct vfsops *vfs;
{
	struct vfsops *v;

	/*
	 * Make sure no one is using the file system.
	 */
	if (vfs->vfs_refcount != 0)
		return (EBUSY);

	/*
	 * ...and remove it from the kernel's list.
	 */
	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (v == vfs) {
			LIST_REMOVE(v, vfs_list);
			break;
		}
	}

	if (v == NULL)
		return (ESRCH);

	/*
	 * Free the vnode operations vector.
	 */
	vfs_opv_free(vfs->vfs_opv_descs);
	return (0);
}
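
/*
 * Usage sketch (illustrative): a file system registers itself by
 * passing vfs_attach() a populated vfsops.  The name examplefs_vfsops
 * is hypothetical:
 *
 *	extern struct vfsops examplefs_vfsops;
 *
 *	error = vfs_attach(&examplefs_vfsops);
 *	if (error == EEXIST)
 *		printf("examplefs: file system already present\n");
 *
 * vfs_detach() reverses this; it fails with EBUSY while vfs_refcount
 * (the number of active mounts of the file system) is non-zero.
 */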