/*	$NetBSD: vfs_subr.c,v 1.116 1999/12/15 07:10:32 perseant Exp $	*/

/*-
 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/dirent.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/syncfs/syncfs.h>

#include <uvm/uvm_extern.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}

/* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */
struct freelst vnode_free_list = TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct freelst vnode_hold_list = TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct nfs_public nfs_pub;		/* publicly exported FS */

struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
struct simplelock vnode_free_list_slock;
struct simplelock spechash_slock;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;		/* root device */

struct pool vnode_pool;			/* memory pool for vnodes */
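/*
 * Illustrative note (not part of the original source): these tables
 * back the IFTOVT() and VTTOIF() macros in <sys/vnode.h>, converting
 * between stat-style S_IF* type bits and vnode types, e.g.:
 *
 *	enum vtype vt = IFTOVT(mode);	// iftovt_tab[(mode & S_IFMT) >> 12]
 *	mode_t bits = VTTOIF(vt);	// vttoif_tab[(int)vt]
 */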
/*
 * Local declarations.
 */
void insmntque __P((struct vnode *, struct mount *));
int getdevvp __P((dev_t, struct vnode **, enum vtype));
void vgoneall __P((struct vnode *));

static int vfs_hang_addrlist __P((struct mount *, struct netexport *,
				  struct export_args *));
static int vfs_free_netcred __P((struct radix_node *, void *));
static void vfs_free_addrlist __P((struct netexport *));

#ifdef DEBUG
void printlockedvnodes __P((void));
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	simple_lock_init(&vnode_free_list_slock);

	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
	    0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE);

	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

/*
 * Mark a mount point as busy.  Used to synchronize access and to delay
 * unmounting.  The interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
{
	int lkflags;

	while (mp->mnt_flag & MNT_UNMOUNT) {
		int gone;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
		    && mp->mnt_unmounter == curproc)
			return (EDEADLK);
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 *
		 * XXX MP: add spinlock protecting mnt_wcnt here once you
		 * can atomically unlock-and-sleep.
		 */
		mp->mnt_wcnt++;
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		mp->mnt_wcnt--;
		gone = mp->mnt_flag & MNT_GONE;

		if (mp->mnt_wcnt == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp)
	struct mount *mp;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}
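/*
 * Illustrative sketch (not part of the original source): the usual
 * pattern for walking the mount list is to take the shared busy lock
 * while holding mountlist_slock as the interlock, as printlockedvnodes()
 * and sysctl_vnode() below do:
 *
 *	simple_lock(&mountlist_slock);
 *	if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock) == 0) {
 *		... use mp ...
 *		vfs_unbusy(mp);
 *	}
 *
 * On success the interlock has been released by lockmgr(); on failure
 * it is still held (see the comment above vfs_busy()).
 */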
/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	for (vfsp = LIST_FIRST(&vfs_list); vfsp != NULL;
	     vfsp = LIST_NEXT(vfsp, vfs_list))
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfs_refcount++;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid.
 */
void
vfs_getnewfsid(mp, fstypename)
	struct mount *mp;
	char *fstypename;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(fstypename);
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(type)
	char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return (rv);
}

/*
 * Set vnode attributes to VNOVAL.
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;

	/*
	 * Assign individually so that it is safe even if size and
	 * sign of each member are varied.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;
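/*
 * Illustrative note (not part of the original source): the loop in
 * makefstype() above shifts the accumulator two bits per character and
 * XORs the character in, so makefstype("ffs") computes
 * ((('f' << 2) ^ 'f') << 2) ^ 's' == 0x78b.  Different names can
 * collide, hence the 'unique' in quotes.
 */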
/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) __P((void *));
	struct vnode **vpp;
{
	struct proc *p = curproc;	/* XXX */
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int error = 0;
#ifdef DIAGNOSTIC
	int s;
#endif

	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error.  If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock.)
		 */
		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
		if (error && error != EDEADLK)
			return (error);
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one.  The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list.  Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list.  If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list.  The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size.  We are reticent to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */
	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	simple_lock(&vnode_free_list_slock);
	if (numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(listhd = &vnode_free_list) == NULL &&
	     (TAILQ_FIRST(listhd = &vnode_hold_list) == NULL || toggle))) {
		simple_unlock(&vnode_free_list_slock);
		vp = pool_get(&vnode_pool, PR_WAITOK);
		memset((char *)vp, 0, sizeof(*vp));
		simple_lock_init(&vp->v_interlock);
		numvnodes++;
	} else {
		for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
		     vp = TAILQ_NEXT(vp, v_freelist)) {
			if (simple_lock_try(&vp->v_interlock)) {
				if ((vp->v_flag & VLAYER) == 0) {
					break;
				}
				if (VOP_ISLOCKED(vp) == 0)
					break;
				else
					simple_unlock(&vp->v_interlock);
			}
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			if (mp && error != EDEADLK)
				vfs_unbusy(mp);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		TAILQ_REMOVE(listhd, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
	if (mp && error != EDEADLK)
		vfs_unbusy(mp);
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_flag & MNT_UNMOUNT) &&
	    !(mp->mnt_flag & MNT_SOFTDEP) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp) != NULL) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}
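/*
 * Illustrative sketch (not part of the original source): v_numoutput is
 * raised by code that starts a write on the vnode and dropped again via
 * vwakeup().  Code that must drain pending writes sleeps on v_numoutput
 * under VBWAIT, exactly as vinvalbuf() and vflushbuf() below do:
 *
 *	s = splbio();
 *	while (vp->v_numoutput) {
 *		vp->v_flag |= VBWAIT;
 *		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vbwait", 0);
 *	}
 *	splx(s);
 */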
/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		s = splbio();
		while (vp->v_numoutput) {
			vp->v_flag |= VBWAIT;
			tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1,
			    "vbwait", 0);
		}
		if (vp->v_dirtyblkhd.lh_first != NULL) {
			splx(s);
			if ((error = VOP_FSYNC(vp, cred, FSYNC_WAIT, p)) != 0)
				return (error);
			s = splbio();
			if (vp->v_numoutput > 0 ||
			    vp->v_dirtyblkhd.lh_first != NULL)
				panic("vinvalbuf: dirty bufs");
		}
		splx(s);
	}

	s = splbio();

	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA)) {
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		}
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
				    slpflag | (PRIBIO + 1), "vinvalbuf",
				    slptimeo);
				if (error) {
					splx(s);
					return (error);
				}
				break;
			}
			bp->b_flags |= B_BUSY | B_VFLUSH;
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				VOP_BWRITE(bp);
#ifdef DEBUG
				printf("buffer still DELWRI\n");
#endif
				/* VOP_FSYNC(vp, cred, FSYNC_WAIT, p); */
				continue;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}

	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");

	splx(s);

	return (0);
}

void
vflushbuf(vp, sync)
	register struct vnode *vp;
	int sync;
{
	register struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
		nbp = bp->b_vnbufs.le_next;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bp->b_flags |= B_BUSY | B_VFLUSH;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
	}
	splx(s);
	if (vp->v_dirtyblkhd.lh_first != NULL) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}
/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	s = splbio();
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");

	s = splbio();
	vp = bp->b_vp;
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	if ((vp->v_flag & VONWORKLST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
	splx(s);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 *
 * This function must be called at splbio().
 */
void
reassignbuf(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &newvp->v_cleanblkhd;
		if ((newvp->v_flag & VONWORKLST) &&
		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	} else {
		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			switch (newvp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
			}
			vn_syncer_add_to_worklist(newvp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
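/*
 * Illustrative sketch (not part of the original source): a typical
 * bdevvp() caller is a filesystem's mountroot routine, which turns the
 * rootdev device number into a vnode before mounting, along the lines of
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("...mountroot: can't setup bdevvp's");
 *
 * (ffs_mountroot() in ffs_vfsops.c follows this pattern.)
 */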
/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device).  If such an alias exists, deallocate
 * the existing contents and return the aliased vnode.  The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}
/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it.  If the vnode lock bit is set the
 * vnode is being eliminated in vgone.  In that case, we can not
 * grab the vnode, so the process is awakened when the transition is
 * completed, and an error returned to indicate that the vnode is no
 * longer usable (possibly having been changed to a new file system type).
 */
int
vget(vp, flags)
	struct vnode *vp;
	int flags;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vget", vp);
		panic("vget: usecount overflow");
	}
#endif
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active.  We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (vp->v_usecount > 0) {
				simple_unlock(&vp->v_interlock);
				return (error);
			}
			/*
			 * insert at tail of LRU list
			 */
			simple_lock(&vnode_free_list_slock);
			if (vp->v_holdcnt > 0)
				TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
				    v_freelist);
			else
				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
				    v_freelist);
			simple_unlock(&vnode_free_list_slock);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, p);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
		VOP_INACTIVE(vp, p);
}
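/*
 * Illustrative sketch (not part of the original source): the usual
 * reference discipline pairs vget() with vput() (if the vnode is still
 * locked) or vrele() (if it has already been unlocked):
 *
 *	if (vget(vp, LK_EXCLUSIVE) == 0) {
 *		... operate on the locked, referenced vnode ...
 *		vput(vp);	// unlock and release in one step
 *	}
 */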
#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	simple_lock(&vp->v_interlock);
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vref", vp);
		panic("vref: usecount overflow");
	}
#endif
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones;
 * return an error if any are found (nb: this is a user error, not a
 * system error).  If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	register struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device.  For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}
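/*
 * Illustrative sketch (not part of the original source): a filesystem's
 * unmount routine typically flushes its vnodes with something like
 *
 *	if (mntflags & MNT_FORCE)
 *		flags |= FORCECLOSE;
 *	if ((error = vflush(mp, NULLVP, flags)) != 0)
 *		return (error);
 *
 * passing a non-NULL skipvp when one vnode (such as the device vnode)
 * must be kept out of the sweep until it is released explicitly.
 */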
/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0) {
		/* We have the vnode interlock. */
		vp->v_usecount++;
#ifdef DIAGNOSTIC
		if (vp->v_usecount == 0) {
			vprint("vclean", vp);
			panic("vclean: usecount overflow");
		}
#endif
	}

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out.  The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);

	/*
	 * Clean out any VM data associated with the vnode.
	 */
	uvm_vnp_terminate(vp);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.  Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");

	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			/*
			 * Insert at tail of LRU list.
			 */
			simple_unlock(&vp->v_interlock);
			simple_lock(&vnode_free_list_slock);
#ifdef DIAGNOSTIC
			if (vp->v_vnlock) {
				if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
					vprint("vclean: lock not drained", vp);
			}
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean");
#endif
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			simple_unlock(&vnode_free_list_slock);
		} else
			simple_unlock(&vp->v_interlock);
	}

	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}
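/*
 * Illustrative sketch (not part of the original source): a filesystem's
 * VOP_INACTIVE routine can use vrecycle() to throw away a vnode whose
 * backing object is gone, e.g.
 *
 *	if (ip->i_mode == 0)	// hypothetical "inode freed" test
 *		vrecycle(vp, (struct simplelock *)0, p);
 *
 * The ufs_inactive() code uses this pattern for deleted inodes.
 */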
/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	register struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from the special device alias list,
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) &&
	    vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (vp->v_hashchain != NULL) {
			if (*vp->v_hashchain == vp) {
				*vp->v_hashchain = vp->v_specnext;
			} else {
				for (vq = *vp->v_hashchain; vq;
				     vq = vq->v_specnext) {
					if (vq->v_specnext != vp)
						continue;
					vq->v_specnext = vp->v_specnext;
					break;
				}
				if (vq == NULL)
					panic("missing bdev");
			}
			if (vp->v_flag & VALIASED) {
				vx = NULL;
				for (vq = *vp->v_hashchain; vq;
				     vq = vq->v_specnext) {
					if (vq->v_rdev != vp->v_rdev ||
					    vq->v_type != vp->v_type)
						continue;
					if (vx)
						break;
					vx = vq;
				}
				if (vx == NULL)
					panic("missing alias");
				if (vq == NULL)
					vx->v_flag &= ~VALIASED;
				vp->v_flag &= ~VALIASED;
			}
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list.  The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean");
		if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
		    TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(maj, minl, minh, type)
	int maj, minl, minh;
	enum vtype type;
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}
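/*
 * Illustrative sketch (not part of the original source): a device
 * driver's detach routine can use vdevgone() to revoke any vnodes
 * still referring to the departing device, e.g.
 *
 *	// bmaj/cmaj are the driver's block and character majors
 *	vdevgone(bmaj, unit, unit, VBLK);
 *	vdevgone(cmaj, unit, unit, VCHR);
 */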
/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("tag %d type %s, usecount %ld, writecount %ld, refcount %ld,",
	    vp->v_tag, typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
#endif

extern const char *mountcompatnames[];
extern const int nmountcompatnames;

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	struct vfsconf vfc;
#endif
	struct vfsops *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */

	/* Not generic: goes to file system. */
	if (name[0] != VFS_GENERIC) {
		if (name[0] >= nmountcompatnames || name[0] < 0 ||
		    mountcompatnames[name[0]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[0]]);
		if (vfsp == NULL || vfsp->vfs_sysctl == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}

	/* The rest are generic vfs sysctls. */
	switch (name[1]) {
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	case VFS_MAXTYPENUM:
		/*
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
	case VFS_CONF:
		/*
		 * Special: a node, next is a file system name.
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		if (name[2] >= nmountcompatnames || name[2] < 0 ||
		    mountcompatnames[name[2]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		vfc.vfc_vfsops = vfsp;
		strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
		vfc.vfc_typenum = name[2];
		vfc.vfc_refcount = vfsp->vfs_refcount;
		vfc.vfc_flags = 0;
		vfc.vfc_mountroot = vfsp->vfs_mountroot;
		vfc.vfc_next = NULL;
		return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
		    sizeof(struct vfsconf)));
#endif
	default:
		break;
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof(struct vnode *)
#define VNODESZ	sizeof(struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}
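/*
 * Illustrative sketch (not part of the original source): userland
 * consumers (e.g. pstat(8)) read this dump with a two-step sysctl
 * call, first sizing, then fetching; the MIB name here is assumed
 * to be the traditional { CTL_KERN, KERN_VNODE }:
 *
 *	int mib[2] = { CTL_KERN, KERN_VNODE };
 *	size_t len;
 *	if (sysctl(mib, 2, NULL, &len, NULL, 0) == 0) {
 *		char *buf = malloc(len);
 *		sysctl(mib, 2, buf, &len, NULL, 0);
 *		// buf now holds (vnode pointer, vnode) pairs
 *	}
 */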
/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np, *enp;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	memset((caddr_t)np, 0, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
	if (error)
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used; do so on demand here.
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
		if (rn == 0) {
			enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
			    smask, rnh);
			if (enp == 0) {
				error = EPERM;
				goto out;
			}
		} else
			enp = (struct netcred *)rn;

		if (enp->netc_exflags != argp->ex_flags ||
		    enp->netc_anon.cr_uid != argp->ex_anon.cr_uid ||
		    enp->netc_anon.cr_gid != argp->ex_anon.cr_gid ||
		    enp->netc_anon.cr_ngroups != argp->ex_anon.cr_ngroups ||
		    memcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups,
			enp->netc_anon.cr_ngroups))
			error = EPERM;
		else
			error = 0;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}
/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i]) != NULL) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		if (mp->mnt_flag & MNT_EXPUBLIC) {
			vfs_setpublicfs(NULL, NULL, NULL);
			mp->mnt_flag &= ~MNT_EXPUBLIC;
		}
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (argp->ex_flags & MNT_EXPUBLIC) {
			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
				return (error);
			mp->mnt_flag |= MNT_EXPUBLIC;
		}
		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}
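/*
 * Illustrative sketch (not part of the original source): filesystems
 * forward export administration to vfs_export() from their mount entry
 * points on MNT_UPDATE when no new special device is given; with
 * "ump->um_export" as the per-mount netexport (the ufs convention):
 *
 *	if (args.fspec == 0)
 *		return (vfs_export(mp, &ump->um_export, &args.export));
 */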

/*
 * Set the publicly exported filesystem (WebNFS). Currently, only
 * one public filesystem is possible in the spec (RFC 2054 and 2055).
 */
int
vfs_setpublicfs(mp, nep, argp)
        struct mount *mp;
        struct netexport *nep;
        struct export_args *argp;
{
        int error;
        struct vnode *rvp;
        char *cp;

        /*
         * mp == NULL -> invalidate the current info; the FS is
         * no longer exported. May be called from either vfs_export
         * or unmount, so check whether it has already been done.
         */
        if (mp == NULL) {
                if (nfs_pub.np_valid) {
                        nfs_pub.np_valid = 0;
                        if (nfs_pub.np_index != NULL) {
                                FREE(nfs_pub.np_index, M_TEMP);
                                nfs_pub.np_index = NULL;
                        }
                }
                return (0);
        }

        /*
         * Only one allowed at a time.
         */
        if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
                return (EBUSY);

        /*
         * Get real filehandle for root of exported FS.
         */
        memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
        nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;

        if ((error = VFS_ROOT(mp, &rvp)))
                return (error);

        if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
                return (error);

        vput(rvp);

        /*
         * If an indexfile was specified, pull it in.
         */
        if (argp->ex_indexfile != NULL) {
                MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
                    M_WAITOK);
                error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
                    MAXNAMLEN, (size_t *)0);
                if (!error) {
                        /*
                         * Check for illegal filenames.
                         */
                        for (cp = nfs_pub.np_index; *cp; cp++) {
                                if (*cp == '/') {
                                        error = EINVAL;
                                        break;
                                }
                        }
                }
                if (error) {
                        FREE(nfs_pub.np_index, M_TEMP);
                        return (error);
                }
        }

        nfs_pub.np_mount = mp;
        nfs_pub.np_valid = 1;
        return (0);
}

/*
 * Look up the network credentials that apply to the given address on
 * an exported mount point, falling back to the default export if any.
 */
struct netcred *
vfs_export_lookup(mp, nep, nam)
        register struct mount *mp;
        struct netexport *nep;
        struct mbuf *nam;
{
        register struct netcred *np;
        register struct radix_node_head *rnh;
        struct sockaddr *saddr;

        np = NULL;
        if (mp->mnt_flag & MNT_EXPORTED) {
                /*
                 * Lookup in the export list first.
                 */
                if (nam != NULL) {
                        saddr = mtod(nam, struct sockaddr *);
                        rnh = nep->ne_rtable[saddr->sa_family];
                        if (rnh != NULL) {
                                np = (struct netcred *)
                                    (*rnh->rnh_matchaddr)((caddr_t)saddr,
                                    rnh);
                                if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
                                        np = NULL;
                        }
                }
                /*
                 * If no address match, use the default if it exists.
                 */
                if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
                        np = &nep->ne_defexported;
        }
        return (np);
}

/*
 * Do the usual access checking.
 * file_mode, uid and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list.
 */
int
vaccess(type, file_mode, uid, gid, acc_mode, cred)
        enum vtype type;
        mode_t file_mode;
        uid_t uid;
        gid_t gid;
        mode_t acc_mode;
        struct ucred *cred;
{
        mode_t mask;

        /*
         * Super-user always gets read/write access, but execute access depends
         * on at least one execute bit being set.
         */
        if (cred->cr_uid == 0) {
                if ((acc_mode & VEXEC) && type != VDIR &&
                    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
                        return (EACCES);
                return (0);
        }

        mask = 0;

        /* Otherwise, check the owner. */
        if (cred->cr_uid == uid) {
                if (acc_mode & VEXEC)
                        mask |= S_IXUSR;
                if (acc_mode & VREAD)
                        mask |= S_IRUSR;
                if (acc_mode & VWRITE)
                        mask |= S_IWUSR;
                return ((file_mode & mask) == mask ? 0 : EACCES);
        }

        /* Otherwise, check the groups. */
        if (cred->cr_gid == gid || groupmember(gid, cred)) {
                if (acc_mode & VEXEC)
                        mask |= S_IXGRP;
                if (acc_mode & VREAD)
                        mask |= S_IRGRP;
                if (acc_mode & VWRITE)
                        mask |= S_IWGRP;
                return ((file_mode & mask) == mask ? 0 : EACCES);
        }

        /* Otherwise, check everyone else. */
        if (acc_mode & VEXEC)
                mask |= S_IXOTH;
        if (acc_mode & VREAD)
                mask |= S_IROTH;
        if (acc_mode & VWRITE)
                mask |= S_IWOTH;
        return ((file_mode & mask) == mask ? 0 : EACCES);
}
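
/*
 * Worked example for vaccess() (illustrative): a non-root request for
 * VREAD|VWRITE by the owning uid builds mask = S_IRUSR|S_IWUSR (0600).
 * A file mode of 0644 passes, since (0644 & 0600) == 0600, while a
 * mode of 0444 is missing the write bit and yields EACCES.
 */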

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall()
{
        register struct mount *mp, *nmp;
        int allerror, error;
        struct proc *p = curproc;       /* XXX */

        /*
         * Unmounting a file system blocks the requesting process.
         * However, it's possible for this routine to be called when
         * curproc is NULL (e.g. panic situation, or via the debugger).
         * If we get stuck in this situation, just abort, since any
         * attempts to sleep will fault.
         */
        if (p == NULL) {
                printf("vfs_unmountall: no context, aborting\n");
                return;
        }

        for (allerror = 0,
            mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
                nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
                printf("unmounting %s (%s)...\n",
                    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
                if (vfs_busy(mp, 0, 0))
                        continue;
                if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
                        printf("unmount of %s failed with error %d\n",
                            mp->mnt_stat.f_mntonname, error);
                        allerror = 1;
                }
        }
        if (allerror)
                printf("WARNING: some file systems would not unmount\n");
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
        register struct buf *bp;
        int iter, nbusy, dcount, s;

        printf("syncing disks... ");

        /* XXX Should suspend scheduling. */
        (void) spl0();

        sys_sync(&proc0, (void *)0, (register_t *)0);

        /* Wait for sync to finish. */
        dcount = 10000;
        for (iter = 0; iter < 20; iter++) {
                nbusy = 0;
                for (bp = &buf[nbuf]; --bp >= buf; ) {
                        if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
                                nbusy++;
                        /*
                         * With soft updates, some buffers that are
                         * written will be remarked as dirty until other
                         * buffers are written.
                         */
                        if (bp->b_vp && bp->b_vp->v_mount
                            && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP)
                            && (bp->b_flags & B_DELWRI)) {
                                s = splbio();
                                bremfree(bp);
                                bp->b_flags |= B_BUSY;
                                splx(s);
                                nbusy++;
                                bawrite(bp);
                                if (dcount-- <= 0) {
                                        printf("softdep ");
                                        goto fail;
                                }
                        }
                }
                if (nbusy == 0)
                        break;
                printf("%d ", nbusy);
                DELAY(40000 * iter);
        }
        if (nbusy) {
 fail:
#ifdef DEBUG
                printf("giving up\nPrinting vnodes for busy buffers\n");
                for (bp = &buf[nbuf]; --bp >= buf; )
                        if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
                                vprint(NULL, bp->b_vp);
#else
                printf("giving up\n");
#endif
                return;
        } else
                printf("done\n");

        /*
         * If we've panic'd, don't make the situation potentially
         * worse by unmounting the file systems.
         */
        if (panicstr != NULL)
                return;

        /* Release inodes held by texts before update. */
#ifdef notdef
        vnshutdown();
#endif
        /* Unmount file systems. */
        vfs_unmountall();
}
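
/*
 * Timing note: the wait loop in vfs_shutdown() above polls at most 20
 * times, spinning for DELAY(40000 * iter) microseconds between passes,
 * so it waits roughly 7.6 seconds in total (40ms * (0 + 1 + ... + 19))
 * before giving up on busy buffers.
 */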

/*
 * Mount the root file system. If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot()
{
        extern int (*mountroot) __P((void));
        struct vfsops *v;

        if (root_device == NULL)
                panic("vfs_mountroot: root device unknown");

        switch (root_device->dv_class) {
        case DV_IFNET:
                if (rootdev != NODEV)
                        panic("vfs_mountroot: rootdev set for DV_IFNET");
                break;

        case DV_DISK:
                if (rootdev == NODEV)
                        panic("vfs_mountroot: rootdev not set for DV_DISK");
                break;

        default:
                printf("%s: inappropriate for root file system\n",
                    root_device->dv_xname);
                return (ENODEV);
        }

        /*
         * If user specified a file system, use it.
         */
        if (mountroot != NULL)
                return ((*mountroot)());

        /*
         * Try each file system currently configured into the kernel.
         */
        for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
                if (v->vfs_mountroot == NULL)
                        continue;
#ifdef DEBUG
                printf("mountroot: trying %s...\n", v->vfs_name);
#endif
                if ((*v->vfs_mountroot)() == 0) {
                        printf("root file system type: %s\n", v->vfs_name);
                        break;
                }
        }

        if (v == NULL) {
                printf("no file system for %s", root_device->dv_xname);
                if (root_device->dv_class == DV_DISK)
                        printf(" (dev 0x%x)", rootdev);
                printf("\n");
                return (EFTYPE);
        }
        return (0);
}

/*
 * Given a file system name, look up the vfsops for that
 * file system, or return NULL if the file system isn't present
 * in the kernel.
 */
struct vfsops *
vfs_getopsbyname(name)
        const char *name;
{
        struct vfsops *v;

        for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
                if (strcmp(v->vfs_name, name) == 0)
                        break;
        }

        return (v);
}

/*
 * Establish a file system and initialize it.
 */
int
vfs_attach(vfs)
        struct vfsops *vfs;
{
        struct vfsops *v;
        int error = 0;

        /*
         * Make sure this file system doesn't already exist.
         */
        for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
                if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
                        error = EEXIST;
                        goto out;
                }
        }

        /*
         * Initialize the vnode operations for this file system.
         */
        vfs_opv_init(vfs->vfs_opv_descs);

        /*
         * Now initialize the file system itself.
         */
        (*vfs->vfs_init)();

        /*
         * ...and link it into the kernel's list.
         */
        LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);

        /*
         * Sanity: make sure the reference count is 0.
         */
        vfs->vfs_refcount = 0;

 out:
        return (error);
}

/*
 * Remove a file system from the kernel.
 */
int
vfs_detach(vfs)
        struct vfsops *vfs;
{
        struct vfsops *v;

        /*
         * Make sure no one is using the file system.
         */
        if (vfs->vfs_refcount != 0)
                return (EBUSY);

        /*
         * ...and remove it from the kernel's list.
         */
        for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
                if (v == vfs) {
                        LIST_REMOVE(v, vfs_list);
                        break;
                }
        }

        if (v == NULL)
                return (ESRCH);

        /*
         * Free the vnode operations vector.
         */
        vfs_opv_free(vfs->vfs_opv_descs);
        return (0);
}
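
/*
 * Usage sketch (illustrative only, not compiled in): a file system
 * makes itself known to the kernel by handing its vfsops to
 * vfs_attach(), e.g.
 *
 *	extern struct vfsops ffs_vfsops;
 *	...
 *	if (vfs_attach(&ffs_vfsops) == EEXIST)
 *		panic("ffs already attached");
 *
 * vfs_detach() undoes the registration; it fails with EBUSY while any
 * mount still holds a reference (vfs_refcount != 0) and with ESRCH if
 * the file system was never attached.
 */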