/*	$NetBSD: vfs_subr.c,v 1.112 1999/10/01 22:03:17 mycroft Exp $	*/

/*-
 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/dirent.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

#include <uvm/uvm_extern.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
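
/*
 * Illustrative sketch (not part of the original source): the two tables
 * above translate between stat-style S_IFMT bits and vnode types; the
 * mode is shifted right by 12 to index iftovt_tab, and a vnode type
 * indexes vttoif_tab directly.  The standard IFTOVT()/VTTOIF() macros
 * are built on these tables; the fragment below only shows the mapping:
 *
 *	enum vtype t = iftovt_tab[(S_IFDIR & S_IFMT) >> 12];
 *						// t == VDIR
 *	int ifmt = vttoif_tab[(int)VDIR];	// ifmt == S_IFDIR
 */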
int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
TAILQ_HEAD(freelst, vnode) vnode_free_list =	/* vnode free list */
    TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct nfs_public nfs_pub;			/* publicly exported FS */

struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
struct simplelock vnode_free_list_slock;
struct simplelock spechash_slock;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;			/* root device */

struct pool vnode_pool;				/* memory pool for vnodes */

/*
 * Local declarations.
 */
void insmntque __P((struct vnode *, struct mount *));
int getdevvp __P((dev_t, struct vnode **, enum vtype));
void vgoneall __P((struct vnode *));

static int vfs_hang_addrlist __P((struct mount *, struct netexport *,
				  struct export_args *));
static int vfs_free_netcred __P((struct radix_node *, void *));
static void vfs_free_addrlist __P((struct netexport *));

#ifdef DEBUG
void printlockedvnodes __P((void));
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	simple_lock_init(&vnode_free_list_slock);

	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
	    0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE);
}

/*
 * Mark a mount point as busy.  Used to synchronize access and to delay
 * unmounting.  Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
{
	int lkflags;

	while (mp->mnt_flag & MNT_UNMOUNT) {
		int gone;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 *
		 * XXX MP: add spinlock protecting mnt_wcnt here once you
		 * can atomically unlock-and-sleep.
		 */
		mp->mnt_wcnt++;
		sleep((caddr_t)mp, PVFS);
		mp->mnt_wcnt--;
		gone = mp->mnt_flag & MNT_GONE;

		if (mp->mnt_wcnt == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp)
	struct mount *mp;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}
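
/*
 * Usage sketch (illustrative, not part of the original source): callers
 * that walk or modify a mount point bracket the work with vfs_busy()
 * and vfs_unbusy() so an unmount cannot proceed underneath them, as the
 * mount-list walkers later in this file do:
 *
 *	if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock) == 0) {
 *		... examine mp->mnt_vnodelist ...
 *		vfs_unbusy(mp);
 *	}
 *
 * A failed vfs_busy() means the mount is going away (MNT_UNMOUNT set)
 * and should simply be skipped.
 */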
/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	for (vfsp = LIST_FIRST(&vfs_list); vfsp != NULL;
	     vfsp = LIST_NEXT(vfsp, vfs_list))
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfs_refcount++;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp, fstypename)
	struct mount *mp;
	char *fstypename;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(fstypename);
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(type)
	char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}
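
/*
 * Worked example (illustrative, not part of the original source):
 * makefstype("ffs") folds the name into a long by shifting the
 * accumulator left two bits and XOR-ing in each character:
 *
 *	rv = 0
 *	rv = (0     << 2) ^ 'f'	= 0x66
 *	rv = (0x66  << 2) ^ 'f'	= 0x198 ^ 0x66 = 0x1fe
 *	rv = (0x1fe << 2) ^ 's'	= 0x7f8 ^ 0x73 = 0x78b
 *
 * Distinct names usually hash to distinct values, but there is no
 * uniqueness guarantee, hence the quotes around 'unique' above.
 */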
/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;

	/*
	 * Assign individually so that it is safe even if size and
	 * sign of each member are varied.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) __P((void *));
	struct vnode **vpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	int error;
#ifdef DIAGNOSTIC
	int s;
#endif

	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error.  If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock).
		 */
		error = vfs_busy(mp, 0, 0);
		if (error)
			return error;
	}

	simple_lock(&vnode_free_list_slock);
	if ((vnode_free_list.tqh_first == NULL &&
	     numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		simple_unlock(&vnode_free_list_slock);
		vp = pool_get(&vnode_pool, PR_WAITOK);
		memset((char *)vp, 0, sizeof(*vp));
		simple_lock_init(&vp->v_interlock);
		numvnodes++;
	} else {
		for (vp = vnode_free_list.tqh_first;
		     vp != NULLVP; vp = vp->v_freelist.tqe_next) {
			if (simple_lock_try(&vp->v_interlock)) {
				if ((vp->v_flag & VLAYER) == 0) {
					break;
				}
				if (VOP_ISLOCKED(vp) == 0)
					break;
				else
					simple_unlock(&vp->v_interlock);
			}
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			if (mp)
				vfs_unbusy(mp);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
	if (mp)
		vfs_unbusy(mp);
	return (0);
}
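
/*
 * Usage sketch (illustrative, not part of the original source): a file
 * system allocates a fresh vnode and attaches its private data roughly
 * like this; the tag and vnodeop vector are the caller's own, and "ip"
 * stands for whatever per-file structure the caller manages:
 *
 *	struct vnode *vp;
 *	int error;
 *
 *	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
 *	if (error)
 *		return (error);
 *	vp->v_data = ip;		// point back at the inode
 *
 * getnewvnode() returns the vnode with v_usecount already set to 1.
 */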
/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_flag & MNT_UNMOUNT)) {
		panic("insmntque into dying filesystem");
	}
#endif

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp) != NULL) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, p);
		if (error != 0)
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
				    slpflag | (PRIBIO + 1), "vinvalbuf",
				    slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bp->b_flags |= B_BUSY | B_VFLUSH;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

void
vflushbuf(vp, sync)
	register struct vnode *vp;
	int sync;
{
	register struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
		nbp = bp->b_vnbufs.le_next;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bp->b_flags |= B_BUSY | B_VFLUSH;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
	}
	splx(s);
	if (vp->v_dirtyblkhd.lh_first != NULL) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}
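
/*
 * Illustrative note (not part of the original source): bgetvp() above
 * and brelvp() below bracket a buffer's association with a vnode.  A
 * sketch of the lifecycle, assuming a freshly allocated buffer bp:
 *
 *	bgetvp(vp, bp);		// VHOLD(vp), bp onto v_cleanblkhd
 *	... I/O; reassignbuf() moves bp to v_dirtyblkhd on delayed write ...
 *	brelvp(bp);		// off the list, HOLDRELE(vp)
 *
 * The hold count keeps the vnode from being recycled while buffers
 * still point at it.
 */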
/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}
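
/*
 * Usage sketch (illustrative, not part of the original source):
 * mounting root or configuring swap typically turns a dev_t into a
 * vnode with bdevvp() above, e.g.:
 *
 *	struct vnode *rootvp;
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("can't set up root vnode");
 *
 * rootdev is the block device chosen at autoconfiguration time; a
 * NODEV argument simply yields a NULL vnode and success.
 */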
/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device).  If such an alias exists, deallocate
 * the existing contents and return the aliased vnode.  The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it.  If the vnode lock bit is set the
 * vnode is being eliminated in vgone.  In that case, we can not
 * grab the vnode, so the process is awakened when the transition is
 * completed, and an error returned to indicate that the vnode is no
 * longer usable (possibly having been changed to a new file system type).
 */
int
vget(vp, flags)
	struct vnode *vp;
	int flags;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure.  Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vget", vp);
		panic("vget: usecount overflow");
	}
#endif
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK)))
			vrele(vp);
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, p);
}
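
/*
 * Lifecycle sketch (illustrative, not part of the original source): a
 * caller that finds a vnode in a cache takes a reference, and usually
 * the vnode lock, with vget(), and drops both with vput():
 *
 *	if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
 *		return (error);		// vnode was being cleaned out
 *	... use the locked vnode ...
 *	vput(vp);			// unlock + vrele in one call
 *
 * vrele() alone is used when the vnode is not locked.
 */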
/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
		VOP_INACTIVE(vp, p);
}

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vref", vp);
		panic("vref: usecount overflow");
	}
#endif
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */
/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error).  If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	register struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device.  For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}
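
/*
 * Usage sketch (illustrative, not part of the original source): a file
 * system's unmount path typically calls vflush() after writing back
 * metadata, mapping MNT_FORCE onto FORCECLOSE:
 *
 *	flags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;
 *	if ((error = vflush(mp, NULLVP, flags)) != 0)
 *		return (error);		// EBUSY: active vnodes remain
 *
 * Passing a non-NULL skipvp leaves that one vnode (e.g. the device
 * vnode the file system itself holds) untouched.
 */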
/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0) {
		/* We have the vnode interlock. */
		vp->v_usecount++;
#ifdef DIAGNOSTIC
		if (vp->v_usecount == 0) {
			vprint("vclean", vp);
			panic("vclean: usecount overflow");
		}
#endif
	}

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out.  The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);

	/*
	 * clean out any VM data associated with the vnode.
	 */
	uvm_vnp_terminate(vp);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.  Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");

	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			/*
			 * Insert at tail of LRU list.
			 */
			simple_lock(&vnode_free_list_slock);
#ifdef DIAGNOSTIC
			if (vp->v_vnlock) {
				if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
					vprint("vclean: lock not drained", vp);
			}
#endif
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			simple_unlock(&vnode_free_list_slock);
		}
		simple_unlock(&vp->v_interlock);
	}

	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}
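
/*
 * Usage sketch (illustrative, not part of the original source): a file
 * system that notices a vnode's underlying object is gone (e.g. an
 * inode freed in its inactive routine) can hand the vnode back early;
 * the return value says whether it was actually reclaimed:
 *
 *	if (ip->i_mode == 0)
 *		(void) vrecycle(vp, (struct simplelock *)0, p);
 *
 * A vnode still in use is left alone and 0 is returned.
 */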
/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	register struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from the special device alias
	 * list, if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (vp->v_hashchain != NULL) {
			if (*vp->v_hashchain == vp) {
				*vp->v_hashchain = vp->v_specnext;
			} else {
				for (vq = *vp->v_hashchain; vq;
				     vq = vq->v_specnext) {
					if (vq->v_specnext != vp)
						continue;
					vq->v_specnext = vp->v_specnext;
					break;
				}
				if (vq == NULL)
					panic("missing bdev");
			}
			if (vp->v_flag & VALIASED) {
				vx = NULL;
				for (vq = *vp->v_hashchain; vq;
				     vq = vq->v_specnext) {
					if (vq->v_rdev != vp->v_rdev ||
					    vq->v_type != vp->v_type)
						continue;
					if (vx)
						break;
					vx = vq;
				}
				if (vx == NULL)
					panic("missing alias");
				if (vq == NULL)
					vx->v_flag &= ~VALIASED;
				vp->v_flag &= ~VALIASED;
			}
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list.  The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
		    vnode_free_list.tqh_first != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(maj, minl, minh, type)
	int maj, minl, minh;
	enum vtype type;
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}
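
/*
 * Usage sketch (illustrative, not part of the original source): a
 * detachable device driver revokes any vnodes still referring to its
 * unit, once for each device type it exposes; bmaj/cmaj here stand for
 * the driver's block and character major numbers and mn for the unit's
 * minor range:
 *
 *	vdevgone(bmaj, mn, mn, VBLK);
 *	vdevgone(cmaj, mn, mn, VCHR);
 */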
/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %ld, writecount %ld, refcount %ld,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
#endif
extern const char *mountcompatnames[];
extern const int nmountcompatnames;

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	struct vfsconf vfc;
#endif
	struct vfsops *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */

	/* Not generic: goes to file system. */
	if (name[0] != VFS_GENERIC) {
		if (name[0] >= nmountcompatnames || name[0] < 0 ||
		    mountcompatnames[name[0]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[0]]);
		if (vfsp == NULL || vfsp->vfs_sysctl == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}

	/* The rest are generic vfs sysctls. */
	switch (name[1]) {
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	case VFS_MAXTYPENUM:
		/*
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
	case VFS_CONF:
		/*
		 * Special: a node, next is a file system name.
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		if (name[2] >= nmountcompatnames || name[2] < 0 ||
		    mountcompatnames[name[2]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		vfc.vfc_vfsops = vfsp;
		strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
		vfc.vfc_typenum = name[2];
		vfc.vfc_refcount = vfsp->vfs_refcount;
		vfc.vfc_flags = 0;
		vfc.vfc_mountroot = vfsp->vfs_mountroot;
		vfc.vfc_next = NULL;
		return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
		    sizeof(struct vfsconf)));
#endif
	default:
		break;
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof(struct vnode *)
#define VNODESZ	sizeof(struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}
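
/*
 * Usage sketch (illustrative, from the userland side, not part of the
 * original source): sysctl_vnode() backs the kern.vnode MIB entry.  A
 * first call with a NULL buffer returns a size estimate (padded by
 * KINFO_VNODESLOP), after which the caller allocates and fetches the
 * (vnode pointer, vnode) pairs:
 *
 *	int mib[2] = { CTL_KERN, KERN_VNODE };
 *	size_t len;
 *
 *	sysctl(mib, 2, NULL, &len, NULL, 0);	// size the buffer
 *	buf = malloc(len);
 *	sysctl(mib, 2, buf, &len, NULL, 0);	// fetch the dump
 */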
/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np, *enp;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	memset((caddr_t)np, 0, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
	if (error)
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) {	/* already exists */
		if (rn == 0) {
			enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
			    smask, rnh);
			if (enp == 0) {
				error = EPERM;
				goto out;
			}
		} else
			enp = (struct netcred *)rn;

		if (enp->netc_exflags != argp->ex_flags ||
		    enp->netc_anon.cr_uid != argp->ex_anon.cr_uid ||
		    enp->netc_anon.cr_gid != argp->ex_anon.cr_gid ||
		    enp->netc_anon.cr_ngroups != argp->ex_anon.cr_ngroups ||
		    memcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups,
			enp->netc_anon.cr_ngroups))
			error = EPERM;
		else
			error = 0;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}
/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i]) != NULL) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		if (mp->mnt_flag & MNT_EXPUBLIC) {
			vfs_setpublicfs(NULL, NULL, NULL);
			mp->mnt_flag &= ~MNT_EXPUBLIC;
		}
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (argp->ex_flags & MNT_EXPUBLIC) {
			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
				return (error);
			mp->mnt_flag |= MNT_EXPUBLIC;
		}
		if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}
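
/*
 * Flow sketch (illustrative, not part of the original source): an
 * exportable file system's mount-update path hands its export_args to
 * vfs_export(), which tears down or builds the address lists above,
 * roughly:
 *
 *	return (vfs_export(mp, &ump->um_export, &args.export));
 *
 * um_export here stands for whatever netexport structure the file
 * system embeds in its mount-private data.
 */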
/*
 * Set the publicly exported filesystem (WebNFS).  Currently, only
 * one public filesystem is possible in the spec (RFC 2054 and 2055)
 */
int
vfs_setpublicfs(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;
	struct vnode *rvp;
	char *cp;

	/*
	 * mp == NULL -> invalidate the current info, the FS is
	 * no longer exported.  May be called from either vfs_export
	 * or unmount, so check if it hasn't already been done.
	 */
	if (mp == NULL) {
		if (nfs_pub.np_valid) {
			nfs_pub.np_valid = 0;
			if (nfs_pub.np_index != NULL) {
				FREE(nfs_pub.np_index, M_TEMP);
				nfs_pub.np_index = NULL;
			}
		}
		return (0);
	}

	/*
	 * Only one allowed at a time.
	 */
	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
		return (EBUSY);

	/*
	 * Get real filehandle for root of exported FS.
	 */
	memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle));
	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;

	if ((error = VFS_ROOT(mp, &rvp)))
		return (error);

	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
		return (error);

	vput(rvp);

	/*
	 * If an indexfile was specified, pull it in.
	 */
	if (argp->ex_indexfile != NULL) {
		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
		    M_WAITOK);
		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
		    MAXNAMLEN, (size_t *)0);
		if (!error) {
			/*
			 * Check for illegal filenames.
			 */
			for (cp = nfs_pub.np_index; *cp; cp++) {
				if (*cp == '/') {
					error = EINVAL;
					break;
				}
			}
		}
		if (error) {
			FREE(nfs_pub.np_index, M_TEMP);
			return (error);
		}
	}

	nfs_pub.np_mount = mp;
	nfs_pub.np_valid = 1;
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
				    (*rnh->rnh_matchaddr)((caddr_t)saddr,
					rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Do the usual access checking.
 * file_mode, uid and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list
 */
int
vaccess(type, file_mode, uid, gid, acc_mode, cred)
	enum vtype type;
	mode_t file_mode;
	uid_t uid;
	gid_t gid;
	mode_t acc_mode;
	struct ucred *cred;
{
	mode_t mask;

	/*
	 * Super-user always gets read/write access, but execute access depends
	 * on at least one execute bit being set.
	 */
	if (cred->cr_uid == 0) {
		if ((acc_mode & VEXEC) && type != VDIR &&
		    (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
			return (EACCES);
		return (0);
	}

	mask = 0;

	/* Otherwise, check the owner. */
	if (cred->cr_uid == uid) {
		if (acc_mode & VEXEC)
			mask |= S_IXUSR;
		if (acc_mode & VREAD)
			mask |= S_IRUSR;
		if (acc_mode & VWRITE)
			mask |= S_IWUSR;
		return ((file_mode & mask) == mask ? 0 : EACCES);
	}

	/* Otherwise, check the groups. */
	if (cred->cr_gid == gid || groupmember(gid, cred)) {
		if (acc_mode & VEXEC)
			mask |= S_IXGRP;
		if (acc_mode & VREAD)
			mask |= S_IRGRP;
		if (acc_mode & VWRITE)
			mask |= S_IWGRP;
		return ((file_mode & mask) == mask ? 0 : EACCES);
	}

	/* Otherwise, check everyone else. */
	if (acc_mode & VEXEC)
		mask |= S_IXOTH;
	if (acc_mode & VREAD)
		mask |= S_IROTH;
	if (acc_mode & VWRITE)
		mask |= S_IWOTH;
	return ((file_mode & mask) == mask ? 0 : EACCES);
}
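
/*
 * Worked example (illustrative, not part of the original source): for
 * a regular file with mode 0644, owner uid 100 and group gid 10, a
 * VREAD|VWRITE request by uid 200 in group 10 takes the group branch,
 * builds mask = S_IRGRP|S_IWGRP (0060), and fails because
 * (0644 & 0060) == 0040 != 0060 -> EACCES.  The same request by uid
 * 100 builds mask = S_IRUSR|S_IWUSR (0600) and succeeds.
 */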
/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall()
{
	register struct mount *mp, *nmp;
	int allerror, error;
	struct proc *p = curproc;	/* XXX */

	/*
	 * Unmounting a file system blocks the requesting process.
	 * However, it's possible for this routine to be called when
	 * curproc is NULL (e.g. panic situation, or via the debugger).
	 * If we get stuck in this situation, just abort, since any
	 * attempts to sleep will fault.
	 */
	if (p == NULL) {
		printf("vfs_unmountall: no context, aborting\n");
		return;
	}

	for (allerror = 0,
	     mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
		printf("unmounting %s (%s)...\n",
		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
		if (vfs_busy(mp, 0, 0))
			continue;
		if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}
	if (allerror)
		printf("WARNING: some file systems would not unmount\n");
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
	register struct buf *bp;
	int iter, nbusy;

	printf("syncing disks... ");

	/* XXX Should suspend scheduling. */
	(void) spl0();

	sys_sync(&proc0, (void *)0, (register_t *)0);

	/* Wait for sync to finish. */
	for (iter = 0; iter < 20; iter++) {
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; )
			if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
				nbusy++;
		if (nbusy == 0)
			break;
		printf("%d ", nbusy);
		DELAY(40000 * iter);
	}
	if (nbusy) {
#ifdef DEBUG
		printf("giving up\nPrinting vnodes for busy buffers\n");
		for (bp = &buf[nbuf]; --bp >= buf; )
			if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
				vprint(NULL, bp->b_vp);
#else
		printf("giving up\n");
#endif
		return;
	} else
		printf("done\n");

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems. */
	vfs_unmountall();
}

/*
 * Mount the root file system.  If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot()
{
	extern int (*mountroot) __P((void));
	struct vfsops *v;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	switch (root_device->dv_class) {
	case DV_IFNET:
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET");
		break;

	case DV_DISK:
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    root_device->dv_xname);
		return (ENODEV);
	}

	/*
	 * If user specified a file system, use it.
	 */
	if (mountroot != NULL)
		return ((*mountroot)());

	/*
	 * Try each file system currently configured into the kernel.
	 */
	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (v->vfs_mountroot == NULL)
			continue;
#ifdef DEBUG
		printf("mountroot: trying %s...\n", v->vfs_name);
#endif
		if ((*v->vfs_mountroot)() == 0) {
			printf("root file system type: %s\n", v->vfs_name);
			break;
		}
	}

	if (v == NULL) {
		printf("no file system for %s", root_device->dv_xname);
		if (root_device->dv_class == DV_DISK)
			printf(" (dev 0x%x)", rootdev);
		printf("\n");
		return (EFTYPE);
	}
	return (0);
}

/*
 * Given a file system name, look up the vfsops for that
 * file system, or return NULL if file system isn't present
 * in the kernel.
 */
struct vfsops *
vfs_getopsbyname(name)
	const char *name;
{
	struct vfsops *v;

	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (strcmp(v->vfs_name, name) == 0)
			break;
	}

	return (v);
}
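
/*
 * Usage sketch (illustrative, not part of the original source): a file
 * system built into the kernel, or loaded at run time, registers
 * itself by passing its vfsops to vfs_attach() below, typically at
 * initialization; "myfs_vfsops" is a hypothetical example:
 *
 *	extern struct vfsops myfs_vfsops;
 *
 *	if ((error = vfs_attach(&myfs_vfsops)) != 0)
 *		return (error);		// EEXIST if already registered
 *
 * vfs_detach() undoes this, failing with EBUSY while the file system
 * still has mounts referencing it.
 */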
/*
 * Establish a file system and initialize it.
 */
int
vfs_attach(vfs)
	struct vfsops *vfs;
{
	struct vfsops *v;
	int error = 0;

	/*
	 * Make sure this file system doesn't already exist.
	 */
	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
			error = EEXIST;
			goto out;
		}
	}

	/*
	 * Initialize the vnode operations for this file system.
	 */
	vfs_opv_init(vfs->vfs_opv_descs);

	/*
	 * Now initialize the file system itself.
	 */
	(*vfs->vfs_init)();

	/*
	 * ...and link it into the kernel's list.
	 */
	LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);

	/*
	 * Sanity: make sure the reference count is 0.
	 */
	vfs->vfs_refcount = 0;

 out:
	return (error);
}

/*
 * Remove a file system from the kernel.
 */
int
vfs_detach(vfs)
	struct vfsops *vfs;
{
	struct vfsops *v;

	/*
	 * Make sure no one is using the filesystem.
	 */
	if (vfs->vfs_refcount != 0)
		return (EBUSY);

	/*
	 * ...and remove it from the kernel's list.
	 */
	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (v == vfs) {
			LIST_REMOVE(v, vfs_list);
			break;
		}
	}

	if (v == NULL)
		return (ESRCH);

	/*
	 * Free the vnode operations vector.
	 */
	vfs_opv_free(vfs->vfs_opv_descs);
	return (0);
}