1 /* $NetBSD: vfs_subr.c,v 1.99 1999/03/22 17:24:19 sommerfe Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1989, 1993 42 * The Regents of the University of California. All rights reserved. 43 * (c) UNIX System Laboratories, Inc. 44 * All or some portions of this file are derived from material licensed 45 * to the University of California by American Telephone and Telegraph 46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 47 * the permission of UNIX System Laboratories, Inc. 48 * 49 * Redistribution and use in source and binary forms, with or without 50 * modification, are permitted provided that the following conditions 51 * are met: 52 * 1. Redistributions of source code must retain the above copyright 53 * notice, this list of conditions and the following disclaimer. 54 * 2. Redistributions in binary form must reproduce the above copyright 55 * notice, this list of conditions and the following disclaimer in the 56 * documentation and/or other materials provided with the distribution. 57 * 3. All advertising materials mentioning features or use of this software 58 * must display the following acknowledgement: 59 * This product includes software developed by the University of 60 * California, Berkeley and its contributors. 61 * 4. Neither the name of the University nor the names of its contributors 62 * may be used to endorse or promote products derived from this software 63 * without specific prior written permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"
#include "opt_uvm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/dirent.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

#if defined(UVM)
#include <uvm/uvm_extern.h>
#endif

/* Map S_IFMT file-type bits (mode >> 12) to vnode types. */
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
/* Inverse map: vnode type to S_IFMT file-type bits. */
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;	/* 1 => permit forcible unmounting */
int prtactive = 0;	/* 1 => print out reclaim of active vnodes */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}

TAILQ_HEAD(freelst, vnode) vnode_free_list =	/* vnode free list */
    TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct nfs_public nfs_pub;			/* publicly exported FS */

/* Simple locks guarding the lists and hash chains above. */
struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
struct simplelock vnode_free_list_slock;
struct simplelock spechash_slock;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;			/* root device */

struct pool vnode_pool;				/* memory pool for vnodes */

/*
 * Local declarations.
 */
void	insmntque __P((struct vnode *, struct mount *));
int	getdevvp __P((dev_t, struct vnode **, enum vtype));
void	vgoneall __P((struct vnode *));

static	int vfs_hang_addrlist __P((struct mount *, struct netexport *,
				   struct export_args *));
static	int vfs_free_netcred __P((struct radix_node *, void *));
static	void vfs_free_addrlist __P((struct netexport *));

#ifdef DEBUG
void printlockedvnodes __P((void));
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	simple_lock_init(&vnode_free_list_slock);

	/* All vnodes are allocated from this pool (see getnewvnode). */
	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
	    0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE);
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 *
 * Returns 0 on success, ENOENT if the mount is being unmounted.
 */
int
vfs_busy(mp, flags, interlkp)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
{
	int lkflags;

	if (mp->mnt_flag & MNT_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_flag |= MNT_MWAIT;
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		sleep((caddr_t)mp, PVFS);
		if (interlkp)
			simple_lock(interlkp);
		return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	/* A shared busy lock cannot fail unless the lock state is corrupt. */
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp)
	struct mount *mp;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	/* Find the named filesystem type on the global vfs list. */
	for (vfsp = LIST_FIRST(&vfs_list); vfsp != NULL;
	     vfsp = LIST_NEXT(vfsp, vfs_list))
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	/* Mark the new mount busy; nobody else can see it yet. */
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;	/* root starts read-only */
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfs_refcount++;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp, fstypename)
	struct mount *mp;
	char *fstypename;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(fstypename);
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		/* Bump the candidate until no mounted fs already uses it. */
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(type)
	char *type;
{
	long rv;

	/* Fold the name into a long: shift-and-xor over each character. */
	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}


/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;

	/*
	 * Assign individually so that it is safe even if size and
	 * sign of each member are varied.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;			/* count of vnodes currently allocated */

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) __P((void *));
	struct vnode **vpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
#ifdef DIAGNOSTIC
	int s;
#endif

	simple_lock(&vnode_free_list_slock);
	/*
	 * Allocate a fresh vnode while we are below desiredvnodes, or
	 * when the free list is empty but we are still below twice that
	 * limit; otherwise recycle one off the free list.
	 */
	if ((vnode_free_list.tqh_first == NULL &&
	     numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		simple_unlock(&vnode_free_list_slock);
		vp = pool_get(&vnode_pool, PR_WAITOK);
		memset((char *)vp, 0, sizeof(*vp));
		numvnodes++;
	} else {
		/* Take the first free vnode whose interlock we can grab. */
		for (vp = vnode_free_list.tqh_first;
		     vp != NULLVP; vp = vp->v_freelist.tqe_next) {
			if (simple_lock_try(&vp->v_interlock))
				break;
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;
		/* A VBAD vnode has already been cleaned; anything else must be. */
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		/* Reset the per-vnode read-ahead/clustering state. */
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
#ifdef UVM
	simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
#endif
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp) != NULL) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		/* Wake anyone in vflushbuf/vinvalbuf waiting for I/O drain. */
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		/* Push dirty data to disk before invalidating. */
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, p);
		if (error != 0)
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		/*
		 * With V_SAVEMETA, skip metadata buffers (negative
		 * logical block numbers) on both lists.
		 */
		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
					slpflag | (PRIBIO + 1), "vinvalbuf",
					slptimeo);
				splx(s);
				if (error)
					return (error);
				/* Restart the scan; lists may have changed. */
				break;
			}
			bp->b_flags |= B_BUSY | B_VFLUSH;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Write out all dirty buffers associated with a vnode; if sync is
 * nonzero, wait for the writes (and any other outstanding I/O) to drain.
 */
void
vflushbuf(vp, sync)
	register struct vnode *vp;
	int sync;
{
	register struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
		nbp = bp->b_vnbufs.le_next;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bp->b_flags |= B_BUSY | B_VFLUSH;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	/* Wait for all outstanding writes to finish (see vwakeup). */
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
	}
	splx(s);
	if (vp->v_dirtyblkhd.lh_first != NULL) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	/* If the device already has a vnode, use that one instead. */
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
			/* Lost a race with vgone; rescan the hash chain. */
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		/* No usable alias: set up specinfo on the new vnode. */
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. If the vnode lock bit is set the
 * vnode is being eliminated in vgone. In that case, we can not
 * grab the vnode, so the process is awakened when the transition is
 * completed, and an error returned to indicate that the vnode is no
 * longer usable (possibly having been changed to a new file system type).
 */
int
vget(vp, flags)
	struct vnode *vp;
	int flags;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
855 */ 856 if ((flags & LK_INTERLOCK) == 0) 857 simple_lock(&vp->v_interlock); 858 if (vp->v_flag & VXLOCK) { 859 vp->v_flag |= VXWANT; 860 simple_unlock(&vp->v_interlock); 861 tsleep((caddr_t)vp, PINOD, "vget", 0); 862 return (ENOENT); 863 } 864 if (vp->v_usecount == 0) { 865 simple_lock(&vnode_free_list_slock); 866 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 867 simple_unlock(&vnode_free_list_slock); 868 } 869 vp->v_usecount++; 870 if (flags & LK_TYPE_MASK) { 871 if ((error = vn_lock(vp, flags | LK_INTERLOCK))) 872 vrele(vp); 873 return (error); 874 } 875 simple_unlock(&vp->v_interlock); 876 return (0); 877 } 878 879 /* 880 * vput(), just unlock and vrele() 881 */ 882 void 883 vput(vp) 884 struct vnode *vp; 885 { 886 struct proc *p = curproc; /* XXX */ 887 888 #ifdef DIGANOSTIC 889 if (vp == NULL) 890 panic("vput: null vp"); 891 #endif 892 simple_lock(&vp->v_interlock); 893 vp->v_usecount--; 894 if (vp->v_usecount > 0) { 895 simple_unlock(&vp->v_interlock); 896 VOP_UNLOCK(vp, 0); 897 return; 898 } 899 #ifdef DIAGNOSTIC 900 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 901 vprint("vput: bad ref count", vp); 902 panic("vput: ref cnt"); 903 } 904 #endif 905 /* 906 * Insert at tail of LRU list. 907 */ 908 simple_lock(&vnode_free_list_slock); 909 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 910 simple_unlock(&vnode_free_list_slock); 911 simple_unlock(&vp->v_interlock); 912 VOP_INACTIVE(vp, p); 913 } 914 915 /* 916 * Vnode release. 917 * If count drops to zero, call inactive routine and return to freelist. 
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
		VOP_INACTIVE(vp, p);
}

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	/* vref is only legal on a vnode that already has a reference. */
	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	register struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/* Restart if vp migrated off this mount while we worked. */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		simple_lock(&vp->v_interlock);
		/*
		 * Skip over a vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 * Called with the vnode interlock held.
 */
void
vclean(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0)
		/* We have the vnode interlock. */
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);

#ifdef UVM
	/*
	 * clean out any VM data associated with the vnode.
	 */
	uvm_vnp_terminate(vp);
#endif
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");

	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			/*
			 * Insert at tail of LRU list.
			 */
			simple_lock(&vnode_free_list_slock);
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			simple_unlock(&vnode_free_list_slock);
		}
		simple_unlock(&vp->v_interlock);
	}

	cache_purge(vp);
	if (vp->v_vnlock) {
		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
			vprint("vclean: lock not drained", vp);
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		/* vgonel() consumes the v_interlock we just took. */
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.  The interlock is released
 * (via vclean() or the tsleep() path) before this routine returns.
 */
void
vgonel(vp, p)
	register struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			/* Unlink vp from the middle of its hash chain. */
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			/*
			 * Scan the remaining aliases of this device; if
			 * exactly one is left (vq == NULL after finding
			 * only vx), it no longer needs the VALIASED mark.
			 */
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
		    vnode_free_list.tqh_first != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.  Returns 1 and stores the vnode
 * through *vpp on success, 0 if no matching vnode is hashed.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(maj, minl, minh, type)
	int maj, minl, minh;
	enum vtype type;
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}

/*
 * Calculate the total number of references to a special device,
 * summing the use counts of all aliases of the device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.  vgone()
		 * drops the spechash lock, so restart the scan.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	/*
	 * 64 bytes is enough for all flag names concatenated
	 * (worst case is 50 bytes plus the terminator).
	 */
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %ld,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);	/* skip the leading '|' */
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
#endif

extern const char *mountcompatnames[];
extern const int nmountcompatnames;

/*
 * Top level filesystem related information gathering.
 *
 * Non-VFS_GENERIC requests are dispatched to the named file system's
 * own sysctl handler; the generic cases exist for 4.4BSD-Lite2
 * compatibility.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	struct vfsconf vfc;
#endif
	struct vfsops *vfsp;

	/* All sysctl names at this level are at least name and field. */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */

	/* Not generic: goes to file system. */
	if (name[0] != VFS_GENERIC) {
		if (name[0] >= nmountcompatnames || name[0] < 0 ||
		    mountcompatnames[name[0]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[0]]);
		if (vfsp == NULL || vfsp->vfs_sysctl == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}

	/* The rest are generic vfs sysctls. */
	switch (name[1]) {
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	case VFS_MAXTYPENUM:
		/*
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
	case VFS_CONF:
		/*
		 * Special: a node, next is a file system name.
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		if (name[2] >= nmountcompatnames || name[2] < 0 ||
		    mountcompatnames[name[2]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		vfc.vfc_vfsops = vfsp;
		strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
		vfc.vfc_typenum = name[2];
		vfc.vfc_refcount = vfsp->vfs_refcount;
		vfc.vfc_flags = 0;
		vfc.vfc_mountroot = vfsp->vfs_mountroot;
		vfc.vfc_next = NULL;
		return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
		    sizeof(struct vfsconf)));
#endif
	default:
		break;
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
1558 */ 1559 /* ARGSUSED */ 1560 int 1561 sysctl_vnode(where, sizep, p) 1562 char *where; 1563 size_t *sizep; 1564 struct proc *p; 1565 { 1566 struct mount *mp, *nmp; 1567 struct vnode *nvp, *vp; 1568 char *bp = where, *savebp; 1569 char *ewhere; 1570 int error; 1571 1572 #define VPTRSZ sizeof(struct vnode *) 1573 #define VNODESZ sizeof(struct vnode) 1574 if (where == NULL) { 1575 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 1576 return (0); 1577 } 1578 ewhere = where + *sizep; 1579 1580 simple_lock(&mountlist_slock); 1581 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1582 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) { 1583 nmp = mp->mnt_list.cqe_next; 1584 continue; 1585 } 1586 savebp = bp; 1587 again: 1588 simple_lock(&mntvnode_slock); 1589 for (vp = mp->mnt_vnodelist.lh_first; 1590 vp != NULL; 1591 vp = nvp) { 1592 /* 1593 * Check that the vp is still associated with 1594 * this filesystem. RACE: could have been 1595 * recycled onto the same filesystem. 1596 */ 1597 if (vp->v_mount != mp) { 1598 simple_unlock(&mntvnode_slock); 1599 if (kinfo_vdebug) 1600 printf("kinfo: vp changed\n"); 1601 bp = savebp; 1602 goto again; 1603 } 1604 nvp = vp->v_mntvnodes.le_next; 1605 if (bp + VPTRSZ + VNODESZ > ewhere) { 1606 simple_unlock(&mntvnode_slock); 1607 *sizep = bp - where; 1608 return (ENOMEM); 1609 } 1610 simple_unlock(&mntvnode_slock); 1611 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || 1612 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) 1613 return (error); 1614 bp += VPTRSZ + VNODESZ; 1615 simple_lock(&mntvnode_slock); 1616 } 1617 simple_unlock(&mntvnode_slock); 1618 simple_lock(&mountlist_slock); 1619 nmp = mp->mnt_list.cqe_next; 1620 vfs_unbusy(mp); 1621 } 1622 simple_unlock(&mountlist_slock); 1623 1624 *sizep = bp - where; 1625 return (0); 1626 } 1627 1628 /* 1629 * Check to see if a filesystem is mounted on a block device. 
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		/* Any alias of the device being mounted-on also counts. */
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np, *enp;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	/*
	 * A zero address length means "export to everyone": record the
	 * default export credentials instead of a radix-tree entry.
	 */
	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	/*
	 * The netcred, the address and the (optional) mask are carved
	 * out of one allocation; saddr/smask point into it.
	 */
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	memset((caddr_t)np, 0, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
	if (error)
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here.
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) {	/* already exists */
		/*
		 * An entry for this address already exists.  That is
		 * only acceptable if it grants exactly the same flags
		 * and anonymous credentials; otherwise fail with EPERM.
		 */
		if (rn == 0) {
			enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
			    smask, rnh);
			if (enp == 0) {
				error = EPERM;
				goto out;
			}
		} else
			enp = (struct netcred *)rn;

		if (enp->netc_exflags != argp->ex_flags ||
		    enp->netc_anon.cr_uid != argp->ex_anon.cr_uid ||
		    enp->netc_anon.cr_gid != argp->ex_anon.cr_gid ||
		    enp->netc_anon.cr_ngroups != argp->ex_anon.cr_ngroups ||
		    memcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups,
			enp->netc_anon.cr_ngroups))
			error = EPERM;
		else
			error = 0;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/*
 * Radix-tree walker callback: delete and free one netcred entry.
 */
/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
1766 */ 1767 static void 1768 vfs_free_addrlist(nep) 1769 struct netexport *nep; 1770 { 1771 register int i; 1772 register struct radix_node_head *rnh; 1773 1774 for (i = 0; i <= AF_MAX; i++) 1775 if ((rnh = nep->ne_rtable[i]) != NULL) { 1776 (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh); 1777 free((caddr_t)rnh, M_RTABLE); 1778 nep->ne_rtable[i] = 0; 1779 } 1780 } 1781 1782 int 1783 vfs_export(mp, nep, argp) 1784 struct mount *mp; 1785 struct netexport *nep; 1786 struct export_args *argp; 1787 { 1788 int error; 1789 1790 if (argp->ex_flags & MNT_DELEXPORT) { 1791 if (mp->mnt_flag & MNT_EXPUBLIC) { 1792 vfs_setpublicfs(NULL, NULL, NULL); 1793 mp->mnt_flag &= ~MNT_EXPUBLIC; 1794 } 1795 vfs_free_addrlist(nep); 1796 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 1797 } 1798 if (argp->ex_flags & MNT_EXPORTED) { 1799 if (argp->ex_flags & MNT_EXPUBLIC) { 1800 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 1801 return (error); 1802 mp->mnt_flag |= MNT_EXPUBLIC; 1803 } 1804 if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0) 1805 return (error); 1806 mp->mnt_flag |= MNT_EXPORTED; 1807 } 1808 return (0); 1809 } 1810 1811 /* 1812 * Set the publicly exported filesystem (WebNFS). Currently, only 1813 * one public filesystem is possible in the spec (RFC 2054 and 2055) 1814 */ 1815 int 1816 vfs_setpublicfs(mp, nep, argp) 1817 struct mount *mp; 1818 struct netexport *nep; 1819 struct export_args *argp; 1820 { 1821 int error; 1822 struct vnode *rvp; 1823 char *cp; 1824 1825 /* 1826 * mp == NULL -> invalidate the current info, the FS is 1827 * no longer exported. May be called from either vfs_export 1828 * or unmount, so check if it hasn't already been done. 1829 */ 1830 if (mp == NULL) { 1831 if (nfs_pub.np_valid) { 1832 nfs_pub.np_valid = 0; 1833 if (nfs_pub.np_index != NULL) { 1834 FREE(nfs_pub.np_index, M_TEMP); 1835 nfs_pub.np_index = NULL; 1836 } 1837 } 1838 return (0); 1839 } 1840 1841 /* 1842 * Only one allowed at a time. 
1843 */ 1844 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 1845 return (EBUSY); 1846 1847 /* 1848 * Get real filehandle for root of exported FS. 1849 */ 1850 memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle)); 1851 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 1852 1853 if ((error = VFS_ROOT(mp, &rvp))) 1854 return (error); 1855 1856 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 1857 return (error); 1858 1859 vput(rvp); 1860 1861 /* 1862 * If an indexfile was specified, pull it in. 1863 */ 1864 if (argp->ex_indexfile != NULL) { 1865 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 1866 M_WAITOK); 1867 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 1868 MAXNAMLEN, (size_t *)0); 1869 if (!error) { 1870 /* 1871 * Check for illegal filenames. 1872 */ 1873 for (cp = nfs_pub.np_index; *cp; cp++) { 1874 if (*cp == '/') { 1875 error = EINVAL; 1876 break; 1877 } 1878 } 1879 } 1880 if (error) { 1881 FREE(nfs_pub.np_index, M_TEMP); 1882 return (error); 1883 } 1884 } 1885 1886 nfs_pub.np_mount = mp; 1887 nfs_pub.np_valid = 1; 1888 return (0); 1889 } 1890 1891 struct netcred * 1892 vfs_export_lookup(mp, nep, nam) 1893 register struct mount *mp; 1894 struct netexport *nep; 1895 struct mbuf *nam; 1896 { 1897 register struct netcred *np; 1898 register struct radix_node_head *rnh; 1899 struct sockaddr *saddr; 1900 1901 np = NULL; 1902 if (mp->mnt_flag & MNT_EXPORTED) { 1903 /* 1904 * Lookup in the export list first. 1905 */ 1906 if (nam != NULL) { 1907 saddr = mtod(nam, struct sockaddr *); 1908 rnh = nep->ne_rtable[saddr->sa_family]; 1909 if (rnh != NULL) { 1910 np = (struct netcred *) 1911 (*rnh->rnh_matchaddr)((caddr_t)saddr, 1912 rnh); 1913 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 1914 np = NULL; 1915 } 1916 } 1917 /* 1918 * If no address match, use the default if it exists. 
1919 */ 1920 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 1921 np = &nep->ne_defexported; 1922 } 1923 return (np); 1924 } 1925 1926 /* 1927 * Do the usual access checking. 1928 * file_mode, uid and gid are from the vnode in question, 1929 * while acc_mode and cred are from the VOP_ACCESS parameter list 1930 */ 1931 int 1932 vaccess(type, file_mode, uid, gid, acc_mode, cred) 1933 enum vtype type; 1934 mode_t file_mode; 1935 uid_t uid; 1936 gid_t gid; 1937 mode_t acc_mode; 1938 struct ucred *cred; 1939 { 1940 mode_t mask; 1941 1942 /* 1943 * Super-user always gets read/write access, but execute access depends 1944 * on at least one execute bit being set. 1945 */ 1946 if (cred->cr_uid == 0) { 1947 if ((acc_mode & VEXEC) && type != VDIR && 1948 (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0) 1949 return (EACCES); 1950 return (0); 1951 } 1952 1953 mask = 0; 1954 1955 /* Otherwise, check the owner. */ 1956 if (cred->cr_uid == uid) { 1957 if (acc_mode & VEXEC) 1958 mask |= S_IXUSR; 1959 if (acc_mode & VREAD) 1960 mask |= S_IRUSR; 1961 if (acc_mode & VWRITE) 1962 mask |= S_IWUSR; 1963 return ((file_mode & mask) == mask ? 0 : EACCES); 1964 } 1965 1966 /* Otherwise, check the groups. */ 1967 if (cred->cr_gid == gid || groupmember(gid, cred)) { 1968 if (acc_mode & VEXEC) 1969 mask |= S_IXGRP; 1970 if (acc_mode & VREAD) 1971 mask |= S_IRGRP; 1972 if (acc_mode & VWRITE) 1973 mask |= S_IWGRP; 1974 return ((file_mode & mask) == mask ? 0 : EACCES); 1975 } 1976 1977 /* Otherwise, check everyone else. */ 1978 if (acc_mode & VEXEC) 1979 mask |= S_IXOTH; 1980 if (acc_mode & VREAD) 1981 mask |= S_IROTH; 1982 if (acc_mode & VWRITE) 1983 mask |= S_IWOTH; 1984 return ((file_mode & mask) == mask ? 0 : EACCES); 1985 } 1986 1987 /* 1988 * Unmount all file systems. 1989 * We traverse the list in reverse order under the assumption that doing so 1990 * will avoid needing to worry about dependencies. 
 */
void
vfs_unmountall()
{
	register struct mount *mp, *nmp;
	int allerror, error;
	struct proc *p = curproc;	/* XXX */

	/*
	 * Unmounting a file system blocks the requesting process.
	 * However, it's possible for this routine to be called when
	 * curproc is NULL (e.g. panic situation, or via the debugger).
	 * If we get stuck in this situation, just abort, since any
	 * attempts to sleep will fault.
	 */
	if (p == NULL) {
		printf("vfs_unmountall: no context, aborting\n");
		return;
	}

	for (allerror = 0,
	    mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		/* Save the predecessor first: dounmount() frees mp. */
		nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
		printf("unmounting %s (%s)...\n",
		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
		if (vfs_busy(mp, 0, 0))
			continue;
		if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}
	if (allerror)
		printf("WARNING: some file systems would not unmount\n");
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
	register struct buf *bp;
	int iter, nbusy;

	printf("syncing disks... ");

	/* XXX Should suspend scheduling. */
	(void) spl0();

	sys_sync(&proc0, (void *)0, (register_t *)0);

	/*
	 * Wait for sync to finish: poll the buffer pool up to 20 times
	 * with an increasing delay, counting buffers still busy.
	 */
	for (iter = 0; iter < 20; iter++) {
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; )
			if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
				nbusy++;
		if (nbusy == 0)
			break;
		printf("%d ", nbusy);
		DELAY(40000 * iter);
	}
	if (nbusy) {
		printf("giving up\n");
		return;
	} else
		printf("done\n");

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#if !defined(UVM)
	vnode_pager_umount(NULL);
#endif
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems. */
	vfs_unmountall();
}

/*
 * Mount the root file system. If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot()
{
	extern int (*mountroot) __P((void));
	struct vfsops *v;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	switch (root_device->dv_class) {
	case DV_IFNET:
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET");
		break;

	case DV_DISK:
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    root_device->dv_xname);
		return (ENODEV);
	}

	/*
	 * If user specified a file system, use it.
	 */
	if (mountroot != NULL)
		return ((*mountroot)());

	/*
	 * Try each file system currently configured into the kernel.
2120 */ 2121 for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) { 2122 if (v->vfs_mountroot == NULL) 2123 continue; 2124 #ifdef DEBUG 2125 printf("mountroot: trying %s...\n", v->vfs_name); 2126 #endif 2127 if ((*v->vfs_mountroot)() == 0) { 2128 printf("root file system type: %s\n", v->vfs_name); 2129 break; 2130 } 2131 } 2132 2133 if (v == NULL) { 2134 printf("no file system for %s", root_device->dv_xname); 2135 if (root_device->dv_class == DV_DISK) 2136 printf(" (dev 0x%x)", rootdev); 2137 printf("\n"); 2138 return (EFTYPE); 2139 } 2140 return (0); 2141 } 2142 2143 /* 2144 * Given a file system name, look up the vfsops for that 2145 * file system, or return NULL if file system isn't present 2146 * in the kernel. 2147 */ 2148 struct vfsops * 2149 vfs_getopsbyname(name) 2150 const char *name; 2151 { 2152 struct vfsops *v; 2153 2154 for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) { 2155 if (strcmp(v->vfs_name, name) == 0) 2156 break; 2157 } 2158 2159 return (v); 2160 } 2161 2162 /* 2163 * Establish a file system and initialize it. 2164 */ 2165 int 2166 vfs_attach(vfs) 2167 struct vfsops *vfs; 2168 { 2169 struct vfsops *v; 2170 int error = 0; 2171 2172 2173 /* 2174 * Make sure this file system doesn't already exist. 2175 */ 2176 for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) { 2177 if (strcmp(vfs->vfs_name, v->vfs_name) == 0) { 2178 error = EEXIST; 2179 goto out; 2180 } 2181 } 2182 2183 /* 2184 * Initialize the vnode operations for this file system. 2185 */ 2186 vfs_opv_init(vfs->vfs_opv_descs); 2187 2188 /* 2189 * Now initialize the file system itself. 2190 */ 2191 (*vfs->vfs_init)(); 2192 2193 /* 2194 * ...and link it into the kernel's list. 2195 */ 2196 LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list); 2197 2198 /* 2199 * Sanity: make sure the reference count is 0. 2200 */ 2201 vfs->vfs_refcount = 0; 2202 2203 out: 2204 return (error); 2205 } 2206 2207 /* 2208 * Remove a file system from the kernel. 
2209 */ 2210 int 2211 vfs_detach(vfs) 2212 struct vfsops *vfs; 2213 { 2214 struct vfsops *v; 2215 2216 /* 2217 * Make sure no one is using the filesystem. 2218 */ 2219 if (vfs->vfs_refcount != 0) 2220 return (EBUSY); 2221 2222 /* 2223 * ...and remove it from the kernel's list. 2224 */ 2225 for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) { 2226 if (v == vfs) { 2227 LIST_REMOVE(v, vfs_list); 2228 break; 2229 } 2230 } 2231 2232 if (v == NULL) 2233 return (ESRCH); 2234 2235 /* 2236 * Free the vnode operations vector. 2237 */ 2238 vfs_opv_free(vfs->vfs_opv_descs); 2239 return (0); 2240 } 2241