1 /* $NetBSD: vfs_subr.c,v 1.97 1998/12/10 15:09:19 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1989, 1993 42 * The Regents of the University of California. All rights reserved. 43 * (c) UNIX System Laboratories, Inc. 44 * All or some portions of this file are derived from material licensed 45 * to the University of California by American Telephone and Telegraph 46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 47 * the permission of UNIX System Laboratories, Inc. 48 * 49 * Redistribution and use in source and binary forms, with or without 50 * modification, are permitted provided that the following conditions 51 * are met: 52 * 1. Redistributions of source code must retain the above copyright 53 * notice, this list of conditions and the following disclaimer. 54 * 2. Redistributions in binary form must reproduce the above copyright 55 * notice, this list of conditions and the following disclaimer in the 56 * documentation and/or other materials provided with the distribution. 57 * 3. All advertising materials mentioning features or use of this software 58 * must display the following acknowledgement: 59 * This product includes software developed by the University of 60 * California, Berkeley and its contributors. 61 * 4. Neither the name of the University nor the names of its contributors 62 * may be used to endorse or promote products derived from this software 63 * without specific prior written permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

/* Kernel compile-time options. */
#include "opt_compat_netbsd.h"
#include "opt_compat_43.h"
#include "opt_uvm.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/dirent.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

#if defined(UVM)
#include <uvm/uvm_extern.h>
#endif

/*
 * Conversion tables between i-node file type values and vnode types.
 * iftovt_tab[] is indexed by a 4-bit file type code and yields the
 * corresponding enum vtype; vttoif_tab[] is indexed by an enum vtype
 * and yields the corresponding S_IF* mode bits.
 */
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive =
0;	/* 1 => print out reclaim of active vnodes */

/*
 * Insq/Remq for the vnode usage lists.
 * bufremvn() also resets the link so that NOLIST marks "not on a list".
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {						\
	LIST_REMOVE(bp, b_vnbufs);				\
	(bp)->b_vnbufs.le_next = NOLIST;			\
}

TAILQ_HEAD(freelst, vnode) vnode_free_list =	/* vnode free list */
    TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct nfs_public nfs_pub;		/* publicly exported FS */

/*
 * Simple locks; initialized in vntblinit() (mntid, mntvnode, spechash,
 * vnode_free_list).  mountlist_slock guards the mounted filesystem list.
 */
struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
struct simplelock vnode_free_list_slock;
struct simplelock spechash_slock;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;		/* root device */

struct pool vnode_pool;			/* memory pool for vnodes */

/*
 * Local declarations.
 */
void	insmntque __P((struct vnode *, struct mount *));
int	getdevvp __P((dev_t, struct vnode **, enum vtype));
void	vgoneall __P((struct vnode *));

static	int vfs_hang_addrlist __P((struct mount *, struct netexport *,
				   struct export_args *));
static	int vfs_free_netcred __P((struct radix_node *, void *));
static	void vfs_free_addrlist __P((struct netexport *));

#ifdef DEBUG
void	printlockedvnodes __P((void));
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	simple_lock_init(&vnode_free_list_slock);

	/* All vnodes come from this pool; see getnewvnode(). */
	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
	    0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE);
}

/*
 * Mark a mount point as busy.  Used to synchronize access and to delay
 * unmounting.  Interlock is not released on failure.
 *
 * Returns 0 on success, ENOENT if the mount is being (or has been)
 * unmounted.  With LK_NOWAIT the ENOENT return is immediate; otherwise
 * the caller sleeps until the unmount completes and then gets ENOENT.
 */
int
vfs_busy(mp, flags, interlkp)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
{
	int lkflags;

	if (mp->mnt_flag & MNT_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_flag |= MNT_MWAIT;
		/* Drop the caller's interlock across the sleep. */
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		sleep((caddr_t)mp, PVFS);
		if (interlkp)
			simple_lock(interlkp);
		return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	/* A shared lockmgr lock cannot legitimately fail here. */
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem (release the shared busy lock taken in
 * vfs_busy()).
 */
void
vfs_unbusy(mp)
	struct mount *mp;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
245 */ 246 int 247 vfs_rootmountalloc(fstypename, devname, mpp) 248 char *fstypename; 249 char *devname; 250 struct mount **mpp; 251 { 252 struct vfsops *vfsp = NULL; 253 struct mount *mp; 254 255 for (vfsp = LIST_FIRST(&vfs_list); vfsp != NULL; 256 vfsp = LIST_NEXT(vfsp, vfs_list)) 257 if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN)) 258 break; 259 260 if (vfsp == NULL) 261 return (ENODEV); 262 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 263 memset((char *)mp, 0, (u_long)sizeof(struct mount)); 264 lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0); 265 (void)vfs_busy(mp, LK_NOWAIT, 0); 266 LIST_INIT(&mp->mnt_vnodelist); 267 mp->mnt_op = vfsp; 268 mp->mnt_flag = MNT_RDONLY; 269 mp->mnt_vnodecovered = NULLVP; 270 vfsp->vfs_refcount++; 271 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN); 272 mp->mnt_stat.f_mntonname[0] = '/'; 273 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 274 *mpp = mp; 275 return (0); 276 } 277 278 /* 279 * Lookup a mount point by filesystem identifier. 
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	/* Walk the circular mount list under mountlist_slock. */
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);	/* not found */
}

/*
 * Get a new unique fsid for the given mount.  val[1] encodes the
 * filesystem type (makefstype()); val[0] is bumped until the pair no
 * longer collides with any mounted filesystem.
 */
void
vfs_getnewfsid(mp, fstypename)
	struct mount *mp;
	char *fstypename;
{
	static u_short xxxfs_mntid;	/* per-boot mount id counter */
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(fstypename);
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;		/* never use mount id 0 */
	tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
	tfsid.val[1] = mtype;
	/* Only need to check for collisions if something is mounted. */
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 * Simple shift-and-xor hash of the name's bytes; not guaranteed
 * collision-free (hence the quotes around 'unique').
 */
long
makefstype(type)
	char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}


/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;

	/*
	 * Assign individually so that it is safe even if size and
	 * sign of each member are varied.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;			/* current number of allocated vnodes */

/*
 * Return the next vnode from the free list.
 *
 * A fresh vnode is allocated from vnode_pool while numvnodes is below
 * desiredvnodes (or below 2*desiredvnodes when the free list is empty);
 * otherwise the oldest unlocked vnode on the free list is recycled.
 * On success *vpp holds the vnode with a use count of 1.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) __P((void *));
	struct vnode **vpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
#ifdef DIAGNOSTIC
	int s;
#endif

	simple_lock(&vnode_free_list_slock);
	if ((vnode_free_list.tqh_first == NULL &&
	     numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		/* Allocate a brand new, zeroed vnode. */
		simple_unlock(&vnode_free_list_slock);
		vp = pool_get(&vnode_pool, PR_WAITOK);
		memset((char *)vp, 0, sizeof(*vp));
		numvnodes++;
	} else {
		/* Recycle: find a free vnode whose interlock we can take. */
		for (vp = vnode_free_list.tqh_first;
		    vp != NULLVP; vp = vp->v_freelist.tqe_next) {
			if (simple_lock_try(&vp->v_interlock))
				break;
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;
		/* vgonel() consumes the interlock; otherwise drop it. */
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		/* Reset per-vnode state for reuse. */
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);	/* drop any stale name cache entries */
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);	/* place on mp's vnode list */
	*vpp = vp;
	vp->v_usecount = 1;	/* caller gets the sole reference */
	vp->v_data = 0;
#ifdef UVM
	simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
#endif
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 * A NULL mp just removes the vnode from its current mount list.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp) != NULL) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		/* Wake anyone in vflushbuf()/vinvalbuf() waiting for I/O. */
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 *
 * V_SAVE writes dirty data out first; V_SAVEMETA preserves buffers
 * with negative logical block numbers (indirect blocks).  slpflag and
 * slptimeo are passed through to tsleep() when waiting on busy buffers.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		if ((error = VOP_FSYNC(vp, cred, FSYNC_WAIT, p)) != 0)
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		/* Pick the clean list first, skipping metadata if asked. */
		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				/* Wait for the buffer, then rescan. */
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
					slpflag | (PRIBIO + 1), "vinvalbuf",
					slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bp->b_flags |= B_BUSY | B_VFLUSH;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Start output on all dirty buffers of the vnode.  If sync is nonzero,
 * write synchronously where required, wait for all outstanding I/O to
 * drain, and retry until the dirty list is empty.
 */
void
vflushbuf(vp, sync)
	register struct vnode *vp;
	int sync;
{
	register struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
		nbp = bp->b_vnbufs.le_next;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bp->b_flags |= B_BUSY | B_VFLUSH;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	/* Wait for all writes started above to complete. */
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
	}
	splx(s);
	if (vp->v_dirtyblkhd.lh_first != NULL) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 * The vnode is held for as long as it has buffers (see brelvp()).
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
652 */ 653 void 654 brelvp(bp) 655 register struct buf *bp; 656 { 657 struct vnode *vp; 658 659 if (bp->b_vp == (struct vnode *) 0) 660 panic("brelvp: NULL"); 661 /* 662 * Delete from old vnode list, if on one. 663 */ 664 if (bp->b_vnbufs.le_next != NOLIST) 665 bufremvn(bp); 666 vp = bp->b_vp; 667 bp->b_vp = (struct vnode *) 0; 668 HOLDRELE(vp); 669 } 670 671 /* 672 * Reassign a buffer from one vnode to another. 673 * Used to assign file specific control information 674 * (indirect blocks) to the vnode to which they belong. 675 */ 676 void 677 reassignbuf(bp, newvp) 678 register struct buf *bp; 679 register struct vnode *newvp; 680 { 681 register struct buflists *listheadp; 682 683 if (newvp == NULL) { 684 printf("reassignbuf: NULL"); 685 return; 686 } 687 /* 688 * Delete from old vnode list, if on one. 689 */ 690 if (bp->b_vnbufs.le_next != NOLIST) 691 bufremvn(bp); 692 /* 693 * If dirty, put on list of dirty buffers; 694 * otherwise insert onto list of clean buffers. 695 */ 696 if (bp->b_flags & B_DELWRI) 697 listheadp = &newvp->v_dirtyblkhd; 698 else 699 listheadp = &newvp->v_cleanblkhd; 700 bufinsvn(bp, listheadp); 701 } 702 703 /* 704 * Create a vnode for a block device. 705 * Used for root filesystem and swap areas. 706 * Also used for memory file system special devices. 707 */ 708 int 709 bdevvp(dev, vpp) 710 dev_t dev; 711 struct vnode **vpp; 712 { 713 714 return (getdevvp(dev, vpp, VBLK)); 715 } 716 717 /* 718 * Create a vnode for a character device. 719 * Used for kernfs and some console handling. 720 */ 721 int 722 cdevvp(dev, vpp) 723 dev_t dev; 724 struct vnode **vpp; 725 { 726 727 return (getdevvp(dev, vpp, VCHR)); 728 } 729 730 /* 731 * Create a vnode for a device. 732 * Used by bdevvp (block device) for root file system etc., 733 * and by cdevvp (character device) for console and kernfs. 
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	/* NODEV maps to the null vnode, by convention. */
	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	/* If an alias already exists for this device, use it instead. */
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	register struct vnode *vp;
	struct vnode **vpp;

	/* Only device vnodes can have aliases. */
	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;	/* list may have changed; rescan */
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
			simple_unlock(&spechash_slock);
			goto loop;	/* vnode went away; rescan */
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		/* No usable alias: attach specinfo to the new vnode. */
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		*vpp = nvp;
		if (vp != NULLVP) {
			/* Both vnodes now alias the same device. */
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/*
	 * Reuse the existing anonymous block-device vnode: discard its
	 * old identity and hand it back for the caller to fill in.
	 */
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. If the vnode lock bit is set the
 * vnode is being eliminated in vgone. In that case, we can not
 * grab the vnode, so the process is awakened when the transition is
 * completed, and an error returned to indicate that the vnode is no
 * longer usable (possibly having been changed to a new file system type).
 */
int
vget(vp, flags)
	struct vnode *vp;
	int flags;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
854 */ 855 if ((flags & LK_INTERLOCK) == 0) 856 simple_lock(&vp->v_interlock); 857 if (vp->v_flag & VXLOCK) { 858 vp->v_flag |= VXWANT; 859 simple_unlock(&vp->v_interlock); 860 tsleep((caddr_t)vp, PINOD, "vget", 0); 861 return (ENOENT); 862 } 863 if (vp->v_usecount == 0) { 864 simple_lock(&vnode_free_list_slock); 865 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 866 simple_unlock(&vnode_free_list_slock); 867 } 868 vp->v_usecount++; 869 if (flags & LK_TYPE_MASK) { 870 if ((error = vn_lock(vp, flags | LK_INTERLOCK))) 871 vrele(vp); 872 return (error); 873 } 874 simple_unlock(&vp->v_interlock); 875 return (0); 876 } 877 878 /* 879 * vput(), just unlock and vrele() 880 */ 881 void 882 vput(vp) 883 struct vnode *vp; 884 { 885 struct proc *p = curproc; /* XXX */ 886 887 #ifdef DIGANOSTIC 888 if (vp == NULL) 889 panic("vput: null vp"); 890 #endif 891 simple_lock(&vp->v_interlock); 892 vp->v_usecount--; 893 if (vp->v_usecount > 0) { 894 simple_unlock(&vp->v_interlock); 895 VOP_UNLOCK(vp, 0); 896 return; 897 } 898 #ifdef DIAGNOSTIC 899 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 900 vprint("vput: bad ref count", vp); 901 panic("vput: ref cnt"); 902 } 903 #endif 904 /* 905 * Insert at tail of LRU list. 906 */ 907 simple_lock(&vnode_free_list_slock); 908 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 909 simple_unlock(&vnode_free_list_slock); 910 simple_unlock(&vp->v_interlock); 911 VOP_INACTIVE(vp, p); 912 } 913 914 /* 915 * Vnode release. 916 * If count drops to zero, call inactive routine and return to freelist. 
917 */ 918 void 919 vrele(vp) 920 struct vnode *vp; 921 { 922 struct proc *p = curproc; /* XXX */ 923 924 #ifdef DIAGNOSTIC 925 if (vp == NULL) 926 panic("vrele: null vp"); 927 #endif 928 simple_lock(&vp->v_interlock); 929 vp->v_usecount--; 930 if (vp->v_usecount > 0) { 931 simple_unlock(&vp->v_interlock); 932 return; 933 } 934 #ifdef DIAGNOSTIC 935 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 936 vprint("vrele: bad ref count", vp); 937 panic("vrele: ref cnt"); 938 } 939 #endif 940 /* 941 * Insert at tail of LRU list. 942 */ 943 simple_lock(&vnode_free_list_slock); 944 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 945 simple_unlock(&vnode_free_list_slock); 946 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0) 947 VOP_INACTIVE(vp, p); 948 } 949 950 #ifdef DIAGNOSTIC 951 /* 952 * Page or buffer structure gets a reference. 953 */ 954 void 955 vhold(vp) 956 register struct vnode *vp; 957 { 958 959 simple_lock(&vp->v_interlock); 960 vp->v_holdcnt++; 961 simple_unlock(&vp->v_interlock); 962 } 963 964 /* 965 * Page or buffer structure frees a reference. 966 */ 967 void 968 holdrele(vp) 969 register struct vnode *vp; 970 { 971 972 simple_lock(&vp->v_interlock); 973 if (vp->v_holdcnt <= 0) 974 panic("holdrele: holdcnt"); 975 vp->v_holdcnt--; 976 simple_unlock(&vp->v_interlock); 977 } 978 979 /* 980 * Vnode reference. 981 */ 982 void 983 vref(vp) 984 struct vnode *vp; 985 { 986 987 simple_lock(&vp->v_interlock); 988 if (vp->v_usecount <= 0) 989 panic("vref used where vget required"); 990 vp->v_usecount++; 991 simple_unlock(&vp->v_interlock); 992 } 993 #endif /* DIAGNOSTIC */ 994 995 /* 996 * Remove any vnodes in the vnode table belonging to mount point mp. 997 * 998 * If MNT_NOFORCE is specified, there should not be any active ones, 999 * return error if any are found (nb: this is a user error, not a 1000 * system error). If MNT_FORCE is specified, detach any active vnodes 1001 * that are found. 
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

/*
 * Flush (detach) the vnodes of mount point mp, except skipvp.
 * Returns 0 on success, EBUSY if any vnodes remained busy.
 */
int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	register struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/* Vnode migrated to another mount while we slept; restart. */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		simple_lock(&vp->v_interlock);
		/*
		 * Skip over a vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				/* Devices live on as anonymous spec vnodes. */
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 * Called with the vnode interlock held.
 */
void
vclean(vp, flags, p)
	register struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0)
		/* We have the vnode interlock. */
		vp->v_usecount++;

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
#ifdef UVM
	/*
	 * clean out any VM data associated with the vnode.
	 */
	uvm_vnp_terminate(vp);
#endif
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);

	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");

	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			/*
			 * Insert at tail of LRU list.
			 */
			simple_lock(&vnode_free_list_slock);
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			simple_unlock(&vnode_free_list_slock);
		}
		simple_unlock(&vp->v_interlock);
	}

	cache_purge(vp);
	/* Free any filesystem-private lock left behind by VOP_RECLAIM. */
	if (vp->v_vnlock) {
		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
			vprint("vclean: lock not drained", vp);
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		/*
		 * Unused: drop the caller's lock (if any) before the
		 * potentially blocking vgonel(), which consumes the
		 * interlock we just took.  Returns 1: recycled.
		 */
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.  The interlock is released
 * (via vclean or tsleep) before return.
 */
void
vgonel(vp, p)
	register struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list.
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			/* Head of the hash chain: unlink directly. */
			*vp->v_hashchain = vp->v_specnext;
		} else {
			/* Find our predecessor on the chain and unlink. */
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			/*
			 * Count remaining aliases (same rdev and type).
			 * vx is the first alias found; if the loop ends
			 * with vq == NULL there is only one left, so it
			 * is no longer aliased.
			 */
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
		    vnode_free_list.tqh_first != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.  Returns 1 and stores the vnode
 * through vpp on success, 0 if no vnode with that dev/type exists.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(maj, minl, minh, type)
	int maj, minl, minh;
	enum vtype type;
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}

/*
 * Calculate the total number of references to a special device,
 * summing the use counts of all aliases of the same dev/type.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		/* Remember the next entry: vgone() below may free vq. */
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.  vgone needs
		 * the spechash lock dropped; restart the scan after.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %ld,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	/*
	 * Build a "|FLAG|FLAG" string; print from &buf[1] to skip
	 * the leading '|'.
	 */
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		/*
		 * vfs_busy is handed the mountlist lock; if the mount is
		 * busy, skip it.  NOTE(review): the successful-busy path
		 * walks the vnode list and retakes mountlist_slock below —
		 * relies on vfs_busy's interlock handling; debug-only code.
		 */
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
#endif

extern const char *mountcompatnames[];
extern const int nmountcompatnames;

/*
 * Top level filesystem related information gathering.
 * Non-VFS_GENERIC requests are dispatched to the named file system's
 * own vfs_sysctl handler, using the compat name table to translate
 * old-style numeric type IDs.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	struct vfsconf vfc;
#endif
	struct vfsops *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */

	/* Not generic: goes to file system. */
	if (name[0] != VFS_GENERIC) {
		if (name[0] >= nmountcompatnames || name[0] < 0 ||
		    mountcompatnames[name[0]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[0]]);
		if (vfsp == NULL || vfsp->vfs_sysctl == NULL)
			return (EOPNOTSUPP);
		/* Strip off the leading name component and recurse. */
		return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}

	/* The rest are generic vfs sysctls. */
	switch (name[1]) {
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	case VFS_MAXTYPENUM:
		/*
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
	case VFS_CONF:
		/*
		 * Special: a node, next is a file system name.
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		if (name[2] >= nmountcompatnames || name[2] < 0 ||
		    mountcompatnames[name[2]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		/* Synthesize a 4.4BSD-style vfsconf record. */
		vfc.vfc_vfsops = vfsp;
		strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
		vfc.vfc_typenum = name[2];
		vfc.vfc_refcount = vfsp->vfs_refcount;
		vfc.vfc_flags = 0;
		vfc.vfc_mountroot = vfsp->vfs_mountroot;
		vfc.vfc_next = NULL;
		return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
		    sizeof(struct vfsconf)));
#endif
	default:
		break;
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
1557 */ 1558 /* ARGSUSED */ 1559 int 1560 sysctl_vnode(where, sizep, p) 1561 char *where; 1562 size_t *sizep; 1563 struct proc *p; 1564 { 1565 struct mount *mp, *nmp; 1566 struct vnode *nvp, *vp; 1567 char *bp = where, *savebp; 1568 char *ewhere; 1569 int error; 1570 1571 #define VPTRSZ sizeof(struct vnode *) 1572 #define VNODESZ sizeof(struct vnode) 1573 if (where == NULL) { 1574 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 1575 return (0); 1576 } 1577 ewhere = where + *sizep; 1578 1579 simple_lock(&mountlist_slock); 1580 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1581 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) { 1582 nmp = mp->mnt_list.cqe_next; 1583 continue; 1584 } 1585 savebp = bp; 1586 again: 1587 simple_lock(&mntvnode_slock); 1588 for (vp = mp->mnt_vnodelist.lh_first; 1589 vp != NULL; 1590 vp = nvp) { 1591 /* 1592 * Check that the vp is still associated with 1593 * this filesystem. RACE: could have been 1594 * recycled onto the same filesystem. 1595 */ 1596 if (vp->v_mount != mp) { 1597 simple_unlock(&mntvnode_slock); 1598 if (kinfo_vdebug) 1599 printf("kinfo: vp changed\n"); 1600 bp = savebp; 1601 goto again; 1602 } 1603 nvp = vp->v_mntvnodes.le_next; 1604 if (bp + VPTRSZ + VNODESZ > ewhere) { 1605 simple_unlock(&mntvnode_slock); 1606 *sizep = bp - where; 1607 return (ENOMEM); 1608 } 1609 simple_unlock(&mntvnode_slock); 1610 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || 1611 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) 1612 return (error); 1613 bp += VPTRSZ + VNODESZ; 1614 simple_lock(&mntvnode_slock); 1615 } 1616 simple_unlock(&mntvnode_slock); 1617 simple_lock(&mountlist_slock); 1618 nmp = mp->mnt_list.cqe_next; 1619 vfs_unbusy(mp); 1620 } 1621 simple_unlock(&mountlist_slock); 1622 1623 *sizep = bp - where; 1624 return (0); 1625 } 1626 1627 /* 1628 * Check to see if a filesystem is mounted on a block device. 
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	/* Returns EBUSY if vp (or any alias of it) is mounted on. */
	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 *
 * An empty ex_addrlen installs the default export entry; otherwise the
 * (address, mask) pair is copied in from user space and inserted into
 * the per-address-family radix tree hanging off nep.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np, *enp;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		/* Default entry: only one allowed per mount. */
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	/*
	 * One allocation holds the netcred followed by the address
	 * and then the mask.
	 */
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	memset((caddr_t)np, 0, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
	if (error)
		goto out;
	/* Clamp sa_len to what was actually copied in. */
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) {	/* already exists */
		/*
		 * An entry for this address already exists.  Accept the
		 * duplicate silently only if it grants identical
		 * credentials and flags; otherwise fail with EPERM.
		 */
		if (rn == 0) {
			enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
			    smask, rnh);
			if (enp == 0) {
				error = EPERM;
				goto out;
			}
		} else
			enp = (struct netcred *)rn;

		if (enp->netc_exflags != argp->ex_flags ||
		    enp->netc_anon.cr_uid != argp->ex_anon.cr_uid ||
		    enp->netc_anon.cr_gid != argp->ex_anon.cr_gid ||
		    enp->netc_anon.cr_ngroups != argp->ex_anon.cr_ngroups ||
		    memcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups,
			enp->netc_anon.cr_ngroups))
			error = EPERM;
		else
			error = 0;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/*
 * Radix-tree walker callback: delete and free one export entry.
 */
/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
1765 */ 1766 static void 1767 vfs_free_addrlist(nep) 1768 struct netexport *nep; 1769 { 1770 register int i; 1771 register struct radix_node_head *rnh; 1772 1773 for (i = 0; i <= AF_MAX; i++) 1774 if ((rnh = nep->ne_rtable[i]) != NULL) { 1775 (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh); 1776 free((caddr_t)rnh, M_RTABLE); 1777 nep->ne_rtable[i] = 0; 1778 } 1779 } 1780 1781 int 1782 vfs_export(mp, nep, argp) 1783 struct mount *mp; 1784 struct netexport *nep; 1785 struct export_args *argp; 1786 { 1787 int error; 1788 1789 if (argp->ex_flags & MNT_DELEXPORT) { 1790 if (mp->mnt_flag & MNT_EXPUBLIC) { 1791 vfs_setpublicfs(NULL, NULL, NULL); 1792 mp->mnt_flag &= ~MNT_EXPUBLIC; 1793 } 1794 vfs_free_addrlist(nep); 1795 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 1796 } 1797 if (argp->ex_flags & MNT_EXPORTED) { 1798 if (argp->ex_flags & MNT_EXPUBLIC) { 1799 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 1800 return (error); 1801 mp->mnt_flag |= MNT_EXPUBLIC; 1802 } 1803 if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0) 1804 return (error); 1805 mp->mnt_flag |= MNT_EXPORTED; 1806 } 1807 return (0); 1808 } 1809 1810 /* 1811 * Set the publicly exported filesystem (WebNFS). Currently, only 1812 * one public filesystem is possible in the spec (RFC 2054 and 2055) 1813 */ 1814 int 1815 vfs_setpublicfs(mp, nep, argp) 1816 struct mount *mp; 1817 struct netexport *nep; 1818 struct export_args *argp; 1819 { 1820 int error; 1821 struct vnode *rvp; 1822 char *cp; 1823 1824 /* 1825 * mp == NULL -> invalidate the current info, the FS is 1826 * no longer exported. May be called from either vfs_export 1827 * or unmount, so check if it hasn't already been done. 1828 */ 1829 if (mp == NULL) { 1830 if (nfs_pub.np_valid) { 1831 nfs_pub.np_valid = 0; 1832 if (nfs_pub.np_index != NULL) { 1833 FREE(nfs_pub.np_index, M_TEMP); 1834 nfs_pub.np_index = NULL; 1835 } 1836 } 1837 return (0); 1838 } 1839 1840 /* 1841 * Only one allowed at a time. 
1842 */ 1843 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 1844 return (EBUSY); 1845 1846 /* 1847 * Get real filehandle for root of exported FS. 1848 */ 1849 memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle)); 1850 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 1851 1852 if ((error = VFS_ROOT(mp, &rvp))) 1853 return (error); 1854 1855 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 1856 return (error); 1857 1858 vput(rvp); 1859 1860 /* 1861 * If an indexfile was specified, pull it in. 1862 */ 1863 if (argp->ex_indexfile != NULL) { 1864 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 1865 M_WAITOK); 1866 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 1867 MAXNAMLEN, (size_t *)0); 1868 if (!error) { 1869 /* 1870 * Check for illegal filenames. 1871 */ 1872 for (cp = nfs_pub.np_index; *cp; cp++) { 1873 if (*cp == '/') { 1874 error = EINVAL; 1875 break; 1876 } 1877 } 1878 } 1879 if (error) { 1880 FREE(nfs_pub.np_index, M_TEMP); 1881 return (error); 1882 } 1883 } 1884 1885 nfs_pub.np_mount = mp; 1886 nfs_pub.np_valid = 1; 1887 return (0); 1888 } 1889 1890 struct netcred * 1891 vfs_export_lookup(mp, nep, nam) 1892 register struct mount *mp; 1893 struct netexport *nep; 1894 struct mbuf *nam; 1895 { 1896 register struct netcred *np; 1897 register struct radix_node_head *rnh; 1898 struct sockaddr *saddr; 1899 1900 np = NULL; 1901 if (mp->mnt_flag & MNT_EXPORTED) { 1902 /* 1903 * Lookup in the export list first. 1904 */ 1905 if (nam != NULL) { 1906 saddr = mtod(nam, struct sockaddr *); 1907 rnh = nep->ne_rtable[saddr->sa_family]; 1908 if (rnh != NULL) { 1909 np = (struct netcred *) 1910 (*rnh->rnh_matchaddr)((caddr_t)saddr, 1911 rnh); 1912 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 1913 np = NULL; 1914 } 1915 } 1916 /* 1917 * If no address match, use the default if it exists. 
1918 */ 1919 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 1920 np = &nep->ne_defexported; 1921 } 1922 return (np); 1923 } 1924 1925 /* 1926 * Do the usual access checking. 1927 * file_mode, uid and gid are from the vnode in question, 1928 * while acc_mode and cred are from the VOP_ACCESS parameter list 1929 */ 1930 int 1931 vaccess(type, file_mode, uid, gid, acc_mode, cred) 1932 enum vtype type; 1933 mode_t file_mode; 1934 uid_t uid; 1935 gid_t gid; 1936 mode_t acc_mode; 1937 struct ucred *cred; 1938 { 1939 mode_t mask; 1940 1941 /* 1942 * Super-user always gets read/write access, but execute access depends 1943 * on at least one execute bit being set. 1944 */ 1945 if (cred->cr_uid == 0) { 1946 if ((acc_mode & VEXEC) && type != VDIR && 1947 (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0) 1948 return (EACCES); 1949 return (0); 1950 } 1951 1952 mask = 0; 1953 1954 /* Otherwise, check the owner. */ 1955 if (cred->cr_uid == uid) { 1956 if (acc_mode & VEXEC) 1957 mask |= S_IXUSR; 1958 if (acc_mode & VREAD) 1959 mask |= S_IRUSR; 1960 if (acc_mode & VWRITE) 1961 mask |= S_IWUSR; 1962 return ((file_mode & mask) == mask ? 0 : EACCES); 1963 } 1964 1965 /* Otherwise, check the groups. */ 1966 if (cred->cr_gid == gid || groupmember(gid, cred)) { 1967 if (acc_mode & VEXEC) 1968 mask |= S_IXGRP; 1969 if (acc_mode & VREAD) 1970 mask |= S_IRGRP; 1971 if (acc_mode & VWRITE) 1972 mask |= S_IWGRP; 1973 return ((file_mode & mask) == mask ? 0 : EACCES); 1974 } 1975 1976 /* Otherwise, check everyone else. */ 1977 if (acc_mode & VEXEC) 1978 mask |= S_IXOTH; 1979 if (acc_mode & VREAD) 1980 mask |= S_IROTH; 1981 if (acc_mode & VWRITE) 1982 mask |= S_IWOTH; 1983 return ((file_mode & mask) == mask ? 0 : EACCES); 1984 } 1985 1986 /* 1987 * Unmount all file systems. 1988 * We traverse the list in reverse order under the assumption that doing so 1989 * will avoid needing to worry about dependencies. 
 */
void
vfs_unmountall()
{
	register struct mount *mp, *nmp;
	int allerror, error;
	struct proc *p = curproc;	/* XXX */

	/*
	 * Unmounting a file system blocks the requesting process.
	 * However, it's possible for this routine to be called when
	 * curproc is NULL (e.g. panic situation, or via the debugger).
	 * If we get stuck in this situation, just abort, since any
	 * attempts to sleep will fault.
	 */
	if (p == NULL) {
		printf("vfs_unmountall: no context, aborting\n");
		return;
	}

	/* Walk the mount list back to front, forcing each unmount. */
	for (allerror = 0,
	     mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
		printf("unmounting %s (%s)...\n",
		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
		if (vfs_busy(mp, 0, 0))
			continue;
		if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}
	if (allerror)
		printf("WARNING: some file systems would not unmount\n");
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
	register struct buf *bp;
	int iter, nbusy;

	printf("syncing disks... ");

	/* XXX Should suspend scheduling. */
	(void) spl0();

	sys_sync(&proc0, (void *)0, (register_t *)0);

	/*
	 * Wait for sync to finish: poll the buffer pool until no buffer
	 * is busy, backing off a little longer each of up to 20 passes.
	 */
	for (iter = 0; iter < 20; iter++) {
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; )
			if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
				nbusy++;
		if (nbusy == 0)
			break;
		printf("%d ", nbusy);
		DELAY(40000 * iter);
	}
	if (nbusy) {
		printf("giving up\n");
		return;
	} else
		printf("done\n");

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#if !defined(UVM)
	vnode_pager_umount(NULL);
#endif
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems. */
	vfs_unmountall();
}

/*
 * Mount the root file system. If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot()
{
	extern int (*mountroot) __P((void));
	struct vfsops *v;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	/* Sanity-check rootdev against the root device's class. */
	switch (root_device->dv_class) {
	case DV_IFNET:
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET");
		break;

	case DV_DISK:
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    root_device->dv_xname);
		return (ENODEV);
	}

	/*
	 * If user specified a file system, use it.
	 */
	if (mountroot != NULL)
		return ((*mountroot)());

	/*
	 * Try each file system currently configured into the kernel.
	 */
	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (v->vfs_mountroot == NULL)
			continue;
#ifdef DEBUG
		printf("mountroot: trying %s...\n", v->vfs_name);
#endif
		if ((*v->vfs_mountroot)() == 0) {
			printf("root file system type: %s\n", v->vfs_name);
			break;
		}
	}

	if (v == NULL) {
		printf("no file system for %s", root_device->dv_xname);
		if (root_device->dv_class == DV_DISK)
			printf(" (dev 0x%x)", rootdev);
		printf("\n");
		return (EFTYPE);
	}
	return (0);
}

/*
 * Given a file system name, look up the vfsops for that
 * file system, or return NULL if file system isn't present
 * in the kernel.
 */
struct vfsops *
vfs_getopsbyname(name)
	const char *name;
{
	struct vfsops *v;

	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (strcmp(v->vfs_name, name) == 0)
			break;
	}

	return (v);
}

/*
 * Establish a file system and initialize it.  Returns EEXIST if a
 * file system with the same name is already registered.
 */
int
vfs_attach(vfs)
	struct vfsops *vfs;
{
	struct vfsops *v;
	int error = 0;


	/*
	 * Make sure this file system doesn't already exist.
	 */
	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
			error = EEXIST;
			goto out;
		}
	}

	/*
	 * Initialize the vnode operations for this file system.
	 */
	vfs_opv_init(vfs->vfs_opv_descs);

	/*
	 * Now initialize the file system itself.
	 */
	(*vfs->vfs_init)();

	/*
	 * ...and link it into the kernel's list.
	 */
	LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);

	/*
	 * Sanity: make sure the reference count is 0.
	 */
	vfs->vfs_refcount = 0;

 out:
	return (error);
}

/*
 * Remove a file system from the kernel.
2208 */ 2209 int 2210 vfs_detach(vfs) 2211 struct vfsops *vfs; 2212 { 2213 struct vfsops *v; 2214 2215 /* 2216 * Make sure no one is using the filesystem. 2217 */ 2218 if (vfs->vfs_refcount != 0) 2219 return (EBUSY); 2220 2221 /* 2222 * ...and remove it from the kernel's list. 2223 */ 2224 for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) { 2225 if (v == vfs) { 2226 LIST_REMOVE(v, vfs_list); 2227 break; 2228 } 2229 } 2230 2231 if (v == NULL) 2232 return (ESRCH); 2233 2234 /* 2235 * Free the vnode operations vector. 2236 */ 2237 vfs_opv_free(vfs->vfs_opv_descs); 2238 return (0); 2239 } 2240