1 /* $NetBSD: vfs_subr.c,v 1.71 1997/06/24 23:43:33 fvdl Exp $ */ 2 3 /* 4 * Copyright (c) 1997 Jason R. Thorpe. All rights reserved. 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. 
 * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)vfs_subr.c  8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/device.h>
#include <sys/dirent.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

/*
 * Conversion tables between the IFMT nibble of a file mode and the
 * vnode type.  iftovt_tab is indexed by (mode & S_IFMT) >> 12; the
 * entries containing S_IF* constants show vttoif_tab is its inverse,
 * indexed by enum vtype.
 */
enum vtype iftovt_tab[16] = {
        VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
        VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
        0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
        S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;                /* 1 => permit forcible unmounting */
int prtactive = 0;              /* 1 => print out reclaim of active vnodes */

/*
 * Insq/Remq for the vnode usage lists.
 * bufremvn also poisons the forward link with NOLIST so a buffer can be
 * tested for "not on any vnode list" (see brelvp/reassignbuf).
 */
#define bufinsvn(bp, dp)        LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define bufremvn(bp) {                                                  \
        LIST_REMOVE(bp, b_vnbufs);                                      \
        (bp)->b_vnbufs.le_next = NOLIST;                                \
}

TAILQ_HEAD(freelst, vnode) vnode_free_list =    /* vnode free list */
    TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct mntlist mountlist =                      /* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);

struct device *root_device;                     /* root device */
struct nfs_public nfs_pub;                      /* publicly exported FS */

/* Forward declarations for this file's externally visible routines. */
int vfs_lock __P((struct mount *));
void vfs_unlock __P((struct mount *));
struct mount *getvfs __P((fsid_t *));
long makefstype __P((char *));
void vattr_null __P((struct vattr *));
int getnewvnode __P((enum vtagtype, struct mount *, int (**)(void *),
    struct vnode **));
void insmntque __P((struct vnode *, struct mount *));
int vinvalbuf __P((struct vnode *, int, struct ucred *, struct proc *, int,
    int));
void vflushbuf __P((struct vnode *, int));
void brelvp __P((struct buf *));
int bdevvp __P((dev_t, struct vnode **));
int cdevvp __P((dev_t, struct vnode **));
int getdevvp __P((dev_t, struct vnode **, enum vtype));
struct vnode *checkalias __P((struct vnode *, dev_t, struct mount *));
int vget __P((struct vnode *, int));
void vref __P((struct vnode *));
void vput __P((struct vnode *));
void vrele __P((struct vnode *));
void vhold __P((struct vnode *));
void holdrele __P((struct vnode *));
int vflush __P((struct mount *, struct vnode *, int));
void vgoneall __P((struct vnode *));
void vgone __P((struct vnode *));
int vcount __P((struct vnode *));
void vprint __P((char *, struct vnode *));
int vfs_mountedon __P((struct vnode *));
int vfs_export __P((struct mount *, struct netexport *, struct export_args *));
struct netcred *vfs_export_lookup __P((struct mount *, struct netexport *,
    struct mbuf *));
int vaccess __P((enum vtype, mode_t, uid_t, gid_t,
    mode_t, struct ucred *));
void vfs_unmountall __P((void));
void vfs_shutdown __P((void));

static int vfs_hang_addrlist __P((struct mount *, struct netexport *,
    struct export_args *));
static int vfs_free_netcred __P((struct radix_node *, void *));
static void vfs_free_addrlist __P((struct netexport *));

#ifdef DEBUG
void printlockedvnodes __P((void));
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

        /*
         * Nothing to do here anymore; vnode_free_list and mountlist
         * are now initialized data.
         */
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 * Sleeps (tsleep on the mount point) until MNT_MLOCK is free, then
 * takes it.  Always returns 0.
 */
int
vfs_lock(mp)
        register struct mount *mp;
{

        while (mp->mnt_flag & MNT_MLOCK) {
                mp->mnt_flag |= MNT_MWAIT;
                tsleep((caddr_t)mp, PVFS, "vfslock", 0);
        }
        mp->mnt_flag |= MNT_MLOCK;
        return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 * Wakes any sleepers that set MNT_MWAIT in vfs_lock.
 */
void
vfs_unlock(mp)
        register struct mount *mp;
{

        if ((mp->mnt_flag & MNT_MLOCK) == 0)
                panic("vfs_unlock: not locked");
        mp->mnt_flag &= ~MNT_MLOCK;
        if (mp->mnt_flag & MNT_MWAIT) {
                mp->mnt_flag &= ~MNT_MWAIT;
                wakeup((caddr_t)mp);
        }
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 *
 * Returns 0 with MNT_MPBUSY taken, or 1 (busy flag NOT taken) if the
 * mount was being unmounted when we started and we had to sleep.
 * NOTE(review): if MNT_UNMOUNT is set but the mount is not currently
 * busy, this still succeeds without sleeping -- callers appear to rely
 * on the sleep/retest; confirm before changing.
 */
int
vfs_busy(mp)
        register struct mount *mp;
{
        int unmounting = mp->mnt_flag & MNT_UNMOUNT;

        while (mp->mnt_flag & MNT_MPBUSY) {
                mp->mnt_flag |= MNT_MPWANT;
                tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0);
                if (unmounting)
                        return (1);
        }
        mp->mnt_flag |= MNT_MPBUSY;
        return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
void
vfs_unbusy(mp)
        register struct mount *mp;
{

        if ((mp->mnt_flag & MNT_MPBUSY) == 0)
                panic("vfs_unbusy: not busy");
        mp->mnt_flag &= ~MNT_MPBUSY;
        if (mp->mnt_flag & MNT_MPWANT) {
                mp->mnt_flag &= ~MNT_MPWANT;
                wakeup((caddr_t)&mp->mnt_flag);
        }
}

/*
 * Lookup a mount point by filesystem identifier.
 * Returns the matching mount, or NULL if none is mounted with that fsid.
 */
struct mount *
getvfs(fsid)
        fsid_t *fsid;
{
        register struct mount *mp;

        for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
            mp = mp->mnt_list.cqe_next)
                if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
                    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
                        return (mp);
        return ((struct mount *)0);
}

/*
 * Get a new unique fsid for mount point mp of filesystem type mtype.
 * Probes candidate ids against the mount list (via getvfs) until an
 * unused one is found.  xxxfs_mntid persists across calls so successive
 * mounts get distinct minor numbers.
 */
void
getnewfsid(mp, mtype)
        struct mount *mp;
        int mtype;
{
        static u_short xxxfs_mntid;

        fsid_t tfsid;

        mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + 11, 0);  /* XXX */
        mp->mnt_stat.f_fsid.val[1] = mtype;
        if (xxxfs_mntid == 0)
                ++xxxfs_mntid;
        tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
        tfsid.val[1] = mtype;
        if (mountlist.cqh_first != (void *)&mountlist) {
                while (getvfs(&tfsid)) {
                        tfsid.val[0]++;
                        xxxfs_mntid++;
                }
        }
        mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}

/*
 * Make a 'unique' number from a mount type name.
 * Simple shift-and-xor hash of the name bytes; collisions are possible,
 * hence the quotes around 'unique'.
 */
long
makefstype(type)
        char *type;
{
        long rv;

        for (rv = 0; *type; type++) {
                rv <<= 2;
                rv ^= *type;
        }
        return rv;
}

/*
 * Set vnode attributes to VNOVAL
 * so that filesystems can detect which fields a caller actually set.
 */
void
vattr_null(vap)
        register struct vattr *vap;
{

        vap->va_type = VNON;
        /* XXX These next two used to be one line, but for a GCC bug.
         */
        vap->va_size = VNOVAL;
        vap->va_bytes = VNOVAL;
        vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
            vap->va_fsid = vap->va_fileid =
            vap->va_blocksize = vap->va_rdev =
            vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
            vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
            vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
            vap->va_flags = vap->va_gen = VNOVAL;
        vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;                 /* current number of allocated vnodes */

/*
 * Return the next vnode from the free list.
 *
 * Allocates a fresh vnode while the table is below desiredvnodes
 * (or below 2 * desiredvnodes with an empty free list); otherwise
 * recycles the head of the free list, vgone()ing it first if it still
 * carries an old identity.  On success *vpp holds a vnode with
 * v_usecount == 1 and v_type == VNON; returns ENFILE (with *vpp NULL)
 * if the table is full and nothing is free.
 */
int
getnewvnode(tag, mp, vops, vpp)
        enum vtagtype tag;
        struct mount *mp;
        int (**vops) __P((void *));
        struct vnode **vpp;
{
        register struct vnode *vp;
#ifdef DIAGNOSTIC
        int s;
#endif

        if ((vnode_free_list.tqh_first == NULL &&
            numvnodes < 2 * desiredvnodes) ||
            numvnodes < desiredvnodes) {
                vp = (struct vnode *)malloc((u_long)sizeof *vp,
                    M_VNODE, M_WAITOK);
                bzero((char *)vp, sizeof *vp);
                numvnodes++;
        } else {
                if ((vp = vnode_free_list.tqh_first) == NULL) {
                        tablefull("vnode");
                        *vpp = 0;
                        return (ENFILE);
                }
                if (vp->v_usecount) {
                        vprint("free vnode", vp);
                        panic("free vnode isn't");
                }
                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                /* see comment on why 0xdeadb is set at end of vgone (below) */
                vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
                vp->v_lease = NULL;
                if (vp->v_type != VBAD)
                        vgone(vp);
#ifdef DIAGNOSTIC
                if (vp->v_data) {
                        vprint("cleaned vnode", vp);
                        panic("cleaned vnode isn't");
                }
                s = splbio();
                if (vp->v_numoutput)
                        panic("Clean vnode has pending I/O's");
                splx(s);
#endif
                /* Reset the per-vnode read-ahead/clustering state. */
                vp->v_flag = 0;
                vp->v_lastr = 0;
                vp->v_ralen = 0;
                vp->v_maxra = 0;
                vp->v_lastw = 0;
                vp->v_lasta = 0;
                vp->v_cstart = 0;
                vp->v_clen = 0;
                vp->v_socket = 0;
        }
        vp->v_type = VNON;
        cache_purge(vp);
        vp->v_tag = tag;
        vp->v_op = vops;
        insmntque(vp, mp);
        *vpp = vp;
        vp->v_usecount = 1;
        vp->v_data = 0;
        return (0);
}

/*
 * Move a vnode from one mount queue to another.
 * mp may be NULL, in which case the vnode ends up on no mount list.
 */
void
insmntque(vp, mp)
        register struct vnode *vp;
        register struct mount *mp;
{

        /*
         * Delete from old mount point vnode list, if on one.
         */
        if (vp->v_mount != NULL)
                LIST_REMOVE(vp, v_mntvnodes);
        /*
         * Insert into list of vnodes for the new mount point, if available.
         */
        if ((vp->v_mount = mp) == NULL)
                return;
        LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 * Called at the completion of a write; wakes anyone sleeping in
 * vflushbuf (VBWAIT) once the last write drains.
 */
void
vwakeup(bp)
        register struct buf *bp;
{
        register struct vnode *vp;

        bp->b_flags &= ~B_WRITEINPROG;
        if ((vp = bp->b_vp) != NULL) {
                if (--vp->v_numoutput < 0)
                        panic("vwakeup: neg numoutput");
                if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
                        vp->v_flag &= ~VBWAIT;
                        wakeup((caddr_t)&vp->v_numoutput);
                }
        }
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
        register struct vnode *vp;
        int flags;
        struct ucred *cred;
        struct proc *p;
        int slpflag, slptimeo;
{
        register struct buf *bp;
        struct buf *nbp, *blist;
        int s, error;

        /* V_SAVE: write dirty data out first instead of discarding it. */
        if (flags & V_SAVE) {
                if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
                        return (error);
                if (vp->v_dirtyblkhd.lh_first != NULL)
                        panic("vinvalbuf: dirty bufs");
        }
        for (;;) {
                /*
                 * V_SAVEMETA: skip buffers with negative logical block
                 * numbers (indirect-block metadata) on both lists.
                 */
                if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
                        while (blist && blist->b_lblkno < 0)
                                blist = blist->b_vnbufs.le_next;
                if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
                    (flags & V_SAVEMETA))
                        while (blist && blist->b_lblkno < 0)
                                blist = blist->b_vnbufs.le_next;
                if (!blist)
                        break;

                for (bp = blist; bp; bp = nbp) {
                        nbp = bp->b_vnbufs.le_next;
                        if (flags & V_SAVEMETA && bp->b_lblkno < 0)
                                continue;
                        s = splbio();
                        if (bp->b_flags & B_BUSY) {
                                bp->b_flags |= B_WANTED;
                                error = tsleep((caddr_t)bp,
                                    slpflag | (PRIBIO + 1), "vinvalbuf",
                                    slptimeo);
                                splx(s);
                                if (error)
                                        return (error);
                                /* restart: the list may have changed */
                                break;
                        }
                        bp->b_flags |= B_BUSY | B_VFLUSH;
                        splx(s);
                        /*
                         * XXX Since there are no node locks for NFS, I believe
                         * there is a slight chance that a delayed write will
                         * occur while sleeping just above, so check for it.
                         */
                        if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
                                (void) VOP_BWRITE(bp);
                                break;
                        }
                        bp->b_flags |= B_INVAL;
                        brelse(bp);
                }
        }
        if (!(flags & V_SAVEMETA) &&
            (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
                panic("vinvalbuf: flush failed");
        return (0);
}

/*
 * Write out all of a vnode's dirty buffers.
 * If sync is non-zero, also wait for the writes to drain and restart
 * if new dirty buffers appeared meanwhile.
 */
void
vflushbuf(vp, sync)
        register struct vnode *vp;
        int sync;
{
        register struct buf *bp, *nbp;
        int s;

loop:
        s = splbio();
        for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
                nbp = bp->b_vnbufs.le_next;
                if ((bp->b_flags & B_BUSY))
                        continue;
                if ((bp->b_flags & B_DELWRI) == 0)
                        panic("vflushbuf: not dirty");
                bp->b_flags |= B_BUSY | B_VFLUSH;
                splx(s);
                /*
                 * Wait for I/O associated with indirect blocks to complete,
                 * since there is no way to quickly wait for them below.
                 */
                if (bp->b_vp == vp || sync == 0)
                        (void) bawrite(bp);
                else
                        (void) bwrite(bp);
                goto loop;
        }
        if (sync == 0) {
                splx(s);
                return;
        }
        while (vp->v_numoutput) {
                vp->v_flag |= VBWAIT;
                tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
        }
        splx(s);
        if (vp->v_dirtyblkhd.lh_first != NULL) {
                vprint("vflushbuf: dirty", vp);
                goto loop;
        }
}

/*
 * Associate a buffer with a vnode.
 * Takes a hold reference (VHOLD) on vp for the lifetime of the
 * association; brelvp releases it.
 */
void
bgetvp(vp, bp)
        register struct vnode *vp;
        register struct buf *bp;
{

        if (bp->b_vp)
                panic("bgetvp: not free");
        VHOLD(vp);
        bp->b_vp = vp;
        if (vp->v_type == VBLK || vp->v_type == VCHR)
                bp->b_dev = vp->v_rdev;
        else
                bp->b_dev = NODEV;
        /*
         * Insert onto list for new vnode.
         */
        bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
        register struct buf *bp;
{
        struct vnode *vp;

        if (bp->b_vp == (struct vnode *) 0)
                panic("brelvp: NULL");
        /*
         * Delete from old vnode list, if on one.
         */
        if (bp->b_vnbufs.le_next != NOLIST)
                bufremvn(bp);
        vp = bp->b_vp;
        bp->b_vp = (struct vnode *) 0;
        /* Drop the hold reference taken in bgetvp. */
        HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
        register struct buf *bp;
        register struct vnode *newvp;
{
        register struct buflists *listheadp;

        if (newvp == NULL) {
                /* NOTE(review): message has no trailing newline. */
                printf("reassignbuf: NULL");
                return;
        }
        /*
         * Delete from old vnode list, if on one.
         */
        if (bp->b_vnbufs.le_next != NOLIST)
                bufremvn(bp);
        /*
         * If dirty, put on list of dirty buffers;
         * otherwise insert onto list of clean buffers.
         */
        if (bp->b_flags & B_DELWRI)
                listheadp = &newvp->v_dirtyblkhd;
        else
                listheadp = &newvp->v_cleanblkhd;
        bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
        dev_t dev;
        struct vnode **vpp;
{

        return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
        dev_t dev;
        struct vnode **vpp;
{

        return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 *
 * Returns 0 with *vpp set on success; NODEV is silently accepted
 * (returns 0 without touching *vpp).
 */
int
getdevvp(dev, vpp, type)
        dev_t dev;
        struct vnode **vpp;
        enum vtype type;
{
        register struct vnode *vp;
        struct vnode *nvp;
        int error;

        if (dev == NODEV)
                return (0);
        error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
        if (error) {
                *vpp = NULLVP;
                return (error);
        }
        vp = nvp;
        vp->v_type = type;
        if ((nvp = checkalias(vp, dev, NULL)) != 0) {
                vput(vp);
                vp = nvp;
        }
        *vpp = vp;
        return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
        register struct vnode *nvp;
        dev_t nvp_rdev;
        struct mount *mp;
{
        register struct vnode *vp;
        struct vnode **vpp;

        if (nvp->v_type != VBLK && nvp->v_type != VCHR)
                return (NULLVP);

        vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
        for (vp = *vpp; vp; vp = vp->v_specnext) {
                if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
                        continue;
                /*
                 * Alias, but not in use, so flush it out.
                 */
                if (vp->v_usecount == 0) {
                        vgone(vp);
                        goto loop;
                }
                /* vget may sleep; if it fails the list may have changed. */
                if (vget(vp, 1))
                        goto loop;
                break;
        }
        if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
                MALLOC(nvp->v_specinfo, struct specinfo *,
                    sizeof(struct specinfo), M_VNODE, M_WAITOK);
                nvp->v_rdev = nvp_rdev;
                nvp->v_hashchain = vpp;
                nvp->v_specnext = *vpp;
                nvp->v_specflags = 0;
                nvp->v_speclockf = NULL;
                *vpp = nvp;
                if (vp != NULL) {
                        nvp->v_flag |= VALIASED;
                        vp->v_flag |= VALIASED;
                        vput(vp);
                }
                return (NULLVP);
        }
        /*
         * Existing anonymous (VT_NON) block-device vnode: take it over,
         * giving it the new vnode's operations and tag.
         */
        VOP_UNLOCK(vp);
        vclean(vp, 0);
        vp->v_op = nvp->v_op;
        vp->v_tag = nvp->v_tag;
        nvp->v_type = VNON;
        insmntque(vp, mp);
        return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, lockflag)
        register struct vnode *vp;
        int lockflag;
{

        /*
         * If the vnode is in the process of being cleaned out for
         * another use, we wait for the cleaning to finish and then
         * return failure. Cleaning is determined either by checking
         * that the VXLOCK flag is set, or that the use count is
         * zero with the back pointer set to show that it has been
         * removed from the free list by getnewvnode. The VXLOCK
         * flag may not have been set yet because vclean is blocked in
         * the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
         */
        if ((vp->v_flag & VXLOCK) ||
            (vp->v_usecount == 0 &&
            vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
                vp->v_flag |= VXWANT;
                tsleep((caddr_t)vp, PINOD, "vget", 0);
                return (1);
        }
        if (vp->v_usecount == 0)
                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
        vp->v_usecount++;
        if (lockflag)
                VOP_LOCK(vp);
        return (0);
}

/*
 * Vnode reference, just increment the count.
 * Caller must already hold a reference (panics otherwise).
 */
void
vref(vp)
        struct vnode *vp;
{

        if (vp->v_usecount <= 0)
                panic("vref used where vget required");
        vp->v_usecount++;
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
        register struct vnode *vp;
{

        VOP_UNLOCK(vp);
        vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
        register struct vnode *vp;
{

#ifdef DIAGNOSTIC
        if (vp == NULL)
                panic("vrele: null vp");
#endif
        vp->v_usecount--;
        if (vp->v_usecount > 0)
                return;
#ifdef DIAGNOSTIC
        if (vp->v_usecount != 0 || vp->v_writecount != 0) {
                vprint("vrele: bad ref count", vp);
                panic("vrele: ref cnt");
        }
#endif
        /*
         * insert at tail of LRU list
         */
        TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
        VOP_INACTIVE(vp);
}

/*
 * Page or buffer structure gets a reference.
 * A hold keeps the vnode from being reused but does not keep it active.
 */
void
vhold(vp)
        register struct vnode *vp;
{

        vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
        register struct vnode *vp;
{

        if (vp->v_holdcnt <= 0)
                panic("holdrele: holdcnt");
        vp->v_holdcnt--;
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;        /* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
        struct mount *mp;
        struct vnode *skipvp;
        int flags;
{
        register struct vnode *vp, *nvp;
        int busy = 0;

        if ((mp->mnt_flag & MNT_MPBUSY) == 0)
                panic("vflush: not busy");
loop:
        for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
                /* vgone may sleep; restart if vp moved off this mount. */
                if (vp->v_mount != mp)
                        goto loop;
                nvp = vp->v_mntvnodes.le_next;
                /*
                 * Skip over a selected vnode.
                 */
                if (vp == skipvp)
                        continue;
                /*
                 * Skip over a vnodes marked VSYSTEM.
                 */
                if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
                        continue;
                /*
                 * If WRITECLOSE is set, only flush out regular file
                 * vnodes open for writing.
                 */
                if ((flags & WRITECLOSE) &&
                    (vp->v_writecount == 0 || vp->v_type != VREG))
                        continue;
                /*
                 * With v_usecount == 0, all we need to do is clear
                 * out the vnode data structures and we are done.
                 */
                if (vp->v_usecount == 0) {
                        vgone(vp);
                        continue;
                }
                /*
                 * If FORCECLOSE is set, forcibly close the vnode.
                 * For block or character devices, revert to an
                 * anonymous device. For all other files, just kill them.
                 */
                if (flags & FORCECLOSE) {
                        if (vp->v_type != VBLK && vp->v_type != VCHR) {
                                vgone(vp);
                        } else {
                                vclean(vp, 0);
                                vp->v_op = spec_vnodeop_p;
                                insmntque(vp, (struct mount *)0);
                        }
                        continue;
                }
#ifdef DEBUG
                if (busyprt)
                        vprint("vflush: busy vnode", vp);
#endif
                busy++;
        }
        if (busy)
                return (EBUSY);
        return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags)
        register struct vnode *vp;
        int flags;
{
        int active;

        /*
         * Check to see if the vnode is in use.
         * If so we have to reference it before we clean it out
         * so that its count cannot fall to zero and generate a
         * race against ourselves to recycle it.
         */
        if ((active = vp->v_usecount) != 0)
                VREF(vp);
        /*
         * Even if the count is zero, the VOP_INACTIVE routine may still
         * have the object locked while it cleans it out. The VOP_LOCK
         * ensures that the VOP_INACTIVE routine is done with its work.
         * For active vnodes, it ensures that no other activity can
         * occur while the underlying object is being cleaned out.
         */
        VOP_LOCK(vp);
        /*
         * Prevent the vnode from being recycled or
         * brought into use while we clean it out.
         */
        if (vp->v_flag & VXLOCK)
                panic("vclean: deadlock");
        vp->v_flag |= VXLOCK;
        /*
         * Clean out any buffers associated with the vnode.
         */
        if (flags & DOCLOSE)
                vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
        /*
         * Any other processes trying to obtain this lock must first
         * wait for VXLOCK to clear, then call the new lock operation.
         */
        VOP_UNLOCK(vp);
        /*
         * If purging an active vnode, it must be closed and
         * deactivated before being reclaimed.
         */
        if (active) {
                if (flags & DOCLOSE)
                        VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
                VOP_INACTIVE(vp);
        }
        /*
         * Reclaim the vnode.
         */
        if (VOP_RECLAIM(vp))
                panic("vclean: cannot reclaim");
        if (active)
                vrele(vp);

        /*
         * Done with purge, notify sleepers of the grim news.
         */
        vp->v_op = dead_vnodeop_p;
        vp->v_tag = VT_NON;
        vp->v_flag &= ~VXLOCK;
        if (vp->v_flag & VXWANT) {
                vp->v_flag &= ~VXWANT;
                wakeup((caddr_t)vp);
        }
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vgoneall(vp)
        register struct vnode *vp;
{
        register struct vnode *vq;

        if (vp->v_flag & VALIASED) {
                /*
                 * If a vgone (or vclean) is already in progress,
                 * wait until it is done and return.
                 */
                if (vp->v_flag & VXLOCK) {
                        vp->v_flag |= VXWANT;
                        tsleep((caddr_t)vp, PINOD, "vgoneall", 0);
                        return;
                }
                /*
                 * Ensure that vp will not be vgone'd while we
                 * are eliminating its aliases.
                 */
                vp->v_flag |= VXLOCK;
                while (vp->v_flag & VALIASED) {
                        for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                                if (vq->v_rdev != vp->v_rdev ||
                                    vq->v_type != vp->v_type || vp == vq)
                                        continue;
                                vgone(vq);
                                break;
                        }
                }
                /*
                 * Remove the lock so that vgone below will
                 * really eliminate the vnode after which time
                 * vgone will awaken any sleepers.
                 */
                vp->v_flag &= ~VXLOCK;
        }
        vgone(vp);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
        register struct vnode *vp;
{
        register struct vnode *vq;
        struct vnode *vx;

        /*
         * If a vgone (or vclean) is already in progress,
         * wait until it is done and return.
         */
        if (vp->v_flag & VXLOCK) {
                vp->v_flag |= VXWANT;
                tsleep((caddr_t)vp, PINOD, "vgone", 0);
                return;
        }
        /*
         * Clean out the filesystem specific data.
         */
        vclean(vp, DOCLOSE);
        /*
         * Delete from old mount point vnode list, if on one.
         */
        insmntque(vp, (struct mount *)0);
        /*
         * If special device, remove it from special device alias list.
         */
        if (vp->v_type == VBLK || vp->v_type == VCHR) {
                if (*vp->v_hashchain == vp) {
                        *vp->v_hashchain = vp->v_specnext;
                } else {
                        for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                                if (vq->v_specnext != vp)
                                        continue;
                                vq->v_specnext = vp->v_specnext;
                                break;
                        }
                        if (vq == NULL)
                                panic("missing bdev");
                }
                if (vp->v_flag & VALIASED) {
                        /*
                         * If exactly one alias remains, it is no longer
                         * aliased; clear its VALIASED flag too.
                         */
                        vx = NULL;
                        for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                                if (vq->v_rdev != vp->v_rdev ||
                                    vq->v_type != vp->v_type)
                                        continue;
                                if (vx)
                                        break;
                                vx = vq;
                        }
                        if (vx == NULL)
                                panic("missing alias");
                        if (vq == NULL)
                                vx->v_flag &= ~VALIASED;
                        vp->v_flag &= ~VALIASED;
                }
                FREE(vp->v_specinfo, M_VNODE);
                vp->v_specinfo = NULL;
        }
        /*
         * If it is on the freelist and not already at the head,
         * move it to the head of the list. The test of the back
         * pointer and the reference count of zero is because
         * it will be removed from the free list by getnewvnode,
         * but will not have its reference count incremented until
         * after calling vgone. If the reference count were
         * incremented first, vgone would (incorrectly) try to
         * close the previous instance of the underlying object.
         * So, the back pointer is explicitly set to `0xdeadb' in
         * getnewvnode after removing it from the freelist to ensure
         * that we do not try to move it here.
         */
        if (vp->v_usecount == 0 &&
            vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
            vnode_free_list.tqh_first != vp) {
                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
        }
        vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
        dev_t dev;
        enum vtype type;
        struct vnode **vpp;
{
        register struct vnode *vp;

        for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
                if (dev != vp->v_rdev || type != vp->v_type)
                        continue;
                *vpp = vp;
                return (1);
        }
        return (0);
}

/*
 * Calculate the total number of references to a special device,
 * summing the use counts of every alias of the device.
 */
int
vcount(vp)
        register struct vnode *vp;
{
        register struct vnode *vq, *vnext;
        int count;

loop:
        if ((vp->v_flag & VALIASED) == 0)
                return (vp->v_usecount);
        for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
                vnext = vq->v_specnext;
                if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
                        continue;
                /*
                 * Alias, but not in use, so flush it out.
                 */
                if (vq->v_usecount == 0 && vq != vp) {
                        vgone(vq);
                        /* vgone may have changed the chain; start over */
                        goto loop;
                }
                count += vq->v_usecount;
        }
        return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
        char *label;
        register struct vnode *vp;
{
        char buf[64];

        if (label != NULL)
                printf("%s: ", label);
        printf("type %s, usecount %d, writecount %d, refcount %ld,",
            typename[vp->v_type], vp->v_usecount, vp->v_writecount,
            vp->v_holdcnt);
        buf[0] = '\0';
        if (vp->v_flag & VROOT)
                strcat(buf, "|VROOT");
        if (vp->v_flag & VTEXT)
                strcat(buf, "|VTEXT");
        if (vp->v_flag & VSYSTEM)
                strcat(buf, "|VSYSTEM");
        if (vp->v_flag & VXLOCK)
                strcat(buf, "|VXLOCK");
        if (vp->v_flag & VXWANT)
                strcat(buf, "|VXWANT");
        if (vp->v_flag & VBWAIT)
                strcat(buf, "|VBWAIT");
        if (vp->v_flag & VALIASED)
                strcat(buf, "|VALIASED");
        if (buf[0] != '\0')
                printf(" flags (%s)", &buf[1]);
        if (vp->v_data == NULL) {
                printf("\n");
        } else {
                printf("\n\t");
                VOP_PRINT(vp);
        }
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
        register struct mount *mp;
        register struct vnode *vp;

        printf("Locked vnodes\n");
        for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
            mp = mp->mnt_list.cqe_next) {
                for (vp = mp->mnt_vnodelist.lh_first;
                    vp != NULL;
                    vp = vp->v_mntvnodes.le_next)
                        if (VOP_ISLOCKED(vp))
                                vprint((char *)0, vp);
        }
}
#endif

int kinfo_vdebug = 1;           /* report vnode-list races in sysctl_vnode */
int kinfo_vgetfailed;
#define KINFO_VNODESLOP 10      /* slack allowed when sizing the copyout */
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
1256 */ 1257 /* ARGSUSED */ 1258 int 1259 sysctl_vnode(where, sizep) 1260 char *where; 1261 size_t *sizep; 1262 { 1263 register struct mount *mp, *nmp; 1264 struct vnode *vp; 1265 register char *bp = where, *savebp; 1266 char *ewhere; 1267 int error; 1268 1269 #define VPTRSZ sizeof (struct vnode *) 1270 #define VNODESZ sizeof (struct vnode) 1271 if (where == NULL) { 1272 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 1273 return (0); 1274 } 1275 ewhere = where + *sizep; 1276 1277 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1278 nmp = mp->mnt_list.cqe_next; 1279 if (vfs_busy(mp)) 1280 continue; 1281 savebp = bp; 1282 again: 1283 for (vp = mp->mnt_vnodelist.lh_first; 1284 vp != NULL; 1285 vp = vp->v_mntvnodes.le_next) { 1286 /* 1287 * Check that the vp is still associated with 1288 * this filesystem. RACE: could have been 1289 * recycled onto the same filesystem. 1290 */ 1291 if (vp->v_mount != mp) { 1292 if (kinfo_vdebug) 1293 printf("kinfo: vp changed\n"); 1294 bp = savebp; 1295 goto again; 1296 } 1297 if (bp + VPTRSZ + VNODESZ > ewhere) { 1298 *sizep = bp - where; 1299 return (ENOMEM); 1300 } 1301 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || 1302 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) 1303 return (error); 1304 bp += VPTRSZ + VNODESZ; 1305 } 1306 vfs_unbusy(mp); 1307 } 1308 1309 *sizep = bp - where; 1310 return (0); 1311 } 1312 1313 /* 1314 * Check to see if a filesystem is mounted on a block device. 
 */
/*
 * Returns EBUSY if vp, or any alias of the same special device, has a
 * file system mounted on it; 0 otherwise.
 */
int
vfs_mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		/* Check every alias of the same device as well. */
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON)
				return (EBUSY);
		}
	}
	return (0);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 * Returns 0 on success; EPERM for a duplicate entry, ENOBUFS if the
 * address family's radix table could not be attached, or a copyin error.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		/*
		 * No address given: install the default export entry,
		 * which matches any host without a more specific entry.
		 */
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	/* One allocation holds the netcred, the address, and the mask. */
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t)np, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
	if (error)
		goto out;
	/* Clamp sa_len so it cannot overrun what was actually copied in. */
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	/* Insert; the radix tree hands back our own node on success. */
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) {	/* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
/*
 * rnh_walktree callback: remove one export entry from its radix tree
 * and free it.  w is the radix_node_head being torn down.
 */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
1428 */ 1429 static void 1430 vfs_free_addrlist(nep) 1431 struct netexport *nep; 1432 { 1433 register int i; 1434 register struct radix_node_head *rnh; 1435 1436 for (i = 0; i <= AF_MAX; i++) 1437 if ((rnh = nep->ne_rtable[i]) != NULL) { 1438 (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh); 1439 free((caddr_t)rnh, M_RTABLE); 1440 nep->ne_rtable[i] = 0; 1441 } 1442 } 1443 1444 int 1445 vfs_export(mp, nep, argp) 1446 struct mount *mp; 1447 struct netexport *nep; 1448 struct export_args *argp; 1449 { 1450 int error; 1451 1452 if (argp->ex_flags & MNT_DELEXPORT) { 1453 if (mp->mnt_flag & MNT_EXPUBLIC) { 1454 vfs_setpublicfs(NULL, NULL, NULL); 1455 mp->mnt_flag &= ~MNT_EXPUBLIC; 1456 } 1457 vfs_free_addrlist(nep); 1458 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 1459 } 1460 if (argp->ex_flags & MNT_EXPORTED) { 1461 if (argp->ex_flags & MNT_EXPUBLIC) { 1462 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 1463 return (error); 1464 mp->mnt_flag |= MNT_EXPUBLIC; 1465 } 1466 if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0) 1467 return (error); 1468 mp->mnt_flag |= MNT_EXPORTED; 1469 } 1470 return (0); 1471 } 1472 1473 /* 1474 * Set the publicly exported filesystem (WebNFS). Currently, only 1475 * one public filesystem is possible in the spec (RFC 2054 and 2055) 1476 */ 1477 int 1478 vfs_setpublicfs(mp, nep, argp) 1479 struct mount *mp; 1480 struct netexport *nep; 1481 struct export_args *argp; 1482 { 1483 int error; 1484 struct vnode *rvp; 1485 char *cp; 1486 1487 /* 1488 * mp == NULL -> invalidate the current info, the FS is 1489 * no longer exported. May be called from either vfs_export 1490 * or unmount, so check if it hasn't already been done. 1491 */ 1492 if (mp == NULL) { 1493 if (nfs_pub.np_valid) { 1494 nfs_pub.np_valid = 0; 1495 if (nfs_pub.np_index != NULL) { 1496 FREE(nfs_pub.np_index, M_TEMP); 1497 nfs_pub.np_index = NULL; 1498 } 1499 } 1500 return (0); 1501 } 1502 1503 /* 1504 * Only one allowed at a time. 
1505 */ 1506 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 1507 return (EBUSY); 1508 1509 /* 1510 * Get real filehandle for root of exported FS. 1511 */ 1512 bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); 1513 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 1514 1515 if ((error = VFS_ROOT(mp, &rvp))) 1516 return (error); 1517 1518 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 1519 return (error); 1520 1521 vput(rvp); 1522 1523 /* 1524 * If an indexfile was specified, pull it in. 1525 */ 1526 if (argp->ex_indexfile != NULL) { 1527 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 1528 M_WAITOK); 1529 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 1530 MAXNAMLEN, (size_t *)0); 1531 if (!error) { 1532 /* 1533 * Check for illegal filenames. 1534 */ 1535 for (cp = nfs_pub.np_index; *cp; cp++) { 1536 if (*cp == '/') { 1537 error = EINVAL; 1538 break; 1539 } 1540 } 1541 } 1542 if (error) { 1543 FREE(nfs_pub.np_index, M_TEMP); 1544 return (error); 1545 } 1546 } 1547 1548 nfs_pub.np_mount = mp; 1549 nfs_pub.np_valid = 1; 1550 return (0); 1551 } 1552 1553 struct netcred * 1554 vfs_export_lookup(mp, nep, nam) 1555 register struct mount *mp; 1556 struct netexport *nep; 1557 struct mbuf *nam; 1558 { 1559 register struct netcred *np; 1560 register struct radix_node_head *rnh; 1561 struct sockaddr *saddr; 1562 1563 np = NULL; 1564 if (mp->mnt_flag & MNT_EXPORTED) { 1565 /* 1566 * Lookup in the export list first. 1567 */ 1568 if (nam != NULL) { 1569 saddr = mtod(nam, struct sockaddr *); 1570 rnh = nep->ne_rtable[saddr->sa_family]; 1571 if (rnh != NULL) { 1572 np = (struct netcred *) 1573 (*rnh->rnh_matchaddr)((caddr_t)saddr, 1574 rnh); 1575 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 1576 np = NULL; 1577 } 1578 } 1579 /* 1580 * If no address match, use the default if it exists. 
1581 */ 1582 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 1583 np = &nep->ne_defexported; 1584 } 1585 return (np); 1586 } 1587 1588 /* 1589 * Do the usual access checking. 1590 * file_mode, uid and gid are from the vnode in question, 1591 * while acc_mode and cred are from the VOP_ACCESS parameter list 1592 */ 1593 int 1594 vaccess(type, file_mode, uid, gid, acc_mode, cred) 1595 enum vtype type; 1596 mode_t file_mode; 1597 uid_t uid; 1598 gid_t gid; 1599 mode_t acc_mode; 1600 struct ucred *cred; 1601 { 1602 mode_t mask; 1603 1604 /* 1605 * Super-user always gets read/write access, but execute access depends 1606 * on at least one execute bit being set. 1607 */ 1608 if (cred->cr_uid == 0) { 1609 if ((acc_mode & VEXEC) && type != VDIR && 1610 (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0) 1611 return (EACCES); 1612 return (0); 1613 } 1614 1615 mask = 0; 1616 1617 /* Otherwise, check the owner. */ 1618 if (cred->cr_uid == uid) { 1619 if (acc_mode & VEXEC) 1620 mask |= S_IXUSR; 1621 if (acc_mode & VREAD) 1622 mask |= S_IRUSR; 1623 if (acc_mode & VWRITE) 1624 mask |= S_IWUSR; 1625 return ((file_mode & mask) == mask ? 0 : EACCES); 1626 } 1627 1628 /* Otherwise, check the groups. */ 1629 if (cred->cr_gid == gid || groupmember(gid, cred)) { 1630 if (acc_mode & VEXEC) 1631 mask |= S_IXGRP; 1632 if (acc_mode & VREAD) 1633 mask |= S_IRGRP; 1634 if (acc_mode & VWRITE) 1635 mask |= S_IWGRP; 1636 return ((file_mode & mask) == mask ? 0 : EACCES); 1637 } 1638 1639 /* Otherwise, check everyone else. */ 1640 if (acc_mode & VEXEC) 1641 mask |= S_IXOTH; 1642 if (acc_mode & VREAD) 1643 mask |= S_IROTH; 1644 if (acc_mode & VWRITE) 1645 mask |= S_IWOTH; 1646 return ((file_mode & mask) == mask ? 0 : EACCES); 1647 } 1648 1649 /* 1650 * Unmount all file systems. 1651 * We traverse the list in reverse order under the assumption that doing so 1652 * will avoid needing to worry about dependencies. 
 */
void
vfs_unmountall()
{
	register struct mount *mp, *nmp;
	int allerror, error;

	/*
	 * Walk the mount list backwards so that file systems mounted
	 * on top of others go away first.  Fetch the predecessor up
	 * front: dounmount() unlinks (and may free) mp.
	 */
	for (allerror = 0,
	    mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
		printf("unmounting %s (%s)...\n",
		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
		if (vfs_busy(mp))
			continue;
		if ((error = dounmount(mp, MNT_FORCE, &proc0)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}
	if (allerror)
		printf("WARNING: some file systems would not unmount\n");
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
	register struct buf *bp;
	int iter, nbusy;

	printf("syncing disks... ");

	/* XXX Should suspend scheduling. */
	(void) spl0();

	if (panicstr == 0) {
		/* Release inodes held by texts before update. */
		vnode_pager_umount(NULL);
#ifdef notdef
		vnshutdown();
#endif

		/* Sync before unmount, in case we hang on something. */
		sys_sync(&proc0, (void *)0, (register_t *)0);

		/* Unmount file systems. */
		vfs_unmountall();
	}

	/* Sync again after unmount, just in case. */
	sys_sync(&proc0, (void *)0, (register_t *)0);

	/*
	 * Wait for writes to drain: scan the buffer pool for buffers
	 * that are busy but not invalidated, giving the disks a little
	 * longer on each of up to 20 passes.
	 */
	for (iter = 0; iter < 20; iter++) {
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; )
			if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
				nbusy++;
		if (nbusy == 0)
			break;
		printf("%d ", nbusy);
		DELAY(40000 * iter);
	}
	if (nbusy)
		printf("giving up\n");
	else
		printf("done\n");
}

/*
 * Mount the root file system. If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
1731 */ 1732 int 1733 vfs_mountroot() 1734 { 1735 extern int (*mountroot) __P((void)); 1736 int i; 1737 1738 if (root_device == NULL) 1739 panic("vfs_mountroot: root device unknown"); 1740 1741 switch (root_device->dv_class) { 1742 case DV_IFNET: 1743 if (rootdev != NODEV) 1744 panic("vfs_mountroot: rootdev set for DV_IFNET"); 1745 break; 1746 1747 case DV_DISK: 1748 if (rootdev == NODEV) 1749 panic("vfs_mountroot: rootdev not set for DV_DISK"); 1750 break; 1751 1752 default: 1753 printf("%s: inappropriate for root file system\n", 1754 root_device->dv_xname); 1755 return (ENODEV); 1756 } 1757 1758 /* 1759 * If user specified a file system, use it. 1760 */ 1761 if (mountroot != NULL) 1762 return ((*mountroot)()); 1763 1764 /* 1765 * Try each file system currently configured into the kernel. 1766 */ 1767 for (i = 0; i < nvfssw; i++) { 1768 if (vfssw[i] == NULL || vfssw[i]->vfs_mountroot == NULL) 1769 continue; 1770 #ifdef DEBUG 1771 printf("mountroot: trying %s...\n", vfssw[i]->vfs_name); 1772 #endif 1773 if ((*vfssw[i]->vfs_mountroot)() == 0) { 1774 printf("root file system type: %s\n", 1775 vfssw[i]->vfs_name); 1776 return (0); 1777 } 1778 } 1779 1780 printf("no file system for %s", root_device->dv_xname); 1781 if (root_device->dv_class == DV_DISK) 1782 printf(" (dev 0x%x)", rootdev); 1783 printf("\n"); 1784 return (EFTYPE); 1785 } 1786 1787 /* 1788 * Given a file system name, look up the vfsops for that 1789 * file system, or return NULL if file system isn't present 1790 * in the kernel. 1791 */ 1792 struct vfsops * 1793 vfs_getopsbyname(name) 1794 const char *name; 1795 { 1796 int i; 1797 1798 for (i = 0; i < nvfssw; i++) 1799 if (vfssw[i] != NULL && strcmp(vfssw[i]->vfs_name, name) == 0) 1800 return (vfssw[i]); 1801 return (NULL); 1802 } 1803