/*	$NetBSD: vfs_subr.c,v 1.65 1997/04/23 20:19:45 mycroft Exp $	*/

/*
 * Copyright (c) 1997 Jason R. Thorpe.  All rights reserved.
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>
#include <sys/syscallargs.h>
#include <sys/device.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

enum vtype iftovt_tab[16] = {
        VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
        VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
        0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
        S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;                /* 1 => permit forcible unmounting */
int prtactive = 0;              /* 1 => print out reclaim of active vnodes */

/*
 * Insq/Remq for the vnode usage lists.
 */
#define bufinsvn(bp, dp)        LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define bufremvn(bp) {                                                  \
        LIST_REMOVE(bp, b_vnbufs);                                      \
        (bp)->b_vnbufs.le_next = NOLIST;                                \
}
TAILQ_HEAD(freelst, vnode) vnode_free_list =    /* vnode free list */
    TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct mntlist mountlist =                      /* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);

struct device *root_device;                     /* root device */

int vfs_lock __P((struct mount *));
void vfs_unlock __P((struct mount *));
struct mount *getvfs __P((fsid_t *));
long makefstype __P((char *));
void vattr_null __P((struct vattr *));
int getnewvnode __P((enum vtagtype, struct mount *, int (**)(void *),
    struct vnode **));
void insmntque __P((struct vnode *, struct mount *));
int vinvalbuf __P((struct vnode *, int, struct ucred *, struct proc *, int,
    int));
void vflushbuf __P((struct vnode *, int));
void brelvp __P((struct buf *));
int bdevvp __P((dev_t, struct vnode **));
int cdevvp __P((dev_t, struct vnode **));
int getdevvp __P((dev_t, struct vnode **, enum vtype));
struct vnode *checkalias __P((struct vnode *, dev_t, struct mount *));
int vget __P((struct vnode *, int));
void vref __P((struct vnode *));
void vput __P((struct vnode *));
void vrele __P((struct vnode *));
void vhold __P((struct vnode *));
void holdrele __P((struct vnode *));
int vflush __P((struct mount *, struct vnode *, int));
void vgoneall __P((struct vnode *));
void vgone __P((struct vnode *));
int vcount __P((struct vnode *));
void vprint __P((char *, struct vnode *));
int vfs_mountedon __P((struct vnode *));
int vfs_export __P((struct mount *, struct netexport *, struct export_args *));
struct netcred *vfs_export_lookup __P((struct mount *, struct netexport *,
    struct mbuf *));
int vaccess __P((mode_t, uid_t, gid_t, mode_t, struct ucred *));
void vfs_unmountall __P((void));
void vfs_shutdown __P((void));

static int vfs_hang_addrlist __P((struct mount *, struct netexport *,
    struct export_args *));
static int vfs_free_netcred __P((struct radix_node *, void *));
static void vfs_free_addrlist __P((struct netexport *));

#ifdef DEBUG
void printlockedvnodes __P((void));
#endif

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

        /*
         * Nothing to do here anymore; vnode_free_list and mountlist
         * are now initialized data.
         */
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
int
vfs_lock(mp)
        register struct mount *mp;
{

        while (mp->mnt_flag & MNT_MLOCK) {
                mp->mnt_flag |= MNT_MWAIT;
                tsleep((caddr_t)mp, PVFS, "vfslock", 0);
        }
        mp->mnt_flag |= MNT_MLOCK;
        return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
        register struct mount *mp;
{

        if ((mp->mnt_flag & MNT_MLOCK) == 0)
                panic("vfs_unlock: not locked");
        mp->mnt_flag &= ~MNT_MLOCK;
        if (mp->mnt_flag & MNT_MWAIT) {
                mp->mnt_flag &= ~MNT_MWAIT;
                wakeup((caddr_t)mp);
        }
}
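
/*
 * Example (an illustrative sketch, not part of the original file):
 * a caller that must keep other processes out of a mount point while
 * changing it brackets the work with vfs_lock()/vfs_unlock().  The
 * function name and the particular flag update are hypothetical.
 */
#ifdef notdef
void
example_mount_update(mp)
        struct mount *mp;
{

        (void) vfs_lock(mp);            /* sleep until MNT_MLOCK clears */
        mp->mnt_flag |= MNT_RDONLY;     /* mutate while others are excluded */
        vfs_unlock(mp);                 /* wake any MNT_MWAIT sleepers */
}
#endif /* notdef */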

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
int
vfs_busy(mp)
        register struct mount *mp;
{
        int unmounting = mp->mnt_flag & MNT_UNMOUNT;

        while (mp->mnt_flag & MNT_MPBUSY) {
                mp->mnt_flag |= MNT_MPWANT;
                tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0);
                if (unmounting)
                        return (1);
        }
        mp->mnt_flag |= MNT_MPBUSY;
        return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
void
vfs_unbusy(mp)
        register struct mount *mp;
{

        if ((mp->mnt_flag & MNT_MPBUSY) == 0)
                panic("vfs_unbusy: not busy");
        mp->mnt_flag &= ~MNT_MPBUSY;
        if (mp->mnt_flag & MNT_MPWANT) {
                mp->mnt_flag &= ~MNT_MPWANT;
                wakeup((caddr_t)&mp->mnt_flag);
        }
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
getvfs(fsid)
        fsid_t *fsid;
{
        register struct mount *mp;

        for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
            mp = mp->mnt_list.cqe_next)
                if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
                    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
                        return (mp);
        return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
getnewfsid(mp, mtype)
        struct mount *mp;
        int mtype;
{
        static u_short xxxfs_mntid;

        fsid_t tfsid;

        mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + 11, 0);  /* XXX */
        mp->mnt_stat.f_fsid.val[1] = mtype;
        if (xxxfs_mntid == 0)
                ++xxxfs_mntid;
        tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
        tfsid.val[1] = mtype;
        if (mountlist.cqh_first != (void *)&mountlist) {
                while (getvfs(&tfsid)) {
                        tfsid.val[0]++;
                        xxxfs_mntid++;
                }
        }
        mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(type)
        char *type;
{
        long rv;

        for (rv = 0; *type; type++) {
                rv <<= 2;
                rv ^= *type;
        }
        return (rv);
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
        register struct vattr *vap;
{

        vap->va_type = VNON;
        /* XXX These next two used to be one line, but for a GCC bug. */
        vap->va_size = VNOVAL;
        vap->va_bytes = VNOVAL;
        vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
            vap->va_fsid = vap->va_fileid =
            vap->va_blocksize = vap->va_rdev =
            vap->va_atime.tv_sec = vap->va_atime.tv_nsec =
            vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec =
            vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec =
            vap->va_flags = vap->va_gen = VNOVAL;
        vap->va_vaflags = 0;
}
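
/*
 * Example (an illustrative sketch, not part of the original file):
 * a file system's getattr routine typically calls vattr_null() first
 * so that any attribute it does not fill in reads back as VNOVAL.
 * The function name and the fields set here are hypothetical.
 */
#ifdef notdef
int
example_getattr(vp, vap)
        struct vnode *vp;
        struct vattr *vap;
{

        vattr_null(vap);                /* everything defaults to VNOVAL */
        vap->va_type = vp->v_type;      /* then fill in what is known */
        vap->va_nlink = 1;
        return (0);
}
#endif /* notdef */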

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
        enum vtagtype tag;
        struct mount *mp;
        int (**vops) __P((void *));
        struct vnode **vpp;
{
        register struct vnode *vp;
#ifdef DIAGNOSTIC
        int s;
#endif

        if ((vnode_free_list.tqh_first == NULL &&
            numvnodes < 2 * desiredvnodes) ||
            numvnodes < desiredvnodes) {
                vp = (struct vnode *)malloc((u_long)sizeof *vp,
                    M_VNODE, M_WAITOK);
                bzero((char *)vp, sizeof *vp);
                numvnodes++;
        } else {
                if ((vp = vnode_free_list.tqh_first) == NULL) {
                        tablefull("vnode");
                        *vpp = 0;
                        return (ENFILE);
                }
                if (vp->v_usecount) {
                        vprint("free vnode", vp);
                        panic("free vnode isn't");
                }
                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                /* see comment on why 0xdeadb is set at end of vgone (below) */
                vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
                vp->v_lease = NULL;
                if (vp->v_type != VBAD)
                        vgone(vp);
#ifdef DIAGNOSTIC
                if (vp->v_data) {
                        vprint("cleaned vnode", vp);
                        panic("cleaned vnode isn't");
                }
                s = splbio();
                if (vp->v_numoutput)
                        panic("Clean vnode has pending I/O's");
                splx(s);
#endif
                vp->v_flag = 0;
                vp->v_lastr = 0;
                vp->v_ralen = 0;
                vp->v_maxra = 0;
                vp->v_lastw = 0;
                vp->v_lasta = 0;
                vp->v_cstart = 0;
                vp->v_clen = 0;
                vp->v_socket = 0;
        }
        vp->v_type = VNON;
        cache_purge(vp);
        vp->v_tag = tag;
        vp->v_op = vops;
        insmntque(vp, mp);
        *vpp = vp;
        vp->v_usecount = 1;
        vp->v_data = 0;
        return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
        register struct vnode *vp;
        register struct mount *mp;
{

        /*
         * Delete from old mount point vnode list, if on one.
         */
        if (vp->v_mount != NULL)
                LIST_REMOVE(vp, v_mntvnodes);
        /*
         * Insert into list of vnodes for the new mount point, if available.
         */
        if ((vp->v_mount = mp) == NULL)
                return;
        LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
        register struct buf *bp;
{
        register struct vnode *vp;

        bp->b_flags &= ~B_WRITEINPROG;
        if ((vp = bp->b_vp) != NULL) {
                if (--vp->v_numoutput < 0)
                        panic("vwakeup: neg numoutput");
                if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
                        vp->v_flag &= ~VBWAIT;
                        wakeup((caddr_t)&vp->v_numoutput);
                }
        }
}
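
/*
 * Example (an illustrative sketch, not part of the original file):
 * how a file system might obtain a fresh vnode for a newly
 * instantiated file.  `example_vnodeop_p' is a hypothetical vnode
 * operations vector; real callers also set v_data appropriately.
 */
#ifdef notdef
int
example_allocvp(mp, vpp)
        struct mount *mp;
        struct vnode **vpp;
{
        extern int (**example_vnodeop_p) __P((void *));
        int error;

        /* Returns a clean vnode already entered on mp's vnode list. */
        error = getnewvnode(VT_NON, mp, example_vnodeop_p, vpp);
        if (error)
                return (error);
        (*vpp)->v_type = VREG;
        return (0);
}
#endif /* notdef */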

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
        register struct vnode *vp;
        int flags;
        struct ucred *cred;
        struct proc *p;
        int slpflag, slptimeo;
{
        register struct buf *bp;
        struct buf *nbp, *blist;
        int s, error;

        if (flags & V_SAVE) {
                if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
                        return (error);
                if (vp->v_dirtyblkhd.lh_first != NULL)
                        panic("vinvalbuf: dirty bufs");
        }
        for (;;) {
                if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
                        while (blist && blist->b_lblkno < 0)
                                blist = blist->b_vnbufs.le_next;
                if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
                    (flags & V_SAVEMETA))
                        while (blist && blist->b_lblkno < 0)
                                blist = blist->b_vnbufs.le_next;
                if (!blist)
                        break;

                for (bp = blist; bp; bp = nbp) {
                        nbp = bp->b_vnbufs.le_next;
                        if (flags & V_SAVEMETA && bp->b_lblkno < 0)
                                continue;
                        s = splbio();
                        if (bp->b_flags & B_BUSY) {
                                bp->b_flags |= B_WANTED;
                                error = tsleep((caddr_t)bp,
                                    slpflag | (PRIBIO + 1), "vinvalbuf",
                                    slptimeo);
                                splx(s);
                                if (error)
                                        return (error);
                                break;
                        }
                        bp->b_flags |= B_BUSY | B_VFLUSH;
                        splx(s);
                        /*
                         * XXX Since there are no node locks for NFS, I believe
                         * there is a slight chance that a delayed write will
                         * occur while sleeping just above, so check for it.
                         */
                        if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
                                (void) VOP_BWRITE(bp);
                                break;
                        }
                        bp->b_flags |= B_INVAL;
                        brelse(bp);
                }
        }
        if (!(flags & V_SAVEMETA) &&
            (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
                panic("vinvalbuf: flush failed");
        return (0);
}

void
vflushbuf(vp, sync)
        register struct vnode *vp;
        int sync;
{
        register struct buf *bp, *nbp;
        int s;

loop:
        s = splbio();
        for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
                nbp = bp->b_vnbufs.le_next;
                if ((bp->b_flags & B_BUSY))
                        continue;
                if ((bp->b_flags & B_DELWRI) == 0)
                        panic("vflushbuf: not dirty");
                bp->b_flags |= B_BUSY | B_VFLUSH;
                splx(s);
                /*
                 * Wait for I/O associated with indirect blocks to complete,
                 * since there is no way to quickly wait for them below.
                 */
                if (bp->b_vp == vp || sync == 0)
                        (void) bawrite(bp);
                else
                        (void) bwrite(bp);
                goto loop;
        }
        if (sync == 0) {
                splx(s);
                return;
        }
        while (vp->v_numoutput) {
                vp->v_flag |= VBWAIT;
                tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
        }
        splx(s);
        if (vp->v_dirtyblkhd.lh_first != NULL) {
                vprint("vflushbuf: dirty", vp);
                goto loop;
        }
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
        register struct vnode *vp;
        register struct buf *bp;
{

        if (bp->b_vp)
                panic("bgetvp: not free");
        VHOLD(vp);
        bp->b_vp = vp;
        if (vp->v_type == VBLK || vp->v_type == VCHR)
                bp->b_dev = vp->v_rdev;
        else
                bp->b_dev = NODEV;
        /*
         * Insert onto list for new vnode.
         */
        bufinsvn(bp, &vp->v_cleanblkhd);
}
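
/*
 * Example (an illustrative sketch, not part of the original file):
 * writing back and then discarding a vnode's cached buffers.  V_SAVE
 * fsyncs the dirty buffers first; passing 0 instead simply throws
 * them away, as a revoke-style operation would.
 */
#ifdef notdef
int
example_purgebufs(vp, cred, p)
        struct vnode *vp;
        struct ucred *cred;
        struct proc *p;
{

        return (vinvalbuf(vp, V_SAVE, cred, p, 0, 0));
}
#endif /* notdef */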

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
        register struct buf *bp;
{
        struct vnode *vp;

        if (bp->b_vp == (struct vnode *) 0)
                panic("brelvp: NULL");
        /*
         * Delete from old vnode list, if on one.
         */
        if (bp->b_vnbufs.le_next != NOLIST)
                bufremvn(bp);
        vp = bp->b_vp;
        bp->b_vp = (struct vnode *) 0;
        HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
        register struct buf *bp;
        register struct vnode *newvp;
{
        register struct buflists *listheadp;

        if (newvp == NULL) {
                printf("reassignbuf: NULL\n");
                return;
        }
        /*
         * Delete from old vnode list, if on one.
         */
        if (bp->b_vnbufs.le_next != NOLIST)
                bufremvn(bp);
        /*
         * If dirty, put on list of dirty buffers;
         * otherwise insert onto list of clean buffers.
         */
        if (bp->b_flags & B_DELWRI)
                listheadp = &newvp->v_dirtyblkhd;
        else
                listheadp = &newvp->v_cleanblkhd;
        bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
        dev_t dev;
        struct vnode **vpp;
{

        return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
        dev_t dev;
        struct vnode **vpp;
{

        return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
int
getdevvp(dev, vpp, type)
        dev_t dev;
        struct vnode **vpp;
        enum vtype type;
{
        register struct vnode *vp;
        struct vnode *nvp;
        int error;

        if (dev == NODEV)
                return (0);
        error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
        if (error) {
                *vpp = NULLVP;
                return (error);
        }
        vp = nvp;
        vp->v_type = type;
        if ((nvp = checkalias(vp, dev, NULL)) != 0) {
                vput(vp);
                vp = nvp;
        }
        *vpp = vp;
        return (0);
}
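
/*
 * Example (an illustrative sketch, not part of the original file):
 * mount-time code uses bdevvp() to obtain a vnode for the block
 * device holding a file system, much as the root file system setup
 * does for the global rootdev.
 */
#ifdef notdef
int
example_rootdevvp(vpp)
        struct vnode **vpp;
{

        return (bdevvp(rootdev, vpp));  /* *vpp is a VBLK vnode */
}
#endif /* notdef */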

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
        register struct vnode *nvp;
        dev_t nvp_rdev;
        struct mount *mp;
{
        register struct vnode *vp;
        struct vnode **vpp;

        if (nvp->v_type != VBLK && nvp->v_type != VCHR)
                return (NULLVP);

        vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
        for (vp = *vpp; vp; vp = vp->v_specnext) {
                if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
                        continue;
                /*
                 * Alias, but not in use, so flush it out.
                 */
                if (vp->v_usecount == 0) {
                        vgone(vp);
                        goto loop;
                }
                if (vget(vp, 1))
                        goto loop;
                break;
        }
        if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
                MALLOC(nvp->v_specinfo, struct specinfo *,
                    sizeof(struct specinfo), M_VNODE, M_WAITOK);
                nvp->v_rdev = nvp_rdev;
                nvp->v_hashchain = vpp;
                nvp->v_specnext = *vpp;
                nvp->v_specflags = 0;
                nvp->v_speclockf = NULL;
                *vpp = nvp;
                if (vp != NULL) {
                        nvp->v_flag |= VALIASED;
                        vp->v_flag |= VALIASED;
                        vput(vp);
                }
                return (NULLVP);
        }
        VOP_UNLOCK(vp);
        vclean(vp, 0);
        vp->v_op = nvp->v_op;
        vp->v_tag = nvp->v_tag;
        nvp->v_type = VNON;
        insmntque(vp, mp);
        return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, lockflag)
        register struct vnode *vp;
        int lockflag;
{

        /*
         * If the vnode is in the process of being cleaned out for
         * another use, we wait for the cleaning to finish and then
         * return failure. Cleaning is determined either by checking
         * that the VXLOCK flag is set, or that the use count is
         * zero with the back pointer set to show that it has been
         * removed from the free list by getnewvnode. The VXLOCK
         * flag may not have been set yet because vclean is blocked in
         * the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
         */
        if ((vp->v_flag & VXLOCK) ||
            (vp->v_usecount == 0 &&
            vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
                vp->v_flag |= VXWANT;
                tsleep((caddr_t)vp, PINOD, "vget", 0);
                return (1);
        }
        if (vp->v_usecount == 0)
                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
        vp->v_usecount++;
        if (lockflag)
                VOP_LOCK(vp);
        return (0);
}

/*
 * Vnode reference, just increment the count
 */
void
vref(vp)
        struct vnode *vp;
{

        if (vp->v_usecount <= 0)
                panic("vref used where vget required");
        vp->v_usecount++;
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
        register struct vnode *vp;
{

        VOP_UNLOCK(vp);
        vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
        register struct vnode *vp;
{

#ifdef DIAGNOSTIC
        if (vp == NULL)
                panic("vrele: null vp");
#endif
        vp->v_usecount--;
        if (vp->v_usecount > 0)
                return;
#ifdef DIAGNOSTIC
        if (vp->v_usecount != 0 || vp->v_writecount != 0) {
                vprint("vrele: bad ref count", vp);
                panic("vrele: ref cnt");
        }
#endif
        /*
         * insert at tail of LRU list
         */
        TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
        VOP_INACTIVE(vp);
}

/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
        register struct vnode *vp;
{

        vp->v_holdcnt++;
}
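
/*
 * Example (an illustrative sketch, not part of the original file):
 * the usual reference/lock dance around vget().  A non-zero return
 * means the vnode was being reclaimed while we slept, and callers
 * normally retry their lookup in that case.
 */
#ifdef notdef
int
example_use(vp)
        register struct vnode *vp;
{

        if (vget(vp, 1))                /* take a reference and lock it */
                return (ENOENT);        /* lost a race with vgone() */
        /* ... operate on the locked vnode ... */
        vput(vp);                       /* VOP_UNLOCK plus vrele */
        return (0);
}
#endif /* notdef */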

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
        register struct vnode *vp;
{

        if (vp->v_holdcnt <= 0)
                panic("holdrele: holdcnt");
        vp->v_holdcnt--;
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones;
 * return an error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;        /* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
        struct mount *mp;
        struct vnode *skipvp;
        int flags;
{
        register struct vnode *vp, *nvp;
        int busy = 0;

        if ((mp->mnt_flag & MNT_MPBUSY) == 0)
                panic("vflush: not busy");
loop:
        for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
                if (vp->v_mount != mp)
                        goto loop;
                nvp = vp->v_mntvnodes.le_next;
                /*
                 * Skip over a selected vnode.
                 */
                if (vp == skipvp)
                        continue;
                /*
                 * Skip over any vnodes marked VSYSTEM.
                 */
                if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
                        continue;
                /*
                 * If WRITECLOSE is set, only flush out regular file
                 * vnodes open for writing.
                 */
                if ((flags & WRITECLOSE) &&
                    (vp->v_writecount == 0 || vp->v_type != VREG))
                        continue;
                /*
                 * With v_usecount == 0, all we need to do is clear
                 * out the vnode data structures and we are done.
                 */
                if (vp->v_usecount == 0) {
                        vgone(vp);
                        continue;
                }
                /*
                 * If FORCECLOSE is set, forcibly close the vnode.
                 * For block or character devices, revert to an
                 * anonymous device. For all other files, just kill them.
                 */
                if (flags & FORCECLOSE) {
                        if (vp->v_type != VBLK && vp->v_type != VCHR) {
                                vgone(vp);
                        } else {
                                vclean(vp, 0);
                                vp->v_op = spec_vnodeop_p;
                                insmntque(vp, (struct mount *)0);
                        }
                        continue;
                }
#ifdef DEBUG
                if (busyprt)
                        vprint("vflush: busy vnode", vp);
#endif
                busy++;
        }
        if (busy)
                return (EBUSY);
        return (0);
}
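
/*
 * Example (an illustrative sketch, not part of the original file):
 * an unmount path flushes every vnode on the mount, sparing the file
 * system's private VSYSTEM vnodes and forcing active ones only when
 * MNT_FORCE was given.
 */
#ifdef notdef
int
example_unmount_flush(mp, mntflags)
        struct mount *mp;
        int mntflags;
{
        int flags = SKIPSYSTEM;

        if (mntflags & MNT_FORCE)
                flags |= FORCECLOSE;
        return (vflush(mp, NULLVP, flags));
}
#endif /* notdef */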

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags)
        register struct vnode *vp;
        int flags;
{
        int active;

        /*
         * Check to see if the vnode is in use.
         * If so we have to reference it before we clean it out
         * so that its count cannot fall to zero and generate a
         * race against ourselves to recycle it.
         */
        if ((active = vp->v_usecount) != 0)
                VREF(vp);
        /*
         * Even if the count is zero, the VOP_INACTIVE routine may still
         * have the object locked while it cleans it out. The VOP_LOCK
         * ensures that the VOP_INACTIVE routine is done with its work.
         * For active vnodes, it ensures that no other activity can
         * occur while the underlying object is being cleaned out.
         */
        VOP_LOCK(vp);
        /*
         * Prevent the vnode from being recycled or
         * brought into use while we clean it out.
         */
        if (vp->v_flag & VXLOCK)
                panic("vclean: deadlock");
        vp->v_flag |= VXLOCK;
        /*
         * Clean out any buffers associated with the vnode.
         */
        if (flags & DOCLOSE)
                vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
        /*
         * Any other processes trying to obtain this lock must first
         * wait for VXLOCK to clear, then call the new lock operation.
         */
        VOP_UNLOCK(vp);
        /*
         * If purging an active vnode, it must be closed and
         * deactivated before being reclaimed.
         */
        if (active) {
                if (flags & DOCLOSE)
                        VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
                VOP_INACTIVE(vp);
        }
        /*
         * Reclaim the vnode.
         */
        if (VOP_RECLAIM(vp))
                panic("vclean: cannot reclaim");
        if (active)
                vrele(vp);

        /*
         * Done with purge, notify sleepers of the grim news.
         */
        vp->v_op = dead_vnodeop_p;
        vp->v_tag = VT_NON;
        vp->v_flag &= ~VXLOCK;
        if (vp->v_flag & VXWANT) {
                vp->v_flag &= ~VXWANT;
                wakeup((caddr_t)vp);
        }
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vgoneall(vp)
        register struct vnode *vp;
{
        register struct vnode *vq;

        if (vp->v_flag & VALIASED) {
                /*
                 * If a vgone (or vclean) is already in progress,
                 * wait until it is done and return.
                 */
                if (vp->v_flag & VXLOCK) {
                        vp->v_flag |= VXWANT;
                        tsleep((caddr_t)vp, PINOD, "vgoneall", 0);
                        return;
                }
                /*
                 * Ensure that vp will not be vgone'd while we
                 * are eliminating its aliases.
                 */
                vp->v_flag |= VXLOCK;
                while (vp->v_flag & VALIASED) {
                        for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                                if (vq->v_rdev != vp->v_rdev ||
                                    vq->v_type != vp->v_type || vp == vq)
                                        continue;
                                vgone(vq);
                                break;
                        }
                }
                /*
                 * Remove the lock so that vgone below will
                 * really eliminate the vnode after which time
                 * vgone will awaken any sleepers.
                 */
                vp->v_flag &= ~VXLOCK;
        }
        vgone(vp);
}
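
/*
 * Example (an illustrative sketch, not part of the original file):
 * revoke(2)-style code uses vgoneall() so that every alias of a
 * device vnode is eliminated, not just the vnode in hand.
 */
#ifdef notdef
void
example_revoke(vp)
        register struct vnode *vp;
{

        if (vp->v_flag & VALIASED)
                vgoneall(vp);           /* vp and all of its aliases */
        else
                vgone(vp);
}
#endif /* notdef */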

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
        register struct vnode *vp;
{
        register struct vnode *vq;
        struct vnode *vx;

        /*
         * If a vgone (or vclean) is already in progress,
         * wait until it is done and return.
         */
        if (vp->v_flag & VXLOCK) {
                vp->v_flag |= VXWANT;
                tsleep((caddr_t)vp, PINOD, "vgone", 0);
                return;
        }
        /*
         * Clean out the filesystem specific data.
         */
        vclean(vp, DOCLOSE);
        /*
         * Delete from old mount point vnode list, if on one.
         */
        insmntque(vp, (struct mount *)0);
        /*
         * If special device, remove it from special device alias list.
         */
        if (vp->v_type == VBLK || vp->v_type == VCHR) {
                if (*vp->v_hashchain == vp) {
                        *vp->v_hashchain = vp->v_specnext;
                } else {
                        for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                                if (vq->v_specnext != vp)
                                        continue;
                                vq->v_specnext = vp->v_specnext;
                                break;
                        }
                        if (vq == NULL)
                                panic("missing bdev");
                }
                if (vp->v_flag & VALIASED) {
                        vx = NULL;
                        for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                                if (vq->v_rdev != vp->v_rdev ||
                                    vq->v_type != vp->v_type)
                                        continue;
                                if (vx)
                                        break;
                                vx = vq;
                        }
                        if (vx == NULL)
                                panic("missing alias");
                        if (vq == NULL)
                                vx->v_flag &= ~VALIASED;
                        vp->v_flag &= ~VALIASED;
                }
                FREE(vp->v_specinfo, M_VNODE);
                vp->v_specinfo = NULL;
        }
        /*
         * If it is on the freelist and not already at the head,
         * move it to the head of the list. The test of the back
         * pointer and the reference count of zero is because
         * it will be removed from the free list by getnewvnode,
         * but will not have its reference count incremented until
         * after calling vgone. If the reference count were
         * incremented first, vgone would (incorrectly) try to
         * close the previous instance of the underlying object.
         * So, the back pointer is explicitly set to `0xdeadb' in
         * getnewvnode after removing it from the freelist to ensure
         * that we do not try to move it here.
         */
        if (vp->v_usecount == 0 &&
            vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
            vnode_free_list.tqh_first != vp) {
                TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
                TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
        }
        vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
        dev_t dev;
        enum vtype type;
        struct vnode **vpp;
{
        register struct vnode *vp;

        for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
                if (dev != vp->v_rdev || type != vp->v_type)
                        continue;
                *vpp = vp;
                return (1);
        }
        return (0);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
        register struct vnode *vp;
{
        register struct vnode *vq, *vnext;
        int count;

loop:
        if ((vp->v_flag & VALIASED) == 0)
                return (vp->v_usecount);
        for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
                vnext = vq->v_specnext;
                if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
                        continue;
                /*
                 * Alias, but not in use, so flush it out.
                 */
                if (vq->v_usecount == 0 && vq != vp) {
                        vgone(vq);
                        goto loop;
                }
                count += vq->v_usecount;
        }
        return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
        char *label;
        register struct vnode *vp;
{
        char buf[64];

        if (label != NULL)
                printf("%s: ", label);
        printf("type %s, usecount %d, writecount %d, refcount %ld,",
            typename[vp->v_type], vp->v_usecount, vp->v_writecount,
            vp->v_holdcnt);
        buf[0] = '\0';
        if (vp->v_flag & VROOT)
                strcat(buf, "|VROOT");
        if (vp->v_flag & VTEXT)
                strcat(buf, "|VTEXT");
        if (vp->v_flag & VSYSTEM)
                strcat(buf, "|VSYSTEM");
        if (vp->v_flag & VXLOCK)
                strcat(buf, "|VXLOCK");
        if (vp->v_flag & VXWANT)
                strcat(buf, "|VXWANT");
        if (vp->v_flag & VBWAIT)
                strcat(buf, "|VBWAIT");
        if (vp->v_flag & VALIASED)
                strcat(buf, "|VALIASED");
        if (buf[0] != '\0')
                printf(" flags (%s)", &buf[1]);
        if (vp->v_data == NULL) {
                printf("\n");
        } else {
                printf("\n\t");
                VOP_PRINT(vp);
        }
}
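
/*
 * Example (an illustrative sketch, not part of the original file):
 * a device close routine cares whether this is the last close across
 * all aliases of the device, which is what vcount() answers.  The
 * function name and return convention are hypothetical.
 */
#ifdef notdef
int
example_lastclose(vp)
        register struct vnode *vp;
{

        if (vcount(vp) > 1)             /* other aliases still open */
                return (0);
        /* ... really shut down the underlying device here ... */
        return (1);
}
#endif /* notdef */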

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
        register struct mount *mp;
        register struct vnode *vp;

        printf("Locked vnodes\n");
        for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
            mp = mp->mnt_list.cqe_next) {
                for (vp = mp->mnt_vnodelist.lh_first;
                    vp != NULL;
                    vp = vp->v_mntvnodes.le_next)
                        if (VOP_ISLOCKED(vp))
                                vprint((char *)0, vp);
        }
}
#endif

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP 10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep)
        char *where;
        size_t *sizep;
{
        register struct mount *mp, *nmp;
        struct vnode *vp;
        register char *bp = where, *savebp;
        char *ewhere;
        int error;

#define VPTRSZ  sizeof (struct vnode *)
#define VNODESZ sizeof (struct vnode)
        if (where == NULL) {
                *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
                return (0);
        }
        ewhere = where + *sizep;

        for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
                nmp = mp->mnt_list.cqe_next;
                if (vfs_busy(mp))
                        continue;
                savebp = bp;
again:
                for (vp = mp->mnt_vnodelist.lh_first;
                    vp != NULL;
                    vp = vp->v_mntvnodes.le_next) {
                        /*
                         * Check that the vp is still associated with
                         * this filesystem.  RACE: could have been
                         * recycled onto the same filesystem.
                         */
                        if (vp->v_mount != mp) {
                                if (kinfo_vdebug)
                                        printf("kinfo: vp changed\n");
                                bp = savebp;
                                goto again;
                        }
                        if (bp + VPTRSZ + VNODESZ > ewhere) {
                                *sizep = bp - where;
                                return (ENOMEM);
                        }
                        if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
                            (error = copyout((caddr_t)vp, bp + VPTRSZ,
                            VNODESZ)))
                                return (error);
                        bp += VPTRSZ + VNODESZ;
                }
                vfs_unbusy(mp);
        }

        *sizep = bp - where;
        return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
        register struct vnode *vp;
{
        register struct vnode *vq;

        if (vp->v_specflags & SI_MOUNTEDON)
                return (EBUSY);
        if (vp->v_flag & VALIASED) {
                for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
                        if (vq->v_rdev != vp->v_rdev ||
                            vq->v_type != vp->v_type)
                                continue;
                        if (vq->v_specflags & SI_MOUNTEDON)
                                return (EBUSY);
                }
        }
        return (0);
}
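
/*
 * Example (an illustrative sketch, not part of the original file):
 * before mounting a file system on a block device, mount code checks
 * that no other file system already claims that device through any
 * of its aliases.  `devvp' and `error' belong to a hypothetical
 * mount routine.
 */
#ifdef notdef
        /* Fragment from a hypothetical mount routine: */
        if ((error = vfs_mountedon(devvp)) != 0)
                return (error);         /* EBUSY: device already in use */
#endif /* notdef */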

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
        struct mount *mp;
        struct netexport *nep;
        struct export_args *argp;
{
        register struct netcred *np;
        register struct radix_node_head *rnh;
        register int i;
        struct radix_node *rn;
        struct sockaddr *saddr, *smask = 0;
        struct domain *dom;
        int error;

        if (argp->ex_addrlen == 0) {
                if (mp->mnt_flag & MNT_DEFEXPORTED)
                        return (EPERM);
                np = &nep->ne_defexported;
                np->netc_exflags = argp->ex_flags;
                np->netc_anon = argp->ex_anon;
                np->netc_anon.cr_ref = 1;
                mp->mnt_flag |= MNT_DEFEXPORTED;
                return (0);
        }
        i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
        np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
        bzero((caddr_t)np, i);
        saddr = (struct sockaddr *)(np + 1);
        error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
        if (error)
                goto out;
        if (saddr->sa_len > argp->ex_addrlen)
                saddr->sa_len = argp->ex_addrlen;
        if (argp->ex_masklen) {
                smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
                /* Copy the mask from ex_mask (not ex_addr, a former bug). */
                error = copyin(argp->ex_mask, (caddr_t)smask,
                    argp->ex_masklen);
                if (error)
                        goto out;
                if (smask->sa_len > argp->ex_masklen)
                        smask->sa_len = argp->ex_masklen;
        }
        i = saddr->sa_family;
        if ((rnh = nep->ne_rtable[i]) == 0) {
                /*
                 * Seems silly to initialize every AF when most are not
                 * used; do so on demand here.
                 */
                for (dom = domains; dom; dom = dom->dom_next)
                        if (dom->dom_family == i && dom->dom_rtattach) {
                                dom->dom_rtattach((void **)&nep->ne_rtable[i],
                                    dom->dom_rtoffset);
                                break;
                        }
                if ((rnh = nep->ne_rtable[i]) == 0) {
                        error = ENOBUFS;
                        goto out;
                }
        }
        rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
            np->netc_rnodes);
        if (rn == 0 || np != (struct netcred *)rn) {    /* already exists */
                error = EPERM;
                goto out;
        }
        np->netc_exflags = argp->ex_flags;
        np->netc_anon = argp->ex_anon;
        np->netc_anon.cr_ref = 1;
        return (0);
out:
        free(np, M_NETADDR);
        return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
        struct radix_node *rn;
        void *w;
{
        register struct radix_node_head *rnh = (struct radix_node_head *)w;

        (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
        free((caddr_t)rn, M_NETADDR);
        return (0);
}
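
/*
 * Example (an illustrative sketch, not part of the original file):
 * a file system's mount routine typically forwards the user's
 * export_args to vfs_export() (defined below) when updating export
 * information.  `args' and `ump->um_export' are hypothetical names
 * for the mount arguments and the per-mount netexport structure.
 */
#ifdef notdef
        /* Fragment from a hypothetical fs_mount routine: */
        if (args.fspec == 0)
                return (vfs_export(mp, &ump->um_export, &args.export));
#endif /* notdef */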

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
        struct netexport *nep;
{
        register int i;
        register struct radix_node_head *rnh;

        for (i = 0; i <= AF_MAX; i++)
                if ((rnh = nep->ne_rtable[i]) != NULL) {
                        (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh);
                        free((caddr_t)rnh, M_RTABLE);
                        nep->ne_rtable[i] = 0;
                }
}

int
vfs_export(mp, nep, argp)
        struct mount *mp;
        struct netexport *nep;
        struct export_args *argp;
{
        int error;

        if (argp->ex_flags & MNT_DELEXPORT) {
                vfs_free_addrlist(nep);
                mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
        }
        if (argp->ex_flags & MNT_EXPORTED) {
                if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0)
                        return (error);
                mp->mnt_flag |= MNT_EXPORTED;
        }
        return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
        register struct mount *mp;
        struct netexport *nep;
        struct mbuf *nam;
{
        register struct netcred *np;
        register struct radix_node_head *rnh;
        struct sockaddr *saddr;

        np = NULL;
        if (mp->mnt_flag & MNT_EXPORTED) {
                /*
                 * Lookup in the export list first.
                 */
                if (nam != NULL) {
                        saddr = mtod(nam, struct sockaddr *);
                        rnh = nep->ne_rtable[saddr->sa_family];
                        if (rnh != NULL) {
                                np = (struct netcred *)
                                    (*rnh->rnh_matchaddr)((caddr_t)saddr,
                                    rnh);
                                if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
                                        np = NULL;
                        }
                }
                /*
                 * If no address match, use the default if it exists.
                 */
                if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
                        np = &nep->ne_defexported;
        }
        return (np);
}

/*
 * Do the usual access checking.
 * file_mode, uid and gid are from the vnode in question,
 * while acc_mode and cred are from the VOP_ACCESS parameter list
 */
int
vaccess(file_mode, uid, gid, acc_mode, cred)
        mode_t file_mode;
        uid_t uid;
        gid_t gid;
        mode_t acc_mode;
        struct ucred *cred;
{
        mode_t mask;

        /*
         * Super-user always gets read/write access, but execute access depends
         * on at least one execute bit being set.
         */
        if (cred->cr_uid == 0) {
                if (acc_mode & VEXEC &&
                    (file_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)
                        return (EACCES);
                else
                        return (0);
        }

        mask = 0;

        /* Otherwise, check the owner. */
        if (cred->cr_uid == uid) {
                if (acc_mode & VEXEC)
                        mask |= S_IXUSR;
                if (acc_mode & VREAD)
                        mask |= S_IRUSR;
                if (acc_mode & VWRITE)
                        mask |= S_IWUSR;
                return ((file_mode & mask) == mask ? 0 : EACCES);
        }

        /* Otherwise, check the groups. */
        if (cred->cr_gid == gid || groupmember(gid, cred)) {
                if (acc_mode & VEXEC)
                        mask |= S_IXGRP;
                if (acc_mode & VREAD)
                        mask |= S_IRGRP;
                if (acc_mode & VWRITE)
                        mask |= S_IWGRP;
                return ((file_mode & mask) == mask ? 0 : EACCES);
        }

        /* Otherwise, check everyone else. */
        if (acc_mode & VEXEC)
                mask |= S_IXOTH;
        if (acc_mode & VREAD)
                mask |= S_IROTH;
        if (acc_mode & VWRITE)
                mask |= S_IWOTH;
        return ((file_mode & mask) == mask ? 0 : EACCES);
}
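
/*
 * Example (an illustrative sketch, not part of the original file):
 * a VOP_ACCESS implementation can delegate the classic
 * owner/group/other mode check to vaccess() once it has the mode and
 * ids out of its private inode; `ip' and `mode' are hypothetical.
 */
#ifdef notdef
        /* Fragment from a hypothetical fs_access routine: */
        return (vaccess(ip->i_mode, ip->i_uid, ip->i_gid, mode, cred));
#endif /* notdef */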

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
void
vfs_unmountall()
{
        register struct mount *mp, *nmp;
        int allerror, error;

        for (allerror = 0,
            mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
                nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
                printf("unmounting %s (%s)...\n",
                    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
                if (vfs_busy(mp))
                        continue;
                if ((error = dounmount(mp, MNT_FORCE, &proc0)) != 0) {
                        printf("unmount of %s failed with error %d\n",
                            mp->mnt_stat.f_mntonname, error);
                        allerror = 1;
                }
        }
        if (allerror)
                printf("WARNING: some file systems would not unmount\n");
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
        register struct buf *bp;
        int iter, nbusy;

        /* XXX Should suspend scheduling. */
        (void) spl0();

        printf("syncing disks... ");

        if (panicstr == 0) {
                /* Release inodes held by texts before update. */
                vnode_pager_umount(NULL);
#ifdef notdef
                vnshutdown();
#endif

                /* Sync before unmount, in case we hang on something. */
                sys_sync(&proc0, (void *)0, (register_t *)0);

                /* Unmount file systems. */
                vfs_unmountall();
        }

        /* Sync again after unmount, just in case. */
        sys_sync(&proc0, (void *)0, (register_t *)0);

        /* Wait for sync to finish. */
        for (iter = 0; iter < 20; iter++) {
                nbusy = 0;
                for (bp = &buf[nbuf]; --bp >= buf; )
                        if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
                                nbusy++;
                if (nbusy == 0)
                        break;
                printf("%d ", nbusy);
                DELAY(40000 * iter);
        }
        if (nbusy)
                printf("giving up\n");
        else
                printf("done\n");
}

/*
 * Mount the root file system.  If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot()
{
        extern int (*mountroot) __P((void));
        int i;

        if (root_device == NULL)
                panic("vfs_mountroot: root device unknown");

        switch (root_device->dv_class) {
        case DV_IFNET:
                if (rootdev != NODEV)
                        panic("vfs_mountroot: rootdev set for DV_IFNET");
                break;

        case DV_DISK:
                if (rootdev == NODEV)
                        panic("vfs_mountroot: rootdev not set for DV_DISK");
                break;

        default:
                printf("%s: inappropriate for root file system\n",
                    root_device->dv_xname);
                return (ENODEV);
        }

        /*
         * If the user specified a file system, use it.
         */
        if (mountroot != NULL)
                return ((*mountroot)());

        /*
         * Try each file system currently configured into the kernel.
         */
        for (i = 0; i < nvfssw; i++) {
                if (vfssw[i] == NULL || vfssw[i]->vfs_mountroot == NULL)
                        continue;
#ifdef DEBUG
                printf("mountroot: trying %s...\n", vfssw[i]->vfs_name);
#endif
                if ((*vfssw[i]->vfs_mountroot)() == 0) {
                        printf("root file system type: %s\n",
                            vfssw[i]->vfs_name);
                        return (0);
                }
        }

        printf("no file system for %s", root_device->dv_xname);
        if (root_device->dv_class == DV_DISK)
                printf(" (dev 0x%x)", rootdev);
        printf("\n");
        return (EFTYPE);
}
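
/*
 * Example (an illustrative sketch, not part of the original file):
 * machine-dependent configuration code can force a particular root
 * file system by setting the `mountroot' hook before vfs_mountroot()
 * runs; otherwise the loop above tries each configured file system.
 * This assumes an FFS-capable kernel providing ffs_mountroot().
 */
#ifdef notdef
        extern int (*mountroot) __P((void));
        extern int ffs_mountroot __P((void));

        mountroot = ffs_mountroot;      /* insist on an FFS root */
#endif /* notdef */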

/*
 * Given a file system name, look up the vfsops for that
 * file system, or return NULL if the file system isn't present
 * in the kernel.
 */
struct vfsops *
vfs_getopsbyname(name)
        const char *name;
{
        int i;

        for (i = 0; i < nvfssw; i++)
                if (vfssw[i] != NULL && strcmp(vfssw[i]->vfs_name, name) == 0)
                        return (vfssw[i]);
        return (NULL);
}
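
/*
 * Example (an illustrative sketch, not part of the original file):
 * callers resolve a user-supplied file system name to its operations
 * vector and fail cleanly when that file system is not configured
 * into the kernel.
 */
#ifdef notdef
        struct vfsops *vfsp;

        if ((vfsp = vfs_getopsbyname("ffs")) == NULL)
                return (ENODEV);
#endif /* notdef */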