1 /* $NetBSD: vfs_subr.c,v 1.74 1997/10/05 18:39:52 thorpej Exp $ */ 2 3 /*- 4 * Copyright (c) 1997 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1989, 1993 42 * The Regents of the University of California. All rights reserved. 43 * (c) UNIX System Laboratories, Inc. 44 * All or some portions of this file are derived from material licensed 45 * to the University of California by American Telephone and Telegraph 46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 47 * the permission of UNIX System Laboratories, Inc. 48 * 49 * Redistribution and use in source and binary forms, with or without 50 * modification, are permitted provided that the following conditions 51 * are met: 52 * 1. Redistributions of source code must retain the above copyright 53 * notice, this list of conditions and the following disclaimer. 54 * 2. Redistributions in binary form must reproduce the above copyright 55 * notice, this list of conditions and the following disclaimer in the 56 * documentation and/or other materials provided with the distribution. 57 * 3. All advertising materials mentioning features or use of this software 58 * must display the following acknowledgement: 59 * This product includes software developed by the University of 60 * California, Berkeley and its contributors. 61 * 4. Neither the name of the University nor the names of its contributors 62 * may be used to endorse or promote products derived from this software 63 * without specific prior written permission. 
64 * 65 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 66 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 68 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 69 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 70 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 71 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 72 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 73 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 74 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 75 * SUCH DAMAGE. 76 * 77 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 78 */ 79 80 /* 81 * External virtual filesystem routines 82 */ 83 84 #include <sys/param.h> 85 #include <sys/systm.h> 86 #include <sys/proc.h> 87 #include <sys/mount.h> 88 #include <sys/time.h> 89 #include <sys/fcntl.h> 90 #include <sys/vnode.h> 91 #include <sys/stat.h> 92 #include <sys/namei.h> 93 #include <sys/ucred.h> 94 #include <sys/buf.h> 95 #include <sys/errno.h> 96 #include <sys/malloc.h> 97 #include <sys/domain.h> 98 #include <sys/mbuf.h> 99 #include <sys/syscallargs.h> 100 #include <sys/device.h> 101 #include <sys/dirent.h> 102 103 #include <vm/vm.h> 104 #include <sys/sysctl.h> 105 106 #include <miscfs/specfs/specdev.h> 107 108 enum vtype iftovt_tab[16] = { 109 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 110 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 111 }; 112 int vttoif_tab[9] = { 113 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 114 S_IFSOCK, S_IFIFO, S_IFMT, 115 }; 116 117 int doforce = 1; /* 1 => permit forcible unmounting */ 118 int prtactive = 0; /* 1 => print out reclaim of active vnodes */ 119 120 /* 121 * Insq/Remq for the vnode usage lists. 
122 */ 123 #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) 124 #define bufremvn(bp) { \ 125 LIST_REMOVE(bp, b_vnbufs); \ 126 (bp)->b_vnbufs.le_next = NOLIST; \ 127 } 128 TAILQ_HEAD(freelst, vnode) vnode_free_list = /* vnode free list */ 129 TAILQ_HEAD_INITIALIZER(vnode_free_list); 130 struct mntlist mountlist = /* mounted filesystem list */ 131 CIRCLEQ_HEAD_INITIALIZER(mountlist); 132 133 struct device *root_device; /* root device */ 134 struct nfs_public nfs_pub; /* publicly exported FS */ 135 136 int vfs_lock __P((struct mount *)); 137 void vfs_unlock __P((struct mount *)); 138 struct mount *getvfs __P((fsid_t *)); 139 long makefstype __P((char *)); 140 void vattr_null __P((struct vattr *)); 141 int getnewvnode __P((enum vtagtype, struct mount *, int (**)(void *), 142 struct vnode **)); 143 void insmntque __P((struct vnode *, struct mount *)); 144 int vinvalbuf __P((struct vnode *, int, struct ucred *, struct proc *, int, 145 int)); 146 void vflushbuf __P((struct vnode *, int)); 147 void brelvp __P((struct buf *)); 148 int bdevvp __P((dev_t, struct vnode **)); 149 int cdevvp __P((dev_t, struct vnode **)); 150 int getdevvp __P((dev_t, struct vnode **, enum vtype)); 151 struct vnode *checkalias __P((struct vnode *, dev_t, struct mount *)); 152 int vget __P((struct vnode *, int)); 153 void vref __P((struct vnode *)); 154 void vput __P((struct vnode *)); 155 void vrele __P((struct vnode *)); 156 void vhold __P((struct vnode *)); 157 void holdrele __P((struct vnode *)); 158 int vflush __P((struct mount *, struct vnode *, int)); 159 void vgoneall __P((struct vnode *)); 160 void vgone __P((struct vnode *)); 161 int vcount __P((struct vnode *)); 162 void vprint __P((char *, struct vnode *)); 163 int vfs_mountedon __P((struct vnode *)); 164 int vfs_export __P((struct mount *, struct netexport *, struct export_args *)); 165 struct netcred *vfs_export_lookup __P((struct mount *, struct netexport *, 166 struct mbuf *)); 167 int vaccess __P((enum vtype, mode_t, 
uid_t, gid_t, mode_t, struct ucred *)); 168 void vfs_unmountall __P((void)); 169 void vfs_shutdown __P((void)); 170 171 static int vfs_hang_addrlist __P((struct mount *, struct netexport *, 172 struct export_args *)); 173 static int vfs_free_netcred __P((struct radix_node *, void *)); 174 static void vfs_free_addrlist __P((struct netexport *)); 175 176 #ifdef DEBUG 177 void printlockedvnodes __P((void)); 178 #endif 179 180 /* 181 * Initialize the vnode management data structures. 182 */ 183 void 184 vntblinit() 185 { 186 187 /* 188 * Nothing to do here anymore; vnode_free_list and mountlist 189 * are now initialized data. 190 */ 191 } 192 193 /* 194 * Lock a filesystem. 195 * Used to prevent access to it while mounting and unmounting. 196 */ 197 int 198 vfs_lock(mp) 199 register struct mount *mp; 200 { 201 202 while (mp->mnt_flag & MNT_MLOCK) { 203 mp->mnt_flag |= MNT_MWAIT; 204 tsleep((caddr_t)mp, PVFS, "vfslock", 0); 205 } 206 mp->mnt_flag |= MNT_MLOCK; 207 return (0); 208 } 209 210 /* 211 * Unlock a locked filesystem. 212 * Panic if filesystem is not locked. 213 */ 214 void 215 vfs_unlock(mp) 216 register struct mount *mp; 217 { 218 219 if ((mp->mnt_flag & MNT_MLOCK) == 0) 220 panic("vfs_unlock: not locked"); 221 mp->mnt_flag &= ~MNT_MLOCK; 222 if (mp->mnt_flag & MNT_MWAIT) { 223 mp->mnt_flag &= ~MNT_MWAIT; 224 wakeup((caddr_t)mp); 225 } 226 } 227 228 /* 229 * Mark a mount point as busy. 230 * Used to synchronize access and to delay unmounting. 231 */ 232 int 233 vfs_busy(mp) 234 register struct mount *mp; 235 { 236 int unmounting = mp->mnt_flag & MNT_UNMOUNT; 237 238 while(mp->mnt_flag & MNT_MPBUSY) { 239 mp->mnt_flag |= MNT_MPWANT; 240 tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0); 241 if (unmounting) 242 return (1); 243 } 244 mp->mnt_flag |= MNT_MPBUSY; 245 return (0); 246 } 247 248 /* 249 * Free a busy filesystem. 250 * Panic if filesystem is not busy. 
251 */ 252 void 253 vfs_unbusy(mp) 254 register struct mount *mp; 255 { 256 257 if ((mp->mnt_flag & MNT_MPBUSY) == 0) 258 panic("vfs_unbusy: not busy"); 259 mp->mnt_flag &= ~MNT_MPBUSY; 260 if (mp->mnt_flag & MNT_MPWANT) { 261 mp->mnt_flag &= ~MNT_MPWANT; 262 wakeup((caddr_t)&mp->mnt_flag); 263 } 264 } 265 266 /* 267 * Lookup a mount point by filesystem identifier. 268 */ 269 struct mount * 270 getvfs(fsid) 271 fsid_t *fsid; 272 { 273 register struct mount *mp; 274 275 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 276 mp = mp->mnt_list.cqe_next) 277 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 278 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) 279 return (mp); 280 return ((struct mount *)0); 281 } 282 283 /* 284 * Get a new unique fsid 285 */ 286 void 287 getnewfsid(mp, mtype) 288 struct mount *mp; 289 int mtype; 290 { 291 static u_short xxxfs_mntid; 292 293 fsid_t tfsid; 294 295 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + 11, 0); /* XXX */ 296 mp->mnt_stat.f_fsid.val[1] = mtype; 297 if (xxxfs_mntid == 0) 298 ++xxxfs_mntid; 299 tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid); 300 tfsid.val[1] = mtype; 301 if (mountlist.cqh_first != (void *)&mountlist) { 302 while (getvfs(&tfsid)) { 303 tfsid.val[0]++; 304 xxxfs_mntid++; 305 } 306 } 307 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 308 } 309 310 /* 311 * Make a 'unique' number from a mount type name. 312 */ 313 long 314 makefstype(type) 315 char *type; 316 { 317 long rv; 318 319 for (rv = 0; *type; type++) { 320 rv <<= 2; 321 rv ^= *type; 322 } 323 return rv; 324 } 325 326 /* 327 * Set vnode attributes to VNOVAL 328 */ 329 void 330 vattr_null(vap) 331 register struct vattr *vap; 332 { 333 334 vap->va_type = VNON; 335 /* XXX These next two used to be one line, but for a GCC bug. 
*/ 336 vap->va_size = VNOVAL; 337 vap->va_bytes = VNOVAL; 338 vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = 339 vap->va_fsid = vap->va_fileid = 340 vap->va_blocksize = vap->va_rdev = 341 vap->va_atime.tv_sec = vap->va_atime.tv_nsec = 342 vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec = 343 vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec = 344 vap->va_flags = vap->va_gen = VNOVAL; 345 vap->va_vaflags = 0; 346 } 347 348 /* 349 * Routines having to do with the management of the vnode table. 350 */ 351 extern int (**dead_vnodeop_p) __P((void *)); 352 long numvnodes; 353 354 /* 355 * Return the next vnode from the free list. 356 */ 357 int 358 getnewvnode(tag, mp, vops, vpp) 359 enum vtagtype tag; 360 struct mount *mp; 361 int (**vops) __P((void *)); 362 struct vnode **vpp; 363 { 364 register struct vnode *vp; 365 #ifdef DIAGNOSTIC 366 int s; 367 #endif 368 369 if ((vnode_free_list.tqh_first == NULL && 370 numvnodes < 2 * desiredvnodes) || 371 numvnodes < desiredvnodes) { 372 vp = (struct vnode *)malloc((u_long)sizeof *vp, 373 M_VNODE, M_WAITOK); 374 bzero((char *)vp, sizeof *vp); 375 numvnodes++; 376 } else { 377 if ((vp = vnode_free_list.tqh_first) == NULL) { 378 tablefull("vnode"); 379 *vpp = 0; 380 return (ENFILE); 381 } 382 if (vp->v_usecount) { 383 vprint("free vnode", vp); 384 panic("free vnode isn't"); 385 } 386 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 387 /* see comment on why 0xdeadb is set at end of vgone (below) */ 388 vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb; 389 vp->v_lease = NULL; 390 if (vp->v_type != VBAD) 391 vgone(vp); 392 #ifdef DIAGNOSTIC 393 if (vp->v_data) { 394 vprint("cleaned vnode", vp); 395 panic("cleaned vnode isn't"); 396 } 397 s = splbio(); 398 if (vp->v_numoutput) 399 panic("Clean vnode has pending I/O's"); 400 splx(s); 401 #endif 402 vp->v_flag = 0; 403 vp->v_lastr = 0; 404 vp->v_ralen = 0; 405 vp->v_maxra = 0; 406 vp->v_lastw = 0; 407 vp->v_lasta = 0; 408 vp->v_cstart = 0; 409 vp->v_clen = 0; 410 vp->v_socket = 
0; 411 } 412 vp->v_type = VNON; 413 cache_purge(vp); 414 vp->v_tag = tag; 415 vp->v_op = vops; 416 insmntque(vp, mp); 417 *vpp = vp; 418 vp->v_usecount = 1; 419 vp->v_data = 0; 420 return (0); 421 } 422 423 /* 424 * Move a vnode from one mount queue to another. 425 */ 426 void 427 insmntque(vp, mp) 428 register struct vnode *vp; 429 register struct mount *mp; 430 { 431 432 /* 433 * Delete from old mount point vnode list, if on one. 434 */ 435 if (vp->v_mount != NULL) 436 LIST_REMOVE(vp, v_mntvnodes); 437 /* 438 * Insert into list of vnodes for the new mount point, if available. 439 */ 440 if ((vp->v_mount = mp) == NULL) 441 return; 442 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 443 } 444 445 /* 446 * Update outstanding I/O count and do wakeup if requested. 447 */ 448 void 449 vwakeup(bp) 450 register struct buf *bp; 451 { 452 register struct vnode *vp; 453 454 bp->b_flags &= ~B_WRITEINPROG; 455 if ((vp = bp->b_vp) != NULL) { 456 if (--vp->v_numoutput < 0) 457 panic("vwakeup: neg numoutput"); 458 if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { 459 vp->v_flag &= ~VBWAIT; 460 wakeup((caddr_t)&vp->v_numoutput); 461 } 462 } 463 } 464 465 /* 466 * Flush out and invalidate all buffers associated with a vnode. 467 * Called with the underlying object locked. 
468 */ 469 int 470 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) 471 register struct vnode *vp; 472 int flags; 473 struct ucred *cred; 474 struct proc *p; 475 int slpflag, slptimeo; 476 { 477 register struct buf *bp; 478 struct buf *nbp, *blist; 479 int s, error; 480 481 if (flags & V_SAVE) { 482 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0) 483 return (error); 484 if (vp->v_dirtyblkhd.lh_first != NULL) 485 panic("vinvalbuf: dirty bufs"); 486 } 487 for (;;) { 488 if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA) 489 while (blist && blist->b_lblkno < 0) 490 blist = blist->b_vnbufs.le_next; 491 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && 492 (flags & V_SAVEMETA)) 493 while (blist && blist->b_lblkno < 0) 494 blist = blist->b_vnbufs.le_next; 495 if (!blist) 496 break; 497 498 for (bp = blist; bp; bp = nbp) { 499 nbp = bp->b_vnbufs.le_next; 500 if (flags & V_SAVEMETA && bp->b_lblkno < 0) 501 continue; 502 s = splbio(); 503 if (bp->b_flags & B_BUSY) { 504 bp->b_flags |= B_WANTED; 505 error = tsleep((caddr_t)bp, 506 slpflag | (PRIBIO + 1), "vinvalbuf", 507 slptimeo); 508 splx(s); 509 if (error) 510 return (error); 511 break; 512 } 513 bp->b_flags |= B_BUSY | B_VFLUSH; 514 splx(s); 515 /* 516 * XXX Since there are no node locks for NFS, I believe 517 * there is a slight chance that a delayed write will 518 * occur while sleeping just above, so check for it. 
519 */ 520 if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { 521 (void) VOP_BWRITE(bp); 522 break; 523 } 524 bp->b_flags |= B_INVAL; 525 brelse(bp); 526 } 527 } 528 if (!(flags & V_SAVEMETA) && 529 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) 530 panic("vinvalbuf: flush failed"); 531 return (0); 532 } 533 534 void 535 vflushbuf(vp, sync) 536 register struct vnode *vp; 537 int sync; 538 { 539 register struct buf *bp, *nbp; 540 int s; 541 542 loop: 543 s = splbio(); 544 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { 545 nbp = bp->b_vnbufs.le_next; 546 if ((bp->b_flags & B_BUSY)) 547 continue; 548 if ((bp->b_flags & B_DELWRI) == 0) 549 panic("vflushbuf: not dirty"); 550 bp->b_flags |= B_BUSY | B_VFLUSH; 551 splx(s); 552 /* 553 * Wait for I/O associated with indirect blocks to complete, 554 * since there is no way to quickly wait for them below. 555 */ 556 if (bp->b_vp == vp || sync == 0) 557 (void) bawrite(bp); 558 else 559 (void) bwrite(bp); 560 goto loop; 561 } 562 if (sync == 0) { 563 splx(s); 564 return; 565 } 566 while (vp->v_numoutput) { 567 vp->v_flag |= VBWAIT; 568 tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0); 569 } 570 splx(s); 571 if (vp->v_dirtyblkhd.lh_first != NULL) { 572 vprint("vflushbuf: dirty", vp); 573 goto loop; 574 } 575 } 576 577 /* 578 * Associate a buffer with a vnode. 579 */ 580 void 581 bgetvp(vp, bp) 582 register struct vnode *vp; 583 register struct buf *bp; 584 { 585 586 if (bp->b_vp) 587 panic("bgetvp: not free"); 588 VHOLD(vp); 589 bp->b_vp = vp; 590 if (vp->v_type == VBLK || vp->v_type == VCHR) 591 bp->b_dev = vp->v_rdev; 592 else 593 bp->b_dev = NODEV; 594 /* 595 * Insert onto list for new vnode. 596 */ 597 bufinsvn(bp, &vp->v_cleanblkhd); 598 } 599 600 /* 601 * Disassociate a buffer from a vnode. 
602 */ 603 void 604 brelvp(bp) 605 register struct buf *bp; 606 { 607 struct vnode *vp; 608 609 if (bp->b_vp == (struct vnode *) 0) 610 panic("brelvp: NULL"); 611 /* 612 * Delete from old vnode list, if on one. 613 */ 614 if (bp->b_vnbufs.le_next != NOLIST) 615 bufremvn(bp); 616 vp = bp->b_vp; 617 bp->b_vp = (struct vnode *) 0; 618 HOLDRELE(vp); 619 } 620 621 /* 622 * Reassign a buffer from one vnode to another. 623 * Used to assign file specific control information 624 * (indirect blocks) to the vnode to which they belong. 625 */ 626 void 627 reassignbuf(bp, newvp) 628 register struct buf *bp; 629 register struct vnode *newvp; 630 { 631 register struct buflists *listheadp; 632 633 if (newvp == NULL) { 634 printf("reassignbuf: NULL"); 635 return; 636 } 637 /* 638 * Delete from old vnode list, if on one. 639 */ 640 if (bp->b_vnbufs.le_next != NOLIST) 641 bufremvn(bp); 642 /* 643 * If dirty, put on list of dirty buffers; 644 * otherwise insert onto list of clean buffers. 645 */ 646 if (bp->b_flags & B_DELWRI) 647 listheadp = &newvp->v_dirtyblkhd; 648 else 649 listheadp = &newvp->v_cleanblkhd; 650 bufinsvn(bp, listheadp); 651 } 652 653 /* 654 * Create a vnode for a block device. 655 * Used for root filesystem and swap areas. 656 * Also used for memory file system special devices. 657 */ 658 int 659 bdevvp(dev, vpp) 660 dev_t dev; 661 struct vnode **vpp; 662 { 663 664 return (getdevvp(dev, vpp, VBLK)); 665 } 666 667 /* 668 * Create a vnode for a character device. 669 * Used for kernfs and some console handling. 670 */ 671 int 672 cdevvp(dev, vpp) 673 dev_t dev; 674 struct vnode **vpp; 675 { 676 677 return (getdevvp(dev, vpp, VCHR)); 678 } 679 680 /* 681 * Create a vnode for a device. 682 * Used by bdevvp (block device) for root file system etc., 683 * and by cdevvp (character device) for console and kernfs. 
684 */ 685 int 686 getdevvp(dev, vpp, type) 687 dev_t dev; 688 struct vnode **vpp; 689 enum vtype type; 690 { 691 register struct vnode *vp; 692 struct vnode *nvp; 693 int error; 694 695 if (dev == NODEV) 696 return (0); 697 error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp); 698 if (error) { 699 *vpp = NULLVP; 700 return (error); 701 } 702 vp = nvp; 703 vp->v_type = type; 704 if ((nvp = checkalias(vp, dev, NULL)) != 0) { 705 vput(vp); 706 vp = nvp; 707 } 708 *vpp = vp; 709 return (0); 710 } 711 712 /* 713 * Check to see if the new vnode represents a special device 714 * for which we already have a vnode (either because of 715 * bdevvp() or because of a different vnode representing 716 * the same block device). If such an alias exists, deallocate 717 * the existing contents and return the aliased vnode. The 718 * caller is responsible for filling it with its new contents. 719 */ 720 struct vnode * 721 checkalias(nvp, nvp_rdev, mp) 722 register struct vnode *nvp; 723 dev_t nvp_rdev; 724 struct mount *mp; 725 { 726 register struct vnode *vp; 727 struct vnode **vpp; 728 729 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 730 return (NULLVP); 731 732 vpp = &speclisth[SPECHASH(nvp_rdev)]; 733 loop: 734 for (vp = *vpp; vp; vp = vp->v_specnext) { 735 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) 736 continue; 737 /* 738 * Alias, but not in use, so flush it out. 
739 */ 740 if (vp->v_usecount == 0) { 741 vgone(vp); 742 goto loop; 743 } 744 if (vget(vp, 1)) 745 goto loop; 746 break; 747 } 748 if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) { 749 MALLOC(nvp->v_specinfo, struct specinfo *, 750 sizeof(struct specinfo), M_VNODE, M_WAITOK); 751 nvp->v_rdev = nvp_rdev; 752 nvp->v_hashchain = vpp; 753 nvp->v_specnext = *vpp; 754 nvp->v_specflags = 0; 755 nvp->v_speclockf = NULL; 756 *vpp = nvp; 757 if (vp != NULL) { 758 nvp->v_flag |= VALIASED; 759 vp->v_flag |= VALIASED; 760 vput(vp); 761 } 762 return (NULLVP); 763 } 764 VOP_UNLOCK(vp); 765 vclean(vp, 0); 766 vp->v_op = nvp->v_op; 767 vp->v_tag = nvp->v_tag; 768 nvp->v_type = VNON; 769 insmntque(vp, mp); 770 return (vp); 771 } 772 773 /* 774 * Grab a particular vnode from the free list, increment its 775 * reference count and lock it. The vnode lock bit is set the 776 * vnode is being eliminated in vgone. The process is awakened 777 * when the transition is completed, and an error returned to 778 * indicate that the vnode is no longer usable (possibly having 779 * been changed to a new file system type). 780 */ 781 int 782 vget(vp, lockflag) 783 register struct vnode *vp; 784 int lockflag; 785 { 786 787 /* 788 * If the vnode is in the process of being cleaned out for 789 * another use, we wait for the cleaning to finish and then 790 * return failure. Cleaning is determined either by checking 791 * that the VXLOCK flag is set, or that the use count is 792 * zero with the back pointer set to show that it has been 793 * removed from the free list by getnewvnode. The VXLOCK 794 * flag may not have been set yet because vclean is blocked in 795 * the VOP_LOCK call waiting for the VOP_INACTIVE to complete. 
796 */ 797 if ((vp->v_flag & VXLOCK) || 798 (vp->v_usecount == 0 && 799 vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) { 800 vp->v_flag |= VXWANT; 801 tsleep((caddr_t)vp, PINOD, "vget", 0); 802 return (1); 803 } 804 if (vp->v_usecount == 0) 805 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 806 vp->v_usecount++; 807 if (lockflag) 808 VOP_LOCK(vp); 809 return (0); 810 } 811 812 /* 813 * Vnode reference, just increment the count 814 */ 815 void 816 vref(vp) 817 struct vnode *vp; 818 { 819 820 if (vp->v_usecount <= 0) 821 panic("vref used where vget required"); 822 vp->v_usecount++; 823 } 824 825 /* 826 * vput(), just unlock and vrele() 827 */ 828 void 829 vput(vp) 830 register struct vnode *vp; 831 { 832 833 VOP_UNLOCK(vp); 834 vrele(vp); 835 } 836 837 /* 838 * Vnode release. 839 * If count drops to zero, call inactive routine and return to freelist. 840 */ 841 void 842 vrele(vp) 843 register struct vnode *vp; 844 { 845 846 #ifdef DIAGNOSTIC 847 if (vp == NULL) 848 panic("vrele: null vp"); 849 #endif 850 vp->v_usecount--; 851 if (vp->v_usecount > 0) 852 return; 853 #ifdef DIAGNOSTIC 854 if (vp->v_usecount != 0 || vp->v_writecount != 0) { 855 vprint("vrele: bad ref count", vp); 856 panic("vrele: ref cnt"); 857 } 858 #endif 859 /* 860 * insert at tail of LRU list 861 */ 862 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 863 VOP_INACTIVE(vp); 864 } 865 866 /* 867 * Page or buffer structure gets a reference. 868 */ 869 void 870 vhold(vp) 871 register struct vnode *vp; 872 { 873 874 vp->v_holdcnt++; 875 } 876 877 /* 878 * Page or buffer structure frees a reference. 879 */ 880 void 881 holdrele(vp) 882 register struct vnode *vp; 883 { 884 885 if (vp->v_holdcnt <= 0) 886 panic("holdrele: holdcnt"); 887 vp->v_holdcnt--; 888 } 889 890 /* 891 * Remove any vnodes in the vnode table belonging to mount point mp. 
892 * 893 * If MNT_NOFORCE is specified, there should not be any active ones, 894 * return error if any are found (nb: this is a user error, not a 895 * system error). If MNT_FORCE is specified, detach any active vnodes 896 * that are found. 897 */ 898 #ifdef DEBUG 899 int busyprt = 0; /* print out busy vnodes */ 900 struct ctldebug debug1 = { "busyprt", &busyprt }; 901 #endif 902 903 int 904 vflush(mp, skipvp, flags) 905 struct mount *mp; 906 struct vnode *skipvp; 907 int flags; 908 { 909 register struct vnode *vp, *nvp; 910 int busy = 0; 911 912 if ((mp->mnt_flag & MNT_MPBUSY) == 0) 913 panic("vflush: not busy"); 914 loop: 915 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { 916 if (vp->v_mount != mp) 917 goto loop; 918 nvp = vp->v_mntvnodes.le_next; 919 /* 920 * Skip over a selected vnode. 921 */ 922 if (vp == skipvp) 923 continue; 924 /* 925 * Skip over a vnodes marked VSYSTEM. 926 */ 927 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) 928 continue; 929 /* 930 * If WRITECLOSE is set, only flush out regular file 931 * vnodes open for writing. 932 */ 933 if ((flags & WRITECLOSE) && 934 (vp->v_writecount == 0 || vp->v_type != VREG)) 935 continue; 936 /* 937 * With v_usecount == 0, all we need to do is clear 938 * out the vnode data structures and we are done. 939 */ 940 if (vp->v_usecount == 0) { 941 vgone(vp); 942 continue; 943 } 944 /* 945 * If FORCECLOSE is set, forcibly close the vnode. 946 * For block or character devices, revert to an 947 * anonymous device. For all other files, just kill them. 948 */ 949 if (flags & FORCECLOSE) { 950 if (vp->v_type != VBLK && vp->v_type != VCHR) { 951 vgone(vp); 952 } else { 953 vclean(vp, 0); 954 vp->v_op = spec_vnodeop_p; 955 insmntque(vp, (struct mount *)0); 956 } 957 continue; 958 } 959 #ifdef DEBUG 960 if (busyprt) 961 vprint("vflush: busy vnode", vp); 962 #endif 963 busy++; 964 } 965 if (busy) 966 return (EBUSY); 967 return (0); 968 } 969 970 /* 971 * Disassociate the underlying file system from a vnode. 
972 */ 973 void 974 vclean(vp, flags) 975 register struct vnode *vp; 976 int flags; 977 { 978 int active; 979 980 /* 981 * Check to see if the vnode is in use. 982 * If so we have to reference it before we clean it out 983 * so that its count cannot fall to zero and generate a 984 * race against ourselves to recycle it. 985 */ 986 if ((active = vp->v_usecount) != 0) 987 VREF(vp); 988 /* 989 * Even if the count is zero, the VOP_INACTIVE routine may still 990 * have the object locked while it cleans it out. The VOP_LOCK 991 * ensures that the VOP_INACTIVE routine is done with its work. 992 * For active vnodes, it ensures that no other activity can 993 * occur while the underlying object is being cleaned out. 994 */ 995 VOP_LOCK(vp); 996 /* 997 * Prevent the vnode from being recycled or 998 * brought into use while we clean it out. 999 */ 1000 if (vp->v_flag & VXLOCK) 1001 panic("vclean: deadlock"); 1002 vp->v_flag |= VXLOCK; 1003 /* 1004 * Clean out any buffers associated with the vnode. 1005 */ 1006 if (flags & DOCLOSE) 1007 vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); 1008 /* 1009 * Any other processes trying to obtain this lock must first 1010 * wait for VXLOCK to clear, then call the new lock operation. 1011 */ 1012 VOP_UNLOCK(vp); 1013 /* 1014 * If purging an active vnode, it must be closed and 1015 * deactivated before being reclaimed. 1016 */ 1017 if (active) { 1018 if (flags & DOCLOSE) 1019 VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL); 1020 VOP_INACTIVE(vp); 1021 } 1022 /* 1023 * Reclaim the vnode. 1024 */ 1025 if (VOP_RECLAIM(vp)) 1026 panic("vclean: cannot reclaim"); 1027 if (active) 1028 vrele(vp); 1029 1030 /* 1031 * Done with purge, notify sleepers of the grim news. 
1032 */ 1033 vp->v_op = dead_vnodeop_p; 1034 vp->v_tag = VT_NON; 1035 vp->v_flag &= ~VXLOCK; 1036 if (vp->v_flag & VXWANT) { 1037 vp->v_flag &= ~VXWANT; 1038 wakeup((caddr_t)vp); 1039 } 1040 } 1041 1042 /* 1043 * Eliminate all activity associated with the requested vnode 1044 * and with all vnodes aliased to the requested vnode. 1045 */ 1046 void 1047 vgoneall(vp) 1048 register struct vnode *vp; 1049 { 1050 register struct vnode *vq; 1051 1052 if (vp->v_flag & VALIASED) { 1053 /* 1054 * If a vgone (or vclean) is already in progress, 1055 * wait until it is done and return. 1056 */ 1057 if (vp->v_flag & VXLOCK) { 1058 vp->v_flag |= VXWANT; 1059 tsleep((caddr_t)vp, PINOD, "vgoneall", 0); 1060 return; 1061 } 1062 /* 1063 * Ensure that vp will not be vgone'd while we 1064 * are eliminating its aliases. 1065 */ 1066 vp->v_flag |= VXLOCK; 1067 while (vp->v_flag & VALIASED) { 1068 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1069 if (vq->v_rdev != vp->v_rdev || 1070 vq->v_type != vp->v_type || vp == vq) 1071 continue; 1072 vgone(vq); 1073 break; 1074 } 1075 } 1076 /* 1077 * Remove the lock so that vgone below will 1078 * really eliminate the vnode after which time 1079 * vgone will awaken any sleepers. 1080 */ 1081 vp->v_flag &= ~VXLOCK; 1082 } 1083 vgone(vp); 1084 } 1085 1086 /* 1087 * Eliminate all activity associated with a vnode 1088 * in preparation for reuse. 1089 */ 1090 void 1091 vgone(vp) 1092 register struct vnode *vp; 1093 { 1094 register struct vnode *vq; 1095 struct vnode *vx; 1096 1097 /* 1098 * If a vgone (or vclean) is already in progress, 1099 * wait until it is done and return. 1100 */ 1101 if (vp->v_flag & VXLOCK) { 1102 vp->v_flag |= VXWANT; 1103 tsleep((caddr_t)vp, PINOD, "vgone", 0); 1104 return; 1105 } 1106 /* 1107 * Clean out the filesystem specific data. 1108 */ 1109 vclean(vp, DOCLOSE); 1110 /* 1111 * Delete from old mount point vnode list, if on one. 
1112 */ 1113 insmntque(vp, (struct mount *)0); 1114 /* 1115 * If special device, remove it from special device alias list. 1116 */ 1117 if (vp->v_type == VBLK || vp->v_type == VCHR) { 1118 if (*vp->v_hashchain == vp) { 1119 *vp->v_hashchain = vp->v_specnext; 1120 } else { 1121 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1122 if (vq->v_specnext != vp) 1123 continue; 1124 vq->v_specnext = vp->v_specnext; 1125 break; 1126 } 1127 if (vq == NULL) 1128 panic("missing bdev"); 1129 } 1130 if (vp->v_flag & VALIASED) { 1131 vx = NULL; 1132 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1133 if (vq->v_rdev != vp->v_rdev || 1134 vq->v_type != vp->v_type) 1135 continue; 1136 if (vx) 1137 break; 1138 vx = vq; 1139 } 1140 if (vx == NULL) 1141 panic("missing alias"); 1142 if (vq == NULL) 1143 vx->v_flag &= ~VALIASED; 1144 vp->v_flag &= ~VALIASED; 1145 } 1146 FREE(vp->v_specinfo, M_VNODE); 1147 vp->v_specinfo = NULL; 1148 } 1149 /* 1150 * If it is on the freelist and not already at the head, 1151 * move it to the head of the list. The test of the back 1152 * pointer and the reference count of zero is because 1153 * it will be removed from the free list by getnewvnode, 1154 * but will not have its reference count incremented until 1155 * after calling vgone. If the reference count were 1156 * incremented first, vgone would (incorrectly) try to 1157 * close the previous instance of the underlying object. 1158 * So, the back pointer is explicitly set to `0xdeadb' in 1159 * getnewvnode after removing it from the freelist to ensure 1160 * that we do not try to move it here. 1161 */ 1162 if (vp->v_usecount == 0 && 1163 vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb && 1164 vnode_free_list.tqh_first != vp) { 1165 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1166 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 1167 } 1168 vp->v_type = VBAD; 1169 } 1170 1171 /* 1172 * Lookup a vnode by device number. 
1173 */ 1174 int 1175 vfinddev(dev, type, vpp) 1176 dev_t dev; 1177 enum vtype type; 1178 struct vnode **vpp; 1179 { 1180 register struct vnode *vp; 1181 1182 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 1183 if (dev != vp->v_rdev || type != vp->v_type) 1184 continue; 1185 *vpp = vp; 1186 return (1); 1187 } 1188 return (0); 1189 } 1190 1191 /* 1192 * Calculate the total number of references to a special device. 1193 */ 1194 int 1195 vcount(vp) 1196 register struct vnode *vp; 1197 { 1198 register struct vnode *vq, *vnext; 1199 int count; 1200 1201 loop: 1202 if ((vp->v_flag & VALIASED) == 0) 1203 return (vp->v_usecount); 1204 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { 1205 vnext = vq->v_specnext; 1206 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) 1207 continue; 1208 /* 1209 * Alias, but not in use, so flush it out. 1210 */ 1211 if (vq->v_usecount == 0 && vq != vp) { 1212 vgone(vq); 1213 goto loop; 1214 } 1215 count += vq->v_usecount; 1216 } 1217 return (count); 1218 } 1219 1220 /* 1221 * Print out a description of a vnode. 
 */
/* Printable names for the members of enum vtype, indexed by vtype value. */
static char *typename[] =
    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

/*
 * Print out a description of a vnode: type name, reference counts and
 * flag bits, followed by the filesystem-specific VOP_PRINT output when
 * v_data is attached.  label, if non-NULL, is printed as a prefix.
 * Diagnostic aid only; writes to the console via printf.
 */
void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];	/* holds the "|FLAG|FLAG..." string; 64 covers all seven */

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %ld,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	/* Accumulate "|FLAG" for each set bit; skip the leading '|' below. */
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	/* Walk every mount point, then every vnode hung off it. */
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	    mp = mp->mnt_list.cqe_next) {
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next)
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
	}
}
#endif

int kinfo_vdebug = 1;		/* log "vp changed" races in sysctl_vnode */
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10	/* slack for vnodes created during the dump */
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
1292 */ 1293 /* ARGSUSED */ 1294 int 1295 sysctl_vnode(where, sizep) 1296 char *where; 1297 size_t *sizep; 1298 { 1299 register struct mount *mp, *nmp; 1300 struct vnode *vp; 1301 register char *bp = where, *savebp; 1302 char *ewhere; 1303 int error; 1304 1305 #define VPTRSZ sizeof (struct vnode *) 1306 #define VNODESZ sizeof (struct vnode) 1307 if (where == NULL) { 1308 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 1309 return (0); 1310 } 1311 ewhere = where + *sizep; 1312 1313 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1314 nmp = mp->mnt_list.cqe_next; 1315 if (vfs_busy(mp)) 1316 continue; 1317 savebp = bp; 1318 again: 1319 for (vp = mp->mnt_vnodelist.lh_first; 1320 vp != NULL; 1321 vp = vp->v_mntvnodes.le_next) { 1322 /* 1323 * Check that the vp is still associated with 1324 * this filesystem. RACE: could have been 1325 * recycled onto the same filesystem. 1326 */ 1327 if (vp->v_mount != mp) { 1328 if (kinfo_vdebug) 1329 printf("kinfo: vp changed\n"); 1330 bp = savebp; 1331 goto again; 1332 } 1333 if (bp + VPTRSZ + VNODESZ > ewhere) { 1334 *sizep = bp - where; 1335 return (ENOMEM); 1336 } 1337 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || 1338 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) 1339 return (error); 1340 bp += VPTRSZ + VNODESZ; 1341 } 1342 vfs_unbusy(mp); 1343 } 1344 1345 *sizep = bp - where; 1346 return (0); 1347 } 1348 1349 /* 1350 * Check to see if a filesystem is mounted on a block device. 
 */
/*
 * Check to see if a filesystem is mounted on a block device: returns
 * EBUSY if this special vnode, or any alias of it, carries SI_MOUNTEDON;
 * 0 otherwise.
 */
int
vfs_mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		/* Check every alias of the same device on the hash chain. */
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON)
				return (EBUSY);
		}
	}
	return (0);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 * Returns 0 on success, EPERM for a duplicate/conflicting export,
 * ENOBUFS if no routing table could be attached for the address
 * family, or a copyin error.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np, *enp;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	/*
	 * Zero address length means "set the default export" rather than
	 * an address-specific entry; only one default is allowed.
	 */
	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	/*
	 * Allocate the netcred with the address (and optional mask)
	 * appended in a single chunk, then copy both in from userland.
	 */
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t)np, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
	if (error)
		goto out;
	/* Clamp sa_len to what was actually copied in. */
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
					dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
		np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
		/*
		 * An entry for this address is already in the tree.  That
		 * is acceptable only if it grants exactly the same flags
		 * and anonymous credentials; otherwise fail with EPERM.
		 */
		if (rn == 0) {
			enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
			    smask, rnh);
			if (enp == 0) {
				error = EPERM;
				goto out;
			}
		} else
			enp = (struct netcred *)rn;

		if (enp->netc_exflags != argp->ex_flags ||
		    enp->netc_anon.cr_uid != argp->ex_anon.cr_uid ||
		    enp->netc_anon.cr_gid != argp->ex_anon.cr_gid ||
		    enp->netc_anon.cr_ngroups != argp->ex_anon.cr_ngroups ||
		    bcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups,
			enp->netc_anon.cr_ngroups))
			error = EPERM;
		else
			error = 0;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/*
 * Delete one exported-address entry from its radix tree and free it.
 * Used as the rnh_walktree callback by vfs_free_addrlist(); w is the
 * radix_node_head being torn down.
 */
/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
1482 */ 1483 static void 1484 vfs_free_addrlist(nep) 1485 struct netexport *nep; 1486 { 1487 register int i; 1488 register struct radix_node_head *rnh; 1489 1490 for (i = 0; i <= AF_MAX; i++) 1491 if ((rnh = nep->ne_rtable[i]) != NULL) { 1492 (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh); 1493 free((caddr_t)rnh, M_RTABLE); 1494 nep->ne_rtable[i] = 0; 1495 } 1496 } 1497 1498 int 1499 vfs_export(mp, nep, argp) 1500 struct mount *mp; 1501 struct netexport *nep; 1502 struct export_args *argp; 1503 { 1504 int error; 1505 1506 if (argp->ex_flags & MNT_DELEXPORT) { 1507 if (mp->mnt_flag & MNT_EXPUBLIC) { 1508 vfs_setpublicfs(NULL, NULL, NULL); 1509 mp->mnt_flag &= ~MNT_EXPUBLIC; 1510 } 1511 vfs_free_addrlist(nep); 1512 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 1513 } 1514 if (argp->ex_flags & MNT_EXPORTED) { 1515 if (argp->ex_flags & MNT_EXPUBLIC) { 1516 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 1517 return (error); 1518 mp->mnt_flag |= MNT_EXPUBLIC; 1519 } 1520 if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0) 1521 return (error); 1522 mp->mnt_flag |= MNT_EXPORTED; 1523 } 1524 return (0); 1525 } 1526 1527 /* 1528 * Set the publicly exported filesystem (WebNFS). Currently, only 1529 * one public filesystem is possible in the spec (RFC 2054 and 2055) 1530 */ 1531 int 1532 vfs_setpublicfs(mp, nep, argp) 1533 struct mount *mp; 1534 struct netexport *nep; 1535 struct export_args *argp; 1536 { 1537 int error; 1538 struct vnode *rvp; 1539 char *cp; 1540 1541 /* 1542 * mp == NULL -> invalidate the current info, the FS is 1543 * no longer exported. May be called from either vfs_export 1544 * or unmount, so check if it hasn't already been done. 1545 */ 1546 if (mp == NULL) { 1547 if (nfs_pub.np_valid) { 1548 nfs_pub.np_valid = 0; 1549 if (nfs_pub.np_index != NULL) { 1550 FREE(nfs_pub.np_index, M_TEMP); 1551 nfs_pub.np_index = NULL; 1552 } 1553 } 1554 return (0); 1555 } 1556 1557 /* 1558 * Only one allowed at a time. 
1559 */ 1560 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 1561 return (EBUSY); 1562 1563 /* 1564 * Get real filehandle for root of exported FS. 1565 */ 1566 bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); 1567 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 1568 1569 if ((error = VFS_ROOT(mp, &rvp))) 1570 return (error); 1571 1572 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 1573 return (error); 1574 1575 vput(rvp); 1576 1577 /* 1578 * If an indexfile was specified, pull it in. 1579 */ 1580 if (argp->ex_indexfile != NULL) { 1581 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 1582 M_WAITOK); 1583 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 1584 MAXNAMLEN, (size_t *)0); 1585 if (!error) { 1586 /* 1587 * Check for illegal filenames. 1588 */ 1589 for (cp = nfs_pub.np_index; *cp; cp++) { 1590 if (*cp == '/') { 1591 error = EINVAL; 1592 break; 1593 } 1594 } 1595 } 1596 if (error) { 1597 FREE(nfs_pub.np_index, M_TEMP); 1598 return (error); 1599 } 1600 } 1601 1602 nfs_pub.np_mount = mp; 1603 nfs_pub.np_valid = 1; 1604 return (0); 1605 } 1606 1607 struct netcred * 1608 vfs_export_lookup(mp, nep, nam) 1609 register struct mount *mp; 1610 struct netexport *nep; 1611 struct mbuf *nam; 1612 { 1613 register struct netcred *np; 1614 register struct radix_node_head *rnh; 1615 struct sockaddr *saddr; 1616 1617 np = NULL; 1618 if (mp->mnt_flag & MNT_EXPORTED) { 1619 /* 1620 * Lookup in the export list first. 1621 */ 1622 if (nam != NULL) { 1623 saddr = mtod(nam, struct sockaddr *); 1624 rnh = nep->ne_rtable[saddr->sa_family]; 1625 if (rnh != NULL) { 1626 np = (struct netcred *) 1627 (*rnh->rnh_matchaddr)((caddr_t)saddr, 1628 rnh); 1629 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 1630 np = NULL; 1631 } 1632 } 1633 /* 1634 * If no address match, use the default if it exists. 
1635 */ 1636 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 1637 np = &nep->ne_defexported; 1638 } 1639 return (np); 1640 } 1641 1642 /* 1643 * Do the usual access checking. 1644 * file_mode, uid and gid are from the vnode in question, 1645 * while acc_mode and cred are from the VOP_ACCESS parameter list 1646 */ 1647 int 1648 vaccess(type, file_mode, uid, gid, acc_mode, cred) 1649 enum vtype type; 1650 mode_t file_mode; 1651 uid_t uid; 1652 gid_t gid; 1653 mode_t acc_mode; 1654 struct ucred *cred; 1655 { 1656 mode_t mask; 1657 1658 /* 1659 * Super-user always gets read/write access, but execute access depends 1660 * on at least one execute bit being set. 1661 */ 1662 if (cred->cr_uid == 0) { 1663 if ((acc_mode & VEXEC) && type != VDIR && 1664 (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0) 1665 return (EACCES); 1666 return (0); 1667 } 1668 1669 mask = 0; 1670 1671 /* Otherwise, check the owner. */ 1672 if (cred->cr_uid == uid) { 1673 if (acc_mode & VEXEC) 1674 mask |= S_IXUSR; 1675 if (acc_mode & VREAD) 1676 mask |= S_IRUSR; 1677 if (acc_mode & VWRITE) 1678 mask |= S_IWUSR; 1679 return ((file_mode & mask) == mask ? 0 : EACCES); 1680 } 1681 1682 /* Otherwise, check the groups. */ 1683 if (cred->cr_gid == gid || groupmember(gid, cred)) { 1684 if (acc_mode & VEXEC) 1685 mask |= S_IXGRP; 1686 if (acc_mode & VREAD) 1687 mask |= S_IRGRP; 1688 if (acc_mode & VWRITE) 1689 mask |= S_IWGRP; 1690 return ((file_mode & mask) == mask ? 0 : EACCES); 1691 } 1692 1693 /* Otherwise, check everyone else. */ 1694 if (acc_mode & VEXEC) 1695 mask |= S_IXOTH; 1696 if (acc_mode & VREAD) 1697 mask |= S_IROTH; 1698 if (acc_mode & VWRITE) 1699 mask |= S_IWOTH; 1700 return ((file_mode & mask) == mask ? 0 : EACCES); 1701 } 1702 1703 /* 1704 * Unmount all file systems. 1705 * We traverse the list in reverse order under the assumption that doing so 1706 * will avoid needing to worry about dependencies. 
 */
void
vfs_unmountall()
{
	register struct mount *mp, *nmp;
	int allerror, error;

	/*
	 * Walk the mount list back to front; the predecessor is captured
	 * before each iteration because dounmount() unlinks mp.
	 */
	for (allerror = 0,
	    mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
		printf("unmounting %s (%s)...\n",
		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
		if (vfs_busy(mp))
			continue;
		if ((error = dounmount(mp, MNT_FORCE, &proc0)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}
	if (allerror)
		printf("WARNING: some file systems would not unmount\n");
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
	register struct buf *bp;
	int iter, nbusy, unmountem;

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems; just attempt to
	 * sync.
	 */
	if (panicstr != NULL)
		unmountem = 0;
	else
		unmountem = 1;

	printf("syncing disks... ");

	/* XXX Should suspend scheduling. */
	(void) spl0();

	sys_sync(&proc0, (void *)0, (register_t *)0);

	/* Wait for sync to finish. */
	for (iter = 0; iter < 20; iter++) {
		nbusy = 0;
		/* Count buffers that are busy and valid (I/O outstanding). */
		for (bp = &buf[nbuf]; --bp >= buf; )
			if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
				nbusy++;
		if (nbusy == 0)
			break;
		printf("%d ", nbusy);
		/* Back off progressively longer on each pass. */
		DELAY(40000 * iter);
	}
	if (nbusy) {
		/* Disks never went idle; skip unmounting, it would hang. */
		printf("giving up\n");
		unmountem = 0;
	} else
		printf("done\n");

	if (unmountem) {
		/* Release inodes held by texts before update. */
		vnode_pager_umount(NULL);
#ifdef notdef
		vnshutdown();
#endif
		/* Unmount file systems. */
		vfs_unmountall();
	}
}

/*
 * Mount the root file system.
If the operator didn't specify a 1789 * file system to use, try all possible file systems until one 1790 * succeeds. 1791 */ 1792 int 1793 vfs_mountroot() 1794 { 1795 extern int (*mountroot) __P((void)); 1796 int i; 1797 1798 if (root_device == NULL) 1799 panic("vfs_mountroot: root device unknown"); 1800 1801 switch (root_device->dv_class) { 1802 case DV_IFNET: 1803 if (rootdev != NODEV) 1804 panic("vfs_mountroot: rootdev set for DV_IFNET"); 1805 break; 1806 1807 case DV_DISK: 1808 if (rootdev == NODEV) 1809 panic("vfs_mountroot: rootdev not set for DV_DISK"); 1810 break; 1811 1812 default: 1813 printf("%s: inappropriate for root file system\n", 1814 root_device->dv_xname); 1815 return (ENODEV); 1816 } 1817 1818 /* 1819 * If user specified a file system, use it. 1820 */ 1821 if (mountroot != NULL) 1822 return ((*mountroot)()); 1823 1824 /* 1825 * Try each file system currently configured into the kernel. 1826 */ 1827 for (i = 0; i < nvfssw; i++) { 1828 if (vfssw[i] == NULL || vfssw[i]->vfs_mountroot == NULL) 1829 continue; 1830 #ifdef DEBUG 1831 printf("mountroot: trying %s...\n", vfssw[i]->vfs_name); 1832 #endif 1833 if ((*vfssw[i]->vfs_mountroot)() == 0) { 1834 printf("root file system type: %s\n", 1835 vfssw[i]->vfs_name); 1836 return (0); 1837 } 1838 } 1839 1840 printf("no file system for %s", root_device->dv_xname); 1841 if (root_device->dv_class == DV_DISK) 1842 printf(" (dev 0x%x)", rootdev); 1843 printf("\n"); 1844 return (EFTYPE); 1845 } 1846 1847 /* 1848 * Given a file system name, look up the vfsops for that 1849 * file system, or return NULL if file system isn't present 1850 * in the kernel. 1851 */ 1852 struct vfsops * 1853 vfs_getopsbyname(name) 1854 const char *name; 1855 { 1856 int i; 1857 1858 for (i = 0; i < nvfssw; i++) 1859 if (vfssw[i] != NULL && strcmp(vfssw[i]->vfs_name, name) == 0) 1860 return (vfssw[i]); 1861 return (NULL); 1862 } 1863