1 /* $NetBSD: vfs_subr.c,v 1.76 1997/10/18 16:34:17 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 1997 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1989, 1993 42 * The Regents of the University of California. All rights reserved. 43 * (c) UNIX System Laboratories, Inc. 44 * All or some portions of this file are derived from material licensed 45 * to the University of California by American Telephone and Telegraph 46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 47 * the permission of UNIX System Laboratories, Inc. 48 * 49 * Redistribution and use in source and binary forms, with or without 50 * modification, are permitted provided that the following conditions 51 * are met: 52 * 1. Redistributions of source code must retain the above copyright 53 * notice, this list of conditions and the following disclaimer. 54 * 2. Redistributions in binary form must reproduce the above copyright 55 * notice, this list of conditions and the following disclaimer in the 56 * documentation and/or other materials provided with the distribution. 57 * 3. All advertising materials mentioning features or use of this software 58 * must display the following acknowledgement: 59 * This product includes software developed by the University of 60 * California, Berkeley and its contributors. 61 * 4. Neither the name of the University nor the names of its contributors 62 * may be used to endorse or promote products derived from this software 63 * without specific prior written permission. 
64 * 65 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 66 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 68 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 69 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 70 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 71 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 72 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 73 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 74 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 75 * SUCH DAMAGE. 76 * 77 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 78 */ 79 80 /* 81 * External virtual filesystem routines 82 */ 83 84 #include <sys/param.h> 85 #include <sys/systm.h> 86 #include <sys/proc.h> 87 #include <sys/mount.h> 88 #include <sys/time.h> 89 #include <sys/fcntl.h> 90 #include <sys/vnode.h> 91 #include <sys/stat.h> 92 #include <sys/namei.h> 93 #include <sys/ucred.h> 94 #include <sys/buf.h> 95 #include <sys/errno.h> 96 #include <sys/malloc.h> 97 #include <sys/domain.h> 98 #include <sys/mbuf.h> 99 #include <sys/syscallargs.h> 100 #include <sys/device.h> 101 #include <sys/dirent.h> 102 103 #include <vm/vm.h> 104 #include <sys/sysctl.h> 105 106 #include <miscfs/specfs/specdev.h> 107 108 enum vtype iftovt_tab[16] = { 109 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 110 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 111 }; 112 int vttoif_tab[9] = { 113 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 114 S_IFSOCK, S_IFIFO, S_IFMT, 115 }; 116 117 int doforce = 1; /* 1 => permit forcible unmounting */ 118 int prtactive = 0; /* 1 => print out reclaim of active vnodes */ 119 120 /* 121 * Insq/Remq for the vnode usage lists. 
122 */ 123 #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) 124 #define bufremvn(bp) { \ 125 LIST_REMOVE(bp, b_vnbufs); \ 126 (bp)->b_vnbufs.le_next = NOLIST; \ 127 } 128 TAILQ_HEAD(freelst, vnode) vnode_free_list = /* vnode free list */ 129 TAILQ_HEAD_INITIALIZER(vnode_free_list); 130 struct mntlist mountlist = /* mounted filesystem list */ 131 CIRCLEQ_HEAD_INITIALIZER(mountlist); 132 133 struct device *root_device; /* root device */ 134 struct nfs_public nfs_pub; /* publicly exported FS */ 135 136 int vfs_lock __P((struct mount *)); 137 void vfs_unlock __P((struct mount *)); 138 struct mount *getvfs __P((fsid_t *)); 139 long makefstype __P((char *)); 140 void vattr_null __P((struct vattr *)); 141 int getnewvnode __P((enum vtagtype, struct mount *, int (**)(void *), 142 struct vnode **)); 143 void insmntque __P((struct vnode *, struct mount *)); 144 int vinvalbuf __P((struct vnode *, int, struct ucred *, struct proc *, int, 145 int)); 146 void vflushbuf __P((struct vnode *, int)); 147 void brelvp __P((struct buf *)); 148 int bdevvp __P((dev_t, struct vnode **)); 149 int cdevvp __P((dev_t, struct vnode **)); 150 int getdevvp __P((dev_t, struct vnode **, enum vtype)); 151 struct vnode *checkalias __P((struct vnode *, dev_t, struct mount *)); 152 int vget __P((struct vnode *, int)); 153 void vref __P((struct vnode *)); 154 void vput __P((struct vnode *)); 155 void vrele __P((struct vnode *)); 156 void vhold __P((struct vnode *)); 157 void holdrele __P((struct vnode *)); 158 int vflush __P((struct mount *, struct vnode *, int)); 159 void vgoneall __P((struct vnode *)); 160 void vgone __P((struct vnode *)); 161 int vcount __P((struct vnode *)); 162 void vprint __P((char *, struct vnode *)); 163 int vfs_mountedon __P((struct vnode *)); 164 int vfs_export __P((struct mount *, struct netexport *, struct export_args *)); 165 struct netcred *vfs_export_lookup __P((struct mount *, struct netexport *, 166 struct mbuf *)); 167 int vaccess __P((enum vtype, mode_t, 
uid_t, gid_t, mode_t, struct ucred *)); 168 void vfs_unmountall __P((void)); 169 void vfs_shutdown __P((void)); 170 171 static int vfs_hang_addrlist __P((struct mount *, struct netexport *, 172 struct export_args *)); 173 static int vfs_free_netcred __P((struct radix_node *, void *)); 174 static void vfs_free_addrlist __P((struct netexport *)); 175 176 #ifdef DEBUG 177 void printlockedvnodes __P((void)); 178 #endif 179 180 /* 181 * Initialize the vnode management data structures. 182 */ 183 void 184 vntblinit() 185 { 186 187 /* 188 * Nothing to do here anymore; vnode_free_list and mountlist 189 * are now initialized data. 190 */ 191 } 192 193 /* 194 * Lock a filesystem. 195 * Used to prevent access to it while mounting and unmounting. 196 */ 197 int 198 vfs_lock(mp) 199 register struct mount *mp; 200 { 201 202 while (mp->mnt_flag & MNT_MLOCK) { 203 mp->mnt_flag |= MNT_MWAIT; 204 tsleep((caddr_t)mp, PVFS, "vfslock", 0); 205 } 206 mp->mnt_flag |= MNT_MLOCK; 207 return (0); 208 } 209 210 /* 211 * Unlock a locked filesystem. 212 * Panic if filesystem is not locked. 213 */ 214 void 215 vfs_unlock(mp) 216 register struct mount *mp; 217 { 218 219 if ((mp->mnt_flag & MNT_MLOCK) == 0) 220 panic("vfs_unlock: not locked"); 221 mp->mnt_flag &= ~MNT_MLOCK; 222 if (mp->mnt_flag & MNT_MWAIT) { 223 mp->mnt_flag &= ~MNT_MWAIT; 224 wakeup((caddr_t)mp); 225 } 226 } 227 228 /* 229 * Mark a mount point as busy. 230 * Used to synchronize access and to delay unmounting. 231 */ 232 int 233 vfs_busy(mp) 234 register struct mount *mp; 235 { 236 int unmounting = mp->mnt_flag & MNT_UNMOUNT; 237 238 while(mp->mnt_flag & MNT_MPBUSY) { 239 mp->mnt_flag |= MNT_MPWANT; 240 tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0); 241 if (unmounting) 242 return (1); 243 } 244 mp->mnt_flag |= MNT_MPBUSY; 245 return (0); 246 } 247 248 /* 249 * Free a busy filesystem. 250 * Panic if filesystem is not busy. 
251 */ 252 void 253 vfs_unbusy(mp) 254 register struct mount *mp; 255 { 256 257 if ((mp->mnt_flag & MNT_MPBUSY) == 0) 258 panic("vfs_unbusy: not busy"); 259 mp->mnt_flag &= ~MNT_MPBUSY; 260 if (mp->mnt_flag & MNT_MPWANT) { 261 mp->mnt_flag &= ~MNT_MPWANT; 262 wakeup((caddr_t)&mp->mnt_flag); 263 } 264 } 265 266 /* 267 * Lookup a mount point by filesystem identifier. 268 */ 269 struct mount * 270 getvfs(fsid) 271 fsid_t *fsid; 272 { 273 register struct mount *mp; 274 275 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 276 mp = mp->mnt_list.cqe_next) 277 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 278 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) 279 return (mp); 280 return ((struct mount *)0); 281 } 282 283 /* 284 * Get a new unique fsid 285 */ 286 void 287 getnewfsid(mp, mtype) 288 struct mount *mp; 289 int mtype; 290 { 291 static u_short xxxfs_mntid; 292 293 fsid_t tfsid; 294 295 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + 11, 0); /* XXX */ 296 mp->mnt_stat.f_fsid.val[1] = mtype; 297 if (xxxfs_mntid == 0) 298 ++xxxfs_mntid; 299 tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid); 300 tfsid.val[1] = mtype; 301 if (mountlist.cqh_first != (void *)&mountlist) { 302 while (getvfs(&tfsid)) { 303 tfsid.val[0]++; 304 xxxfs_mntid++; 305 } 306 } 307 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 308 } 309 310 /* 311 * Make a 'unique' number from a mount type name. 312 */ 313 long 314 makefstype(type) 315 char *type; 316 { 317 long rv; 318 319 for (rv = 0; *type; type++) { 320 rv <<= 2; 321 rv ^= *type; 322 } 323 return rv; 324 } 325 326 /* 327 * Set vnode attributes to VNOVAL 328 */ 329 void 330 vattr_null(vap) 331 register struct vattr *vap; 332 { 333 334 vap->va_type = VNON; 335 336 /* 337 * Assign individually so that it is safe even if size and 338 * sign of each member are varied. 
339 */ 340 vap->va_mode = VNOVAL; 341 vap->va_nlink = VNOVAL; 342 vap->va_uid = VNOVAL; 343 vap->va_gid = VNOVAL; 344 vap->va_fsid = VNOVAL; 345 vap->va_fileid = VNOVAL; 346 vap->va_size = VNOVAL; 347 vap->va_blocksize = VNOVAL; 348 vap->va_atime.tv_sec = 349 vap->va_mtime.tv_sec = 350 vap->va_ctime.tv_sec = VNOVAL; 351 vap->va_atime.tv_nsec = 352 vap->va_mtime.tv_nsec = 353 vap->va_ctime.tv_nsec = VNOVAL; 354 vap->va_gen = VNOVAL; 355 vap->va_flags = VNOVAL; 356 vap->va_rdev = VNOVAL; 357 vap->va_bytes = VNOVAL; 358 vap->va_vaflags = 0; 359 } 360 361 /* 362 * Routines having to do with the management of the vnode table. 363 */ 364 extern int (**dead_vnodeop_p) __P((void *)); 365 long numvnodes; 366 367 /* 368 * Return the next vnode from the free list. 369 */ 370 int 371 getnewvnode(tag, mp, vops, vpp) 372 enum vtagtype tag; 373 struct mount *mp; 374 int (**vops) __P((void *)); 375 struct vnode **vpp; 376 { 377 register struct vnode *vp; 378 #ifdef DIAGNOSTIC 379 int s; 380 #endif 381 382 if ((vnode_free_list.tqh_first == NULL && 383 numvnodes < 2 * desiredvnodes) || 384 numvnodes < desiredvnodes) { 385 vp = (struct vnode *)malloc((u_long)sizeof *vp, 386 M_VNODE, M_WAITOK); 387 bzero((char *)vp, sizeof *vp); 388 numvnodes++; 389 } else { 390 if ((vp = vnode_free_list.tqh_first) == NULL) { 391 tablefull("vnode"); 392 *vpp = 0; 393 return (ENFILE); 394 } 395 if (vp->v_usecount) { 396 vprint("free vnode", vp); 397 panic("free vnode isn't"); 398 } 399 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 400 /* see comment on why 0xdeadb is set at end of vgone (below) */ 401 vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb; 402 vp->v_lease = NULL; 403 if (vp->v_type != VBAD) 404 vgone(vp); 405 #ifdef DIAGNOSTIC 406 if (vp->v_data) { 407 vprint("cleaned vnode", vp); 408 panic("cleaned vnode isn't"); 409 } 410 s = splbio(); 411 if (vp->v_numoutput) 412 panic("Clean vnode has pending I/O's"); 413 splx(s); 414 #endif 415 vp->v_flag = 0; 416 vp->v_lastr = 0; 417 vp->v_ralen = 
0; 418 vp->v_maxra = 0; 419 vp->v_lastw = 0; 420 vp->v_lasta = 0; 421 vp->v_cstart = 0; 422 vp->v_clen = 0; 423 vp->v_socket = 0; 424 } 425 vp->v_type = VNON; 426 cache_purge(vp); 427 vp->v_tag = tag; 428 vp->v_op = vops; 429 insmntque(vp, mp); 430 *vpp = vp; 431 vp->v_usecount = 1; 432 vp->v_data = 0; 433 return (0); 434 } 435 436 /* 437 * Move a vnode from one mount queue to another. 438 */ 439 void 440 insmntque(vp, mp) 441 register struct vnode *vp; 442 register struct mount *mp; 443 { 444 445 /* 446 * Delete from old mount point vnode list, if on one. 447 */ 448 if (vp->v_mount != NULL) 449 LIST_REMOVE(vp, v_mntvnodes); 450 /* 451 * Insert into list of vnodes for the new mount point, if available. 452 */ 453 if ((vp->v_mount = mp) == NULL) 454 return; 455 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 456 } 457 458 /* 459 * Update outstanding I/O count and do wakeup if requested. 460 */ 461 void 462 vwakeup(bp) 463 register struct buf *bp; 464 { 465 register struct vnode *vp; 466 467 bp->b_flags &= ~B_WRITEINPROG; 468 if ((vp = bp->b_vp) != NULL) { 469 if (--vp->v_numoutput < 0) 470 panic("vwakeup: neg numoutput"); 471 if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { 472 vp->v_flag &= ~VBWAIT; 473 wakeup((caddr_t)&vp->v_numoutput); 474 } 475 } 476 } 477 478 /* 479 * Flush out and invalidate all buffers associated with a vnode. 480 * Called with the underlying object locked. 
481 */ 482 int 483 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) 484 register struct vnode *vp; 485 int flags; 486 struct ucred *cred; 487 struct proc *p; 488 int slpflag, slptimeo; 489 { 490 register struct buf *bp; 491 struct buf *nbp, *blist; 492 int s, error; 493 494 if (flags & V_SAVE) { 495 if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0) 496 return (error); 497 if (vp->v_dirtyblkhd.lh_first != NULL) 498 panic("vinvalbuf: dirty bufs"); 499 } 500 for (;;) { 501 if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA) 502 while (blist && blist->b_lblkno < 0) 503 blist = blist->b_vnbufs.le_next; 504 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && 505 (flags & V_SAVEMETA)) 506 while (blist && blist->b_lblkno < 0) 507 blist = blist->b_vnbufs.le_next; 508 if (!blist) 509 break; 510 511 for (bp = blist; bp; bp = nbp) { 512 nbp = bp->b_vnbufs.le_next; 513 if (flags & V_SAVEMETA && bp->b_lblkno < 0) 514 continue; 515 s = splbio(); 516 if (bp->b_flags & B_BUSY) { 517 bp->b_flags |= B_WANTED; 518 error = tsleep((caddr_t)bp, 519 slpflag | (PRIBIO + 1), "vinvalbuf", 520 slptimeo); 521 splx(s); 522 if (error) 523 return (error); 524 break; 525 } 526 bp->b_flags |= B_BUSY | B_VFLUSH; 527 splx(s); 528 /* 529 * XXX Since there are no node locks for NFS, I believe 530 * there is a slight chance that a delayed write will 531 * occur while sleeping just above, so check for it. 
532 */ 533 if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { 534 (void) VOP_BWRITE(bp); 535 break; 536 } 537 bp->b_flags |= B_INVAL; 538 brelse(bp); 539 } 540 } 541 if (!(flags & V_SAVEMETA) && 542 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) 543 panic("vinvalbuf: flush failed"); 544 return (0); 545 } 546 547 void 548 vflushbuf(vp, sync) 549 register struct vnode *vp; 550 int sync; 551 { 552 register struct buf *bp, *nbp; 553 int s; 554 555 loop: 556 s = splbio(); 557 for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { 558 nbp = bp->b_vnbufs.le_next; 559 if ((bp->b_flags & B_BUSY)) 560 continue; 561 if ((bp->b_flags & B_DELWRI) == 0) 562 panic("vflushbuf: not dirty"); 563 bp->b_flags |= B_BUSY | B_VFLUSH; 564 splx(s); 565 /* 566 * Wait for I/O associated with indirect blocks to complete, 567 * since there is no way to quickly wait for them below. 568 */ 569 if (bp->b_vp == vp || sync == 0) 570 (void) bawrite(bp); 571 else 572 (void) bwrite(bp); 573 goto loop; 574 } 575 if (sync == 0) { 576 splx(s); 577 return; 578 } 579 while (vp->v_numoutput) { 580 vp->v_flag |= VBWAIT; 581 tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0); 582 } 583 splx(s); 584 if (vp->v_dirtyblkhd.lh_first != NULL) { 585 vprint("vflushbuf: dirty", vp); 586 goto loop; 587 } 588 } 589 590 /* 591 * Associate a buffer with a vnode. 592 */ 593 void 594 bgetvp(vp, bp) 595 register struct vnode *vp; 596 register struct buf *bp; 597 { 598 599 if (bp->b_vp) 600 panic("bgetvp: not free"); 601 VHOLD(vp); 602 bp->b_vp = vp; 603 if (vp->v_type == VBLK || vp->v_type == VCHR) 604 bp->b_dev = vp->v_rdev; 605 else 606 bp->b_dev = NODEV; 607 /* 608 * Insert onto list for new vnode. 609 */ 610 bufinsvn(bp, &vp->v_cleanblkhd); 611 } 612 613 /* 614 * Disassociate a buffer from a vnode. 
615 */ 616 void 617 brelvp(bp) 618 register struct buf *bp; 619 { 620 struct vnode *vp; 621 622 if (bp->b_vp == (struct vnode *) 0) 623 panic("brelvp: NULL"); 624 /* 625 * Delete from old vnode list, if on one. 626 */ 627 if (bp->b_vnbufs.le_next != NOLIST) 628 bufremvn(bp); 629 vp = bp->b_vp; 630 bp->b_vp = (struct vnode *) 0; 631 HOLDRELE(vp); 632 } 633 634 /* 635 * Reassign a buffer from one vnode to another. 636 * Used to assign file specific control information 637 * (indirect blocks) to the vnode to which they belong. 638 */ 639 void 640 reassignbuf(bp, newvp) 641 register struct buf *bp; 642 register struct vnode *newvp; 643 { 644 register struct buflists *listheadp; 645 646 if (newvp == NULL) { 647 printf("reassignbuf: NULL"); 648 return; 649 } 650 /* 651 * Delete from old vnode list, if on one. 652 */ 653 if (bp->b_vnbufs.le_next != NOLIST) 654 bufremvn(bp); 655 /* 656 * If dirty, put on list of dirty buffers; 657 * otherwise insert onto list of clean buffers. 658 */ 659 if (bp->b_flags & B_DELWRI) 660 listheadp = &newvp->v_dirtyblkhd; 661 else 662 listheadp = &newvp->v_cleanblkhd; 663 bufinsvn(bp, listheadp); 664 } 665 666 /* 667 * Create a vnode for a block device. 668 * Used for root filesystem and swap areas. 669 * Also used for memory file system special devices. 670 */ 671 int 672 bdevvp(dev, vpp) 673 dev_t dev; 674 struct vnode **vpp; 675 { 676 677 return (getdevvp(dev, vpp, VBLK)); 678 } 679 680 /* 681 * Create a vnode for a character device. 682 * Used for kernfs and some console handling. 683 */ 684 int 685 cdevvp(dev, vpp) 686 dev_t dev; 687 struct vnode **vpp; 688 { 689 690 return (getdevvp(dev, vpp, VCHR)); 691 } 692 693 /* 694 * Create a vnode for a device. 695 * Used by bdevvp (block device) for root file system etc., 696 * and by cdevvp (character device) for console and kernfs. 
697 */ 698 int 699 getdevvp(dev, vpp, type) 700 dev_t dev; 701 struct vnode **vpp; 702 enum vtype type; 703 { 704 register struct vnode *vp; 705 struct vnode *nvp; 706 int error; 707 708 if (dev == NODEV) 709 return (0); 710 error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp); 711 if (error) { 712 *vpp = NULLVP; 713 return (error); 714 } 715 vp = nvp; 716 vp->v_type = type; 717 if ((nvp = checkalias(vp, dev, NULL)) != 0) { 718 vput(vp); 719 vp = nvp; 720 } 721 *vpp = vp; 722 return (0); 723 } 724 725 /* 726 * Check to see if the new vnode represents a special device 727 * for which we already have a vnode (either because of 728 * bdevvp() or because of a different vnode representing 729 * the same block device). If such an alias exists, deallocate 730 * the existing contents and return the aliased vnode. The 731 * caller is responsible for filling it with its new contents. 732 */ 733 struct vnode * 734 checkalias(nvp, nvp_rdev, mp) 735 register struct vnode *nvp; 736 dev_t nvp_rdev; 737 struct mount *mp; 738 { 739 register struct vnode *vp; 740 struct vnode **vpp; 741 742 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 743 return (NULLVP); 744 745 vpp = &speclisth[SPECHASH(nvp_rdev)]; 746 loop: 747 for (vp = *vpp; vp; vp = vp->v_specnext) { 748 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) 749 continue; 750 /* 751 * Alias, but not in use, so flush it out. 
752 */ 753 if (vp->v_usecount == 0) { 754 vgone(vp); 755 goto loop; 756 } 757 if (vget(vp, 1)) 758 goto loop; 759 break; 760 } 761 if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) { 762 MALLOC(nvp->v_specinfo, struct specinfo *, 763 sizeof(struct specinfo), M_VNODE, M_WAITOK); 764 nvp->v_rdev = nvp_rdev; 765 nvp->v_hashchain = vpp; 766 nvp->v_specnext = *vpp; 767 nvp->v_specflags = 0; 768 nvp->v_speclockf = NULL; 769 *vpp = nvp; 770 if (vp != NULL) { 771 nvp->v_flag |= VALIASED; 772 vp->v_flag |= VALIASED; 773 vput(vp); 774 } 775 return (NULLVP); 776 } 777 VOP_UNLOCK(vp); 778 vclean(vp, 0); 779 vp->v_op = nvp->v_op; 780 vp->v_tag = nvp->v_tag; 781 nvp->v_type = VNON; 782 insmntque(vp, mp); 783 return (vp); 784 } 785 786 /* 787 * Grab a particular vnode from the free list, increment its 788 * reference count and lock it. The vnode lock bit is set the 789 * vnode is being eliminated in vgone. The process is awakened 790 * when the transition is completed, and an error returned to 791 * indicate that the vnode is no longer usable (possibly having 792 * been changed to a new file system type). 793 */ 794 int 795 vget(vp, lockflag) 796 register struct vnode *vp; 797 int lockflag; 798 { 799 800 /* 801 * If the vnode is in the process of being cleaned out for 802 * another use, we wait for the cleaning to finish and then 803 * return failure. Cleaning is determined either by checking 804 * that the VXLOCK flag is set, or that the use count is 805 * zero with the back pointer set to show that it has been 806 * removed from the free list by getnewvnode. The VXLOCK 807 * flag may not have been set yet because vclean is blocked in 808 * the VOP_LOCK call waiting for the VOP_INACTIVE to complete. 
809 */ 810 if ((vp->v_flag & VXLOCK) || 811 (vp->v_usecount == 0 && 812 vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) { 813 vp->v_flag |= VXWANT; 814 tsleep((caddr_t)vp, PINOD, "vget", 0); 815 return (1); 816 } 817 if (vp->v_usecount == 0) 818 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 819 vp->v_usecount++; 820 if (lockflag) 821 VOP_LOCK(vp); 822 return (0); 823 } 824 825 /* 826 * Vnode reference, just increment the count 827 */ 828 void 829 vref(vp) 830 struct vnode *vp; 831 { 832 833 if (vp->v_usecount <= 0) 834 panic("vref used where vget required"); 835 vp->v_usecount++; 836 } 837 838 /* 839 * vput(), just unlock and vrele() 840 */ 841 void 842 vput(vp) 843 register struct vnode *vp; 844 { 845 846 VOP_UNLOCK(vp); 847 vrele(vp); 848 } 849 850 /* 851 * Vnode release. 852 * If count drops to zero, call inactive routine and return to freelist. 853 */ 854 void 855 vrele(vp) 856 register struct vnode *vp; 857 { 858 859 #ifdef DIAGNOSTIC 860 if (vp == NULL) 861 panic("vrele: null vp"); 862 #endif 863 vp->v_usecount--; 864 if (vp->v_usecount > 0) 865 return; 866 #ifdef DIAGNOSTIC 867 if (vp->v_usecount != 0 || vp->v_writecount != 0) { 868 vprint("vrele: bad ref count", vp); 869 panic("vrele: ref cnt"); 870 } 871 #endif 872 /* 873 * insert at tail of LRU list 874 */ 875 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 876 VOP_INACTIVE(vp); 877 } 878 879 /* 880 * Page or buffer structure gets a reference. 881 */ 882 void 883 vhold(vp) 884 register struct vnode *vp; 885 { 886 887 vp->v_holdcnt++; 888 } 889 890 /* 891 * Page or buffer structure frees a reference. 892 */ 893 void 894 holdrele(vp) 895 register struct vnode *vp; 896 { 897 898 if (vp->v_holdcnt <= 0) 899 panic("holdrele: holdcnt"); 900 vp->v_holdcnt--; 901 } 902 903 /* 904 * Remove any vnodes in the vnode table belonging to mount point mp. 
905 * 906 * If MNT_NOFORCE is specified, there should not be any active ones, 907 * return error if any are found (nb: this is a user error, not a 908 * system error). If MNT_FORCE is specified, detach any active vnodes 909 * that are found. 910 */ 911 #ifdef DEBUG 912 int busyprt = 0; /* print out busy vnodes */ 913 struct ctldebug debug1 = { "busyprt", &busyprt }; 914 #endif 915 916 int 917 vflush(mp, skipvp, flags) 918 struct mount *mp; 919 struct vnode *skipvp; 920 int flags; 921 { 922 register struct vnode *vp, *nvp; 923 int busy = 0; 924 925 if ((mp->mnt_flag & MNT_MPBUSY) == 0) 926 panic("vflush: not busy"); 927 loop: 928 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { 929 if (vp->v_mount != mp) 930 goto loop; 931 nvp = vp->v_mntvnodes.le_next; 932 /* 933 * Skip over a selected vnode. 934 */ 935 if (vp == skipvp) 936 continue; 937 /* 938 * Skip over a vnodes marked VSYSTEM. 939 */ 940 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) 941 continue; 942 /* 943 * If WRITECLOSE is set, only flush out regular file 944 * vnodes open for writing. 945 */ 946 if ((flags & WRITECLOSE) && 947 (vp->v_writecount == 0 || vp->v_type != VREG)) 948 continue; 949 /* 950 * With v_usecount == 0, all we need to do is clear 951 * out the vnode data structures and we are done. 952 */ 953 if (vp->v_usecount == 0) { 954 vgone(vp); 955 continue; 956 } 957 /* 958 * If FORCECLOSE is set, forcibly close the vnode. 959 * For block or character devices, revert to an 960 * anonymous device. For all other files, just kill them. 961 */ 962 if (flags & FORCECLOSE) { 963 if (vp->v_type != VBLK && vp->v_type != VCHR) { 964 vgone(vp); 965 } else { 966 vclean(vp, 0); 967 vp->v_op = spec_vnodeop_p; 968 insmntque(vp, (struct mount *)0); 969 } 970 continue; 971 } 972 #ifdef DEBUG 973 if (busyprt) 974 vprint("vflush: busy vnode", vp); 975 #endif 976 busy++; 977 } 978 if (busy) 979 return (EBUSY); 980 return (0); 981 } 982 983 /* 984 * Disassociate the underlying file system from a vnode. 
985 */ 986 void 987 vclean(vp, flags) 988 register struct vnode *vp; 989 int flags; 990 { 991 int active; 992 993 /* 994 * Check to see if the vnode is in use. 995 * If so we have to reference it before we clean it out 996 * so that its count cannot fall to zero and generate a 997 * race against ourselves to recycle it. 998 */ 999 if ((active = vp->v_usecount) != 0) 1000 VREF(vp); 1001 /* 1002 * Even if the count is zero, the VOP_INACTIVE routine may still 1003 * have the object locked while it cleans it out. The VOP_LOCK 1004 * ensures that the VOP_INACTIVE routine is done with its work. 1005 * For active vnodes, it ensures that no other activity can 1006 * occur while the underlying object is being cleaned out. 1007 */ 1008 VOP_LOCK(vp); 1009 /* 1010 * Prevent the vnode from being recycled or 1011 * brought into use while we clean it out. 1012 */ 1013 if (vp->v_flag & VXLOCK) 1014 panic("vclean: deadlock"); 1015 vp->v_flag |= VXLOCK; 1016 /* 1017 * Clean out any buffers associated with the vnode. 1018 */ 1019 if (flags & DOCLOSE) 1020 vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); 1021 /* 1022 * Any other processes trying to obtain this lock must first 1023 * wait for VXLOCK to clear, then call the new lock operation. 1024 */ 1025 VOP_UNLOCK(vp); 1026 /* 1027 * If purging an active vnode, it must be closed and 1028 * deactivated before being reclaimed. 1029 */ 1030 if (active) { 1031 if (flags & DOCLOSE) 1032 VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL); 1033 VOP_INACTIVE(vp); 1034 } 1035 /* 1036 * Reclaim the vnode. 1037 */ 1038 if (VOP_RECLAIM(vp)) 1039 panic("vclean: cannot reclaim"); 1040 if (active) 1041 vrele(vp); 1042 1043 /* 1044 * Done with purge, notify sleepers of the grim news. 
1045 */ 1046 vp->v_op = dead_vnodeop_p; 1047 vp->v_tag = VT_NON; 1048 vp->v_flag &= ~VXLOCK; 1049 if (vp->v_flag & VXWANT) { 1050 vp->v_flag &= ~VXWANT; 1051 wakeup((caddr_t)vp); 1052 } 1053 } 1054 1055 /* 1056 * Eliminate all activity associated with the requested vnode 1057 * and with all vnodes aliased to the requested vnode. 1058 */ 1059 void 1060 vgoneall(vp) 1061 register struct vnode *vp; 1062 { 1063 register struct vnode *vq; 1064 1065 if (vp->v_flag & VALIASED) { 1066 /* 1067 * If a vgone (or vclean) is already in progress, 1068 * wait until it is done and return. 1069 */ 1070 if (vp->v_flag & VXLOCK) { 1071 vp->v_flag |= VXWANT; 1072 tsleep((caddr_t)vp, PINOD, "vgoneall", 0); 1073 return; 1074 } 1075 /* 1076 * Ensure that vp will not be vgone'd while we 1077 * are eliminating its aliases. 1078 */ 1079 vp->v_flag |= VXLOCK; 1080 while (vp->v_flag & VALIASED) { 1081 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1082 if (vq->v_rdev != vp->v_rdev || 1083 vq->v_type != vp->v_type || vp == vq) 1084 continue; 1085 vgone(vq); 1086 break; 1087 } 1088 } 1089 /* 1090 * Remove the lock so that vgone below will 1091 * really eliminate the vnode after which time 1092 * vgone will awaken any sleepers. 1093 */ 1094 vp->v_flag &= ~VXLOCK; 1095 } 1096 vgone(vp); 1097 } 1098 1099 /* 1100 * Eliminate all activity associated with a vnode 1101 * in preparation for reuse. 1102 */ 1103 void 1104 vgone(vp) 1105 register struct vnode *vp; 1106 { 1107 register struct vnode *vq; 1108 struct vnode *vx; 1109 1110 /* 1111 * If a vgone (or vclean) is already in progress, 1112 * wait until it is done and return. 1113 */ 1114 if (vp->v_flag & VXLOCK) { 1115 vp->v_flag |= VXWANT; 1116 tsleep((caddr_t)vp, PINOD, "vgone", 0); 1117 return; 1118 } 1119 /* 1120 * Clean out the filesystem specific data. 1121 */ 1122 vclean(vp, DOCLOSE); 1123 /* 1124 * Delete from old mount point vnode list, if on one. 
1125 */ 1126 insmntque(vp, (struct mount *)0); 1127 /* 1128 * If special device, remove it from special device alias list. 1129 */ 1130 if (vp->v_type == VBLK || vp->v_type == VCHR) { 1131 if (*vp->v_hashchain == vp) { 1132 *vp->v_hashchain = vp->v_specnext; 1133 } else { 1134 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1135 if (vq->v_specnext != vp) 1136 continue; 1137 vq->v_specnext = vp->v_specnext; 1138 break; 1139 } 1140 if (vq == NULL) 1141 panic("missing bdev"); 1142 } 1143 if (vp->v_flag & VALIASED) { 1144 vx = NULL; 1145 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1146 if (vq->v_rdev != vp->v_rdev || 1147 vq->v_type != vp->v_type) 1148 continue; 1149 if (vx) 1150 break; 1151 vx = vq; 1152 } 1153 if (vx == NULL) 1154 panic("missing alias"); 1155 if (vq == NULL) 1156 vx->v_flag &= ~VALIASED; 1157 vp->v_flag &= ~VALIASED; 1158 } 1159 FREE(vp->v_specinfo, M_VNODE); 1160 vp->v_specinfo = NULL; 1161 } 1162 /* 1163 * If it is on the freelist and not already at the head, 1164 * move it to the head of the list. The test of the back 1165 * pointer and the reference count of zero is because 1166 * it will be removed from the free list by getnewvnode, 1167 * but will not have its reference count incremented until 1168 * after calling vgone. If the reference count were 1169 * incremented first, vgone would (incorrectly) try to 1170 * close the previous instance of the underlying object. 1171 * So, the back pointer is explicitly set to `0xdeadb' in 1172 * getnewvnode after removing it from the freelist to ensure 1173 * that we do not try to move it here. 1174 */ 1175 if (vp->v_usecount == 0 && 1176 vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb && 1177 vnode_free_list.tqh_first != vp) { 1178 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1179 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 1180 } 1181 vp->v_type = VBAD; 1182 } 1183 1184 /* 1185 * Lookup a vnode by device number. 
 */
/*
 * Scan the special-device hash chain for a vnode matching (dev, type).
 * Returns 1 and stores the vnode through vpp on success, 0 otherwise.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (1);
	}
	return (0);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		/* vgone() below can free vq, so grab the successor first. */
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			/* vgone() may sleep and change the chain; restart. */
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}

/*
 * Print out a description of a vnode.
1235 */ 1236 static char *typename[] = 1237 { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" }; 1238 1239 void 1240 vprint(label, vp) 1241 char *label; 1242 register struct vnode *vp; 1243 { 1244 char buf[64]; 1245 1246 if (label != NULL) 1247 printf("%s: ", label); 1248 printf("type %s, usecount %d, writecount %d, refcount %ld,", 1249 typename[vp->v_type], vp->v_usecount, vp->v_writecount, 1250 vp->v_holdcnt); 1251 buf[0] = '\0'; 1252 if (vp->v_flag & VROOT) 1253 strcat(buf, "|VROOT"); 1254 if (vp->v_flag & VTEXT) 1255 strcat(buf, "|VTEXT"); 1256 if (vp->v_flag & VSYSTEM) 1257 strcat(buf, "|VSYSTEM"); 1258 if (vp->v_flag & VXLOCK) 1259 strcat(buf, "|VXLOCK"); 1260 if (vp->v_flag & VXWANT) 1261 strcat(buf, "|VXWANT"); 1262 if (vp->v_flag & VBWAIT) 1263 strcat(buf, "|VBWAIT"); 1264 if (vp->v_flag & VALIASED) 1265 strcat(buf, "|VALIASED"); 1266 if (buf[0] != '\0') 1267 printf(" flags (%s)", &buf[1]); 1268 if (vp->v_data == NULL) { 1269 printf("\n"); 1270 } else { 1271 printf("\n\t"); 1272 VOP_PRINT(vp); 1273 } 1274 } 1275 1276 #ifdef DEBUG 1277 /* 1278 * List all of the locked vnodes in the system. 1279 * Called when debugging the kernel. 1280 */ 1281 void 1282 printlockedvnodes() 1283 { 1284 register struct mount *mp; 1285 register struct vnode *vp; 1286 1287 printf("Locked vnodes\n"); 1288 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 1289 mp = mp->mnt_list.cqe_next) { 1290 for (vp = mp->mnt_vnodelist.lh_first; 1291 vp != NULL; 1292 vp = vp->v_mntvnodes.le_next) 1293 if (VOP_ISLOCKED(vp)) 1294 vprint((char *)0, vp); 1295 } 1296 } 1297 #endif 1298 1299 int kinfo_vdebug = 1; 1300 int kinfo_vgetfailed; 1301 #define KINFO_VNODESLOP 10 1302 /* 1303 * Dump vnode list (via sysctl). 1304 * Copyout address of vnode followed by vnode. 
1305 */ 1306 /* ARGSUSED */ 1307 int 1308 sysctl_vnode(where, sizep) 1309 char *where; 1310 size_t *sizep; 1311 { 1312 register struct mount *mp, *nmp; 1313 struct vnode *vp; 1314 register char *bp = where, *savebp; 1315 char *ewhere; 1316 int error; 1317 1318 #define VPTRSZ sizeof (struct vnode *) 1319 #define VNODESZ sizeof (struct vnode) 1320 if (where == NULL) { 1321 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 1322 return (0); 1323 } 1324 ewhere = where + *sizep; 1325 1326 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1327 nmp = mp->mnt_list.cqe_next; 1328 if (vfs_busy(mp)) 1329 continue; 1330 savebp = bp; 1331 again: 1332 for (vp = mp->mnt_vnodelist.lh_first; 1333 vp != NULL; 1334 vp = vp->v_mntvnodes.le_next) { 1335 /* 1336 * Check that the vp is still associated with 1337 * this filesystem. RACE: could have been 1338 * recycled onto the same filesystem. 1339 */ 1340 if (vp->v_mount != mp) { 1341 if (kinfo_vdebug) 1342 printf("kinfo: vp changed\n"); 1343 bp = savebp; 1344 goto again; 1345 } 1346 if (bp + VPTRSZ + VNODESZ > ewhere) { 1347 *sizep = bp - where; 1348 return (ENOMEM); 1349 } 1350 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || 1351 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) 1352 return (error); 1353 bp += VPTRSZ + VNODESZ; 1354 } 1355 vfs_unbusy(mp); 1356 } 1357 1358 *sizep = bp - where; 1359 return (0); 1360 } 1361 1362 /* 1363 * Check to see if a filesystem is mounted on a block device. 
1364 */ 1365 int 1366 vfs_mountedon(vp) 1367 register struct vnode *vp; 1368 { 1369 register struct vnode *vq; 1370 1371 if (vp->v_specflags & SI_MOUNTEDON) 1372 return (EBUSY); 1373 if (vp->v_flag & VALIASED) { 1374 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1375 if (vq->v_rdev != vp->v_rdev || 1376 vq->v_type != vp->v_type) 1377 continue; 1378 if (vq->v_specflags & SI_MOUNTEDON) 1379 return (EBUSY); 1380 } 1381 } 1382 return (0); 1383 } 1384 1385 /* 1386 * Build hash lists of net addresses and hang them off the mount point. 1387 * Called by ufs_mount() to set up the lists of export addresses. 1388 */ 1389 static int 1390 vfs_hang_addrlist(mp, nep, argp) 1391 struct mount *mp; 1392 struct netexport *nep; 1393 struct export_args *argp; 1394 { 1395 register struct netcred *np, *enp; 1396 register struct radix_node_head *rnh; 1397 register int i; 1398 struct radix_node *rn; 1399 struct sockaddr *saddr, *smask = 0; 1400 struct domain *dom; 1401 int error; 1402 1403 if (argp->ex_addrlen == 0) { 1404 if (mp->mnt_flag & MNT_DEFEXPORTED) 1405 return (EPERM); 1406 np = &nep->ne_defexported; 1407 np->netc_exflags = argp->ex_flags; 1408 np->netc_anon = argp->ex_anon; 1409 np->netc_anon.cr_ref = 1; 1410 mp->mnt_flag |= MNT_DEFEXPORTED; 1411 return (0); 1412 } 1413 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 1414 np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK); 1415 bzero((caddr_t)np, i); 1416 saddr = (struct sockaddr *)(np + 1); 1417 error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen); 1418 if (error) 1419 goto out; 1420 if (saddr->sa_len > argp->ex_addrlen) 1421 saddr->sa_len = argp->ex_addrlen; 1422 if (argp->ex_masklen) { 1423 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen); 1424 error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen); 1425 if (error) 1426 goto out; 1427 if (smask->sa_len > argp->ex_masklen) 1428 smask->sa_len = argp->ex_masklen; 1429 } 1430 i = saddr->sa_family; 1431 if ((rnh = 
nep->ne_rtable[i]) == 0) { 1432 /* 1433 * Seems silly to initialize every AF when most are not 1434 * used, do so on demand here 1435 */ 1436 for (dom = domains; dom; dom = dom->dom_next) 1437 if (dom->dom_family == i && dom->dom_rtattach) { 1438 dom->dom_rtattach((void **)&nep->ne_rtable[i], 1439 dom->dom_rtoffset); 1440 break; 1441 } 1442 if ((rnh = nep->ne_rtable[i]) == 0) { 1443 error = ENOBUFS; 1444 goto out; 1445 } 1446 } 1447 rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh, 1448 np->netc_rnodes); 1449 if (rn == 0 || np != (struct netcred *)rn) { /* already exists */ 1450 if (rn == 0) { 1451 enp = (struct netcred *)(*rnh->rnh_lookup)(saddr, 1452 smask, rnh); 1453 if (enp == 0) { 1454 error = EPERM; 1455 goto out; 1456 } 1457 } else 1458 enp = (struct netcred *)rn; 1459 1460 if (enp->netc_exflags != argp->ex_flags || 1461 enp->netc_anon.cr_uid != argp->ex_anon.cr_uid || 1462 enp->netc_anon.cr_gid != argp->ex_anon.cr_gid || 1463 enp->netc_anon.cr_ngroups != argp->ex_anon.cr_ngroups || 1464 bcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups, 1465 enp->netc_anon.cr_ngroups)) 1466 error = EPERM; 1467 else 1468 error = 0; 1469 goto out; 1470 } 1471 np->netc_exflags = argp->ex_flags; 1472 np->netc_anon = argp->ex_anon; 1473 np->netc_anon.cr_ref = 1; 1474 return (0); 1475 out: 1476 free(np, M_NETADDR); 1477 return (error); 1478 } 1479 1480 /* ARGSUSED */ 1481 static int 1482 vfs_free_netcred(rn, w) 1483 struct radix_node *rn; 1484 void *w; 1485 { 1486 register struct radix_node_head *rnh = (struct radix_node_head *)w; 1487 1488 (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); 1489 free((caddr_t)rn, M_NETADDR); 1490 return (0); 1491 } 1492 1493 /* 1494 * Free the net address hash lists that are hanging off the mount points. 
1495 */ 1496 static void 1497 vfs_free_addrlist(nep) 1498 struct netexport *nep; 1499 { 1500 register int i; 1501 register struct radix_node_head *rnh; 1502 1503 for (i = 0; i <= AF_MAX; i++) 1504 if ((rnh = nep->ne_rtable[i]) != NULL) { 1505 (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh); 1506 free((caddr_t)rnh, M_RTABLE); 1507 nep->ne_rtable[i] = 0; 1508 } 1509 } 1510 1511 int 1512 vfs_export(mp, nep, argp) 1513 struct mount *mp; 1514 struct netexport *nep; 1515 struct export_args *argp; 1516 { 1517 int error; 1518 1519 if (argp->ex_flags & MNT_DELEXPORT) { 1520 if (mp->mnt_flag & MNT_EXPUBLIC) { 1521 vfs_setpublicfs(NULL, NULL, NULL); 1522 mp->mnt_flag &= ~MNT_EXPUBLIC; 1523 } 1524 vfs_free_addrlist(nep); 1525 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 1526 } 1527 if (argp->ex_flags & MNT_EXPORTED) { 1528 if (argp->ex_flags & MNT_EXPUBLIC) { 1529 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 1530 return (error); 1531 mp->mnt_flag |= MNT_EXPUBLIC; 1532 } 1533 if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0) 1534 return (error); 1535 mp->mnt_flag |= MNT_EXPORTED; 1536 } 1537 return (0); 1538 } 1539 1540 /* 1541 * Set the publicly exported filesystem (WebNFS). Currently, only 1542 * one public filesystem is possible in the spec (RFC 2054 and 2055) 1543 */ 1544 int 1545 vfs_setpublicfs(mp, nep, argp) 1546 struct mount *mp; 1547 struct netexport *nep; 1548 struct export_args *argp; 1549 { 1550 int error; 1551 struct vnode *rvp; 1552 char *cp; 1553 1554 /* 1555 * mp == NULL -> invalidate the current info, the FS is 1556 * no longer exported. May be called from either vfs_export 1557 * or unmount, so check if it hasn't already been done. 1558 */ 1559 if (mp == NULL) { 1560 if (nfs_pub.np_valid) { 1561 nfs_pub.np_valid = 0; 1562 if (nfs_pub.np_index != NULL) { 1563 FREE(nfs_pub.np_index, M_TEMP); 1564 nfs_pub.np_index = NULL; 1565 } 1566 } 1567 return (0); 1568 } 1569 1570 /* 1571 * Only one allowed at a time. 
1572 */ 1573 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 1574 return (EBUSY); 1575 1576 /* 1577 * Get real filehandle for root of exported FS. 1578 */ 1579 bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); 1580 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 1581 1582 if ((error = VFS_ROOT(mp, &rvp))) 1583 return (error); 1584 1585 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 1586 return (error); 1587 1588 vput(rvp); 1589 1590 /* 1591 * If an indexfile was specified, pull it in. 1592 */ 1593 if (argp->ex_indexfile != NULL) { 1594 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 1595 M_WAITOK); 1596 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 1597 MAXNAMLEN, (size_t *)0); 1598 if (!error) { 1599 /* 1600 * Check for illegal filenames. 1601 */ 1602 for (cp = nfs_pub.np_index; *cp; cp++) { 1603 if (*cp == '/') { 1604 error = EINVAL; 1605 break; 1606 } 1607 } 1608 } 1609 if (error) { 1610 FREE(nfs_pub.np_index, M_TEMP); 1611 return (error); 1612 } 1613 } 1614 1615 nfs_pub.np_mount = mp; 1616 nfs_pub.np_valid = 1; 1617 return (0); 1618 } 1619 1620 struct netcred * 1621 vfs_export_lookup(mp, nep, nam) 1622 register struct mount *mp; 1623 struct netexport *nep; 1624 struct mbuf *nam; 1625 { 1626 register struct netcred *np; 1627 register struct radix_node_head *rnh; 1628 struct sockaddr *saddr; 1629 1630 np = NULL; 1631 if (mp->mnt_flag & MNT_EXPORTED) { 1632 /* 1633 * Lookup in the export list first. 1634 */ 1635 if (nam != NULL) { 1636 saddr = mtod(nam, struct sockaddr *); 1637 rnh = nep->ne_rtable[saddr->sa_family]; 1638 if (rnh != NULL) { 1639 np = (struct netcred *) 1640 (*rnh->rnh_matchaddr)((caddr_t)saddr, 1641 rnh); 1642 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 1643 np = NULL; 1644 } 1645 } 1646 /* 1647 * If no address match, use the default if it exists. 
1648 */ 1649 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 1650 np = &nep->ne_defexported; 1651 } 1652 return (np); 1653 } 1654 1655 /* 1656 * Do the usual access checking. 1657 * file_mode, uid and gid are from the vnode in question, 1658 * while acc_mode and cred are from the VOP_ACCESS parameter list 1659 */ 1660 int 1661 vaccess(type, file_mode, uid, gid, acc_mode, cred) 1662 enum vtype type; 1663 mode_t file_mode; 1664 uid_t uid; 1665 gid_t gid; 1666 mode_t acc_mode; 1667 struct ucred *cred; 1668 { 1669 mode_t mask; 1670 1671 /* 1672 * Super-user always gets read/write access, but execute access depends 1673 * on at least one execute bit being set. 1674 */ 1675 if (cred->cr_uid == 0) { 1676 if ((acc_mode & VEXEC) && type != VDIR && 1677 (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0) 1678 return (EACCES); 1679 return (0); 1680 } 1681 1682 mask = 0; 1683 1684 /* Otherwise, check the owner. */ 1685 if (cred->cr_uid == uid) { 1686 if (acc_mode & VEXEC) 1687 mask |= S_IXUSR; 1688 if (acc_mode & VREAD) 1689 mask |= S_IRUSR; 1690 if (acc_mode & VWRITE) 1691 mask |= S_IWUSR; 1692 return ((file_mode & mask) == mask ? 0 : EACCES); 1693 } 1694 1695 /* Otherwise, check the groups. */ 1696 if (cred->cr_gid == gid || groupmember(gid, cred)) { 1697 if (acc_mode & VEXEC) 1698 mask |= S_IXGRP; 1699 if (acc_mode & VREAD) 1700 mask |= S_IRGRP; 1701 if (acc_mode & VWRITE) 1702 mask |= S_IWGRP; 1703 return ((file_mode & mask) == mask ? 0 : EACCES); 1704 } 1705 1706 /* Otherwise, check everyone else. */ 1707 if (acc_mode & VEXEC) 1708 mask |= S_IXOTH; 1709 if (acc_mode & VREAD) 1710 mask |= S_IROTH; 1711 if (acc_mode & VWRITE) 1712 mask |= S_IWOTH; 1713 return ((file_mode & mask) == mask ? 0 : EACCES); 1714 } 1715 1716 /* 1717 * Unmount all file systems. 1718 * We traverse the list in reverse order under the assumption that doing so 1719 * will avoid needing to worry about dependencies. 
1720 */ 1721 void 1722 vfs_unmountall() 1723 { 1724 register struct mount *mp, *nmp; 1725 int allerror, error; 1726 1727 for (allerror = 0, 1728 mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 1729 nmp = mp->mnt_list.cqe_prev; 1730 #ifdef DEBUG 1731 printf("unmounting %s (%s)...\n", 1732 mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname); 1733 #endif 1734 if (vfs_busy(mp)) 1735 continue; 1736 if ((error = dounmount(mp, MNT_FORCE, &proc0)) != 0) { 1737 printf("unmount of %s failed with error %d\n", 1738 mp->mnt_stat.f_mntonname, error); 1739 allerror = 1; 1740 } 1741 } 1742 if (allerror) 1743 printf("WARNING: some file systems would not unmount\n"); 1744 } 1745 1746 /* 1747 * Sync and unmount file systems before shutting down. 1748 */ 1749 void 1750 vfs_shutdown() 1751 { 1752 register struct buf *bp; 1753 int iter, nbusy, unmountem; 1754 1755 /* 1756 * If we've panic'd, don't make the situation potentially 1757 * worse by unmounting the file systems; just attempt to 1758 * sync. 1759 */ 1760 if (panicstr != NULL) 1761 unmountem = 0; 1762 else 1763 unmountem = 1; 1764 1765 printf("syncing disks... "); 1766 1767 /* XXX Should suspend scheduling. */ 1768 (void) spl0(); 1769 1770 sys_sync(&proc0, (void *)0, (register_t *)0); 1771 1772 /* Wait for sync to finish. */ 1773 for (iter = 0; iter < 20; iter++) { 1774 nbusy = 0; 1775 for (bp = &buf[nbuf]; --bp >= buf; ) 1776 if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY) 1777 nbusy++; 1778 if (nbusy == 0) 1779 break; 1780 printf("%d ", nbusy); 1781 DELAY(40000 * iter); 1782 } 1783 if (nbusy) { 1784 printf("giving up\n"); 1785 unmountem = 0; 1786 } else 1787 printf("done\n"); 1788 1789 if (unmountem) { 1790 /* Release inodes held by texts before update. */ 1791 vnode_pager_umount(NULL); 1792 #ifdef notdef 1793 vnshutdown(); 1794 #endif 1795 /* Unmount file systems. */ 1796 vfs_unmountall(); 1797 } 1798 } 1799 1800 /* 1801 * Mount the root file system. 
If the operator didn't specify a 1802 * file system to use, try all possible file systems until one 1803 * succeeds. 1804 */ 1805 int 1806 vfs_mountroot() 1807 { 1808 extern int (*mountroot) __P((void)); 1809 int i; 1810 1811 if (root_device == NULL) 1812 panic("vfs_mountroot: root device unknown"); 1813 1814 switch (root_device->dv_class) { 1815 case DV_IFNET: 1816 if (rootdev != NODEV) 1817 panic("vfs_mountroot: rootdev set for DV_IFNET"); 1818 break; 1819 1820 case DV_DISK: 1821 if (rootdev == NODEV) 1822 panic("vfs_mountroot: rootdev not set for DV_DISK"); 1823 break; 1824 1825 default: 1826 printf("%s: inappropriate for root file system\n", 1827 root_device->dv_xname); 1828 return (ENODEV); 1829 } 1830 1831 /* 1832 * If user specified a file system, use it. 1833 */ 1834 if (mountroot != NULL) 1835 return ((*mountroot)()); 1836 1837 /* 1838 * Try each file system currently configured into the kernel. 1839 */ 1840 for (i = 0; i < nvfssw; i++) { 1841 if (vfssw[i] == NULL || vfssw[i]->vfs_mountroot == NULL) 1842 continue; 1843 #ifdef DEBUG 1844 printf("mountroot: trying %s...\n", vfssw[i]->vfs_name); 1845 #endif 1846 if ((*vfssw[i]->vfs_mountroot)() == 0) { 1847 printf("root file system type: %s\n", 1848 vfssw[i]->vfs_name); 1849 return (0); 1850 } 1851 } 1852 1853 printf("no file system for %s", root_device->dv_xname); 1854 if (root_device->dv_class == DV_DISK) 1855 printf(" (dev 0x%x)", rootdev); 1856 printf("\n"); 1857 return (EFTYPE); 1858 } 1859 1860 /* 1861 * Given a file system name, look up the vfsops for that 1862 * file system, or return NULL if file system isn't present 1863 * in the kernel. 1864 */ 1865 struct vfsops * 1866 vfs_getopsbyname(name) 1867 const char *name; 1868 { 1869 int i; 1870 1871 for (i = 0; i < nvfssw; i++) 1872 if (vfssw[i] != NULL && strcmp(vfssw[i]->vfs_name, name) == 0) 1873 return (vfssw[i]); 1874 return (NULL); 1875 } 1876