/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 *	$Id: vfs_subr.c,v 1.31 1994/06/13 15:37:55 mycroft Exp $
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

/*
 * Insq/Remq for the vnode usage lists.
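 * bufinsvn() links a buffer onto the head of a vnode's clean or dirty
 * buffer list; bufremvn() unlinks it and marks b_vnbufs.le_next NOLIST
 * so that brelvp() and reassignbuf() can tell whether the buffer is
 * still on a list.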
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {  \
	LIST_REMOVE(bp, b_vnbufs); \
	(bp)->b_vnbufs.le_next = NOLIST; \
}
TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct mntlist mountlist;			/* mounted filesystem list */

/*
 * Initialize the vnode management data structures.
 */
vntblinit()
{

	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&mountlist);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
vfs_lock(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MLOCK) {
		mp->mnt_flag |= MNT_MWAIT;
		tsleep((caddr_t)mp, PVFS, "vfslock", 0);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t)mp);
	}
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
vfs_busy(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0);
	}
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t)&mp->mnt_flag);
	}
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next)
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
			return (mp);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
getnewfsid(mp, mtype)
	struct mount *mp;
	int mtype;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;

	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + 11, 0);	/* XXX */
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.tqh_first != NULL) {
		while (getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(type)
	char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	/* XXX These next two used to be one line, but for a GCC bug. */
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)();
extern void vclean();
long numvnodes;
extern struct vattr va_null;

/*
 * Return the next vnode from the free list.
 */
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)();
	struct vnode **vpp;
{
	register struct vnode *vp;
	int s;

	if ((vnode_free_list.tqh_first == NULL &&
	     numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		if ((vp = vnode_free_list.tqh_first) == NULL) {
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgone(vp);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL)
		return;
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if (vp = bp->b_vp) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			if (vp->v_numoutput < 0)
				panic("vwakeup: neg numoutput");
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
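 * V_SAVE forces dirty data to disk (via VOP_FSYNC) before the buffers
 * are invalidated; V_SAVEMETA leaves buffers with negative logical
 * block numbers (indirect-block metadata) on the vnode's lists.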
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
				    slpflag | (PRIBIO + 1), "vinvalbuf",
				    slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

void
vflushbuf(vp, sync)
	register struct vnode *vp;
	int sync;
{
	register struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
		nbp = bp->b_vnbufs.le_next;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
	}
	splx(s);
	if (vp->v_dirtyblkhd.lh_first != NULL) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 */
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 */
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		if (vget(vp, 1))
			goto loop;
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, lockflag)
	register struct vnode *vp;
	int lockflag;
{

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined either by checking
	 * that the VXLOCK flag is set, or that the use count is
	 * zero with the back pointer set to show that it has been
	 * removed from the free list by getnewvnode. The VXLOCK
	 * flag may not have been set yet because vclean is blocked in
	 * the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
	 */
	if ((vp->v_flag & VXLOCK) ||
	    (vp->v_usecount == 0 &&
	     vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
		vp->v_flag |= VXWANT;
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (1);
	}
	if (vp->v_usecount == 0)
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
	vp->v_usecount++;
	if (lockflag)
		VOP_LOCK(vp);
	return (0);
}

/*
 * Vnode reference, just increment the count
 */
void
vref(vp)
	struct vnode *vp;
{

	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	register struct vnode *vp;
{

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	register struct vnode *vp;
{

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0)
		return;
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	VOP_INACTIVE(vp);
}

/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over any vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgone(vp);
			} else {
				vclean(vp, 0);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		VREF(vp);
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp);
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
	/*
	 * Any other processes trying to obtain this lock must first
	 * wait for VXLOCK to clear, then call the new lock operation.
	 */
	VOP_UNLOCK(vp);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
		VOP_INACTIVE(vp);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vgoneall(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			tsleep((caddr_t)vp, PINOD, "vgoneall", 0);
			return;
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL) {
		LIST_REMOVE(vp, v_mntvnodes);
		vp->v_mount = NULL;
	}
	/*
	 * If special device, remove it from special device alias list.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0 &&
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
	    vnode_free_list.tqh_first != vp) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (1);
	}
	return (0);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next)
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
	}
}
#endif

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
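 * If `where' is NULL, only an estimate of the space needed is returned
 * in *sizep, padded with KINFO_VNODESLOP entries of slop.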
 */
/* ARGSUSED */
sysctl_vnode(where, sizep)
	char *where;
	size_t *sizep;
{
	register struct mount *mp, *nmp;
	struct vnode *vp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
		nmp = mp->mnt_list.tqe_next;
		if (vfs_busy(mp))
			continue;
		savebp = bp;
again:
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				*sizep = bp - where;
				return (ENOMEM);
			}
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
		}
		vfs_unbusy(mp);
	}

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON)
				return (EBUSY);
		}
	}
	return (0);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
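 * A request with ex_addrlen == 0 establishes the default export for the
 * mount point; otherwise the address (and optional mask) are copied in
 * and entered in a per-address-family radix tree.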
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t)np, i);
	saddr = (struct sockaddr *)(np + 1);
	if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	caddr_t w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if (rnh = nep->ne_rtable[i]) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred,
			    (caddr_t)rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (error = vfs_hang_addrlist(mp, nep, argp))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
				    (*rnh->rnh_matchaddr)((caddr_t)saddr,
				    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}
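
/*
 * Usage sketch (illustrative only, not part of this file's interface):
 * a filesystem's node-allocation routine typically obtains a fresh
 * vnode from getnewvnode() and attaches its private data, e.g.
 *
 *	struct vnode *vp;
 *	int error;
 *
 *	if (error = getnewvnode(VT_UFS, mp, ufs_vnodeop_p, &vp))
 *		return (error);
 *	vp->v_data = (caddr_t)ip;	(attach fs-private data)
 *
 * VT_UFS and ufs_vnodeop_p stand in for whatever vnode tag and vnodeop
 * vector the calling filesystem actually uses.
 */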