/*
 * Copyright (c) 1989 The Regents of the University of California.
 * All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#)vfs_subr.c	7.60 (Berkeley) 6/21/91
 *	$Id: vfs_subr.c,v 1.29 1994/05/17 04:22:04 cgd Exp $
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <miscfs/specfs/specdev.h>	/* XXX */
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <sys/sysctl.h>

/*
 * Flag to allow forcible unmounting.
 */
int doforce = 1;

int prtactive;	/* 1 => print out reclaim of active vnodes */

void vprint __P((char *label, struct vnode *vp));

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {						\
	LIST_REMOVE(bp, b_vnbufs);				\
	(bp)->b_vnbufs.le_next = NOLIST;			\
}
TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct mntlist mountlist;			/* mounted filesystem list */
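/*
 * The free list holds vnodes whose v_usecount has dropped to zero;
 * getnewvnode() below either allocates a fresh vnode or recycles one
 * from the head of this list (panicking with "free vnode isn't" if a
 * supposedly free vnode is still referenced).  bufremvn() marks a
 * buffer as off-list by storing the NOLIST sentinel in le_next, which
 * brelvp() and reassignbuf() test before removing a buffer from a
 * vnode's buffer list.
 */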
/*
 * Remove a mount point from the list of mounted filesystems.
 * Unmount of the root is illegal.
 */
void
vfs_remove(mp)
	register struct mount *mp;
{

	if (mp == rootfs)
		panic("vfs_remove: unmounting root");
	TAILQ_REMOVE(&mountlist, mp, mnt_list);
	mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
	vfs_unlock(mp);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
vfs_lock(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MLOCK) {
		mp->mnt_flag |= MNT_MWAIT;
		tsleep((caddr_t)mp, PVFS, "vfslock", 0);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t)mp);
	}
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
vfs_busy(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0);
	}
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t)&mp->mnt_flag);
	}
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next)
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
			return (mp);
	return ((struct mount *)0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON)
				return (EBUSY);
		}
	}
	return (0);
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid = vap->va_size =
		vap->va_blocksize = vap->va_rdev = vap->va_bytes =
		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern struct vnodeops dead_vnodeops, spec_vnodeops;
extern void vclean();
long numvnodes;
struct vattr va_null;
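/*
 * Illustrative sketch (hypothetical caller, not from this file): code
 * that walks a mount point's vnode list brackets the work with the
 * busy protocol above, skipping filesystems in mid-unmount:
 *
 *	if (vfs_busy(mp))
 *		return;		... mp is being unmounted; leave it ...
 *	... operate on mp->mnt_vnodelist ...
 *	vfs_unbusy(mp);
 *
 * sysctl_vnode() at the end of this file uses exactly this pattern.
 */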
/*
 * Initialize the vnode structures and initialize each file system type.
 */
void
vfsinit()
{
	int i;

	/* initialize the vnode management data structures */
	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&mountlist);
	/*
	 * Initialize the vnode name cache
	 */
	nchinit();
	/*
	 * Initialize each file system type.
	 */
	vattr_null(&va_null);
	for (i = 0; i < nvfssw; i++) {
		if (vfssw[i] == NULL)
			continue;
		(*(vfssw[i]->vfs_init))();
	}
}

/*
 * Get a new unique fsid
 */
void
getnewfsid(mp, mtype)
	struct mount *mp;
	int mtype;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;

	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + 11, 0);	/* XXX */
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (rootfs) {
		while (getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(type)
	char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}

/*
 * Return the next vnode from the free list.
 */
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	struct vnodeops *vops;
	struct vnode **vpp;
{
	register struct vnode *vp, *vq;

	if ((vnode_free_list.tqh_first == NULL &&
	     numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		if ((vp = vnode_free_list.tqh_first) == NULL) {
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");

		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		if (vp->v_type != VBAD)
			vgone(vp);
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	vp->v_usecount = 1;
	*vpp = vp;
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{
	register struct vnode *vq;

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL)
		return;
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Make sure all write-behind blocks associated
 * with mount point are flushed out (from sync).
 */
mntflushbuf(mountp, flags)
	struct mount *mountp;
	int flags;
{
	register struct vnode *vp;

	if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
		panic("mntflushbuf: not busy");
loop:
	for (vp = mountp->mnt_vnodelist.lh_first; vp;
	     vp = vp->v_mntvnodes.le_next) {
		if (VOP_ISLOCKED(vp))
			continue;
		if (vget(vp, 1))
			goto loop;
		vflushbuf(vp, flags);
		vput(vp);
		if (vp->v_mount != mountp)
			goto loop;
	}
}
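/*
 * The "goto loop" restart pattern above (also used by mntinvalbuf()
 * and vflush() below) re-walks the mount point's vnode list from the
 * top whenever vget() had to sleep or the vnode migrated to another
 * mount, since either event means the list may have changed while we
 * were blocked.
 */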
/*
 * Flush all dirty buffers associated with a vnode.
 */
vflushbuf(vp, flags)
	register struct vnode *vp;
	int flags;
{
	register struct buf *bp;
	struct buf *nbp;
	int s;

loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
		nbp = bp->b_vnbufs.le_next;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 * NB: This is really specific to ufs, but is done here
		 * as it is easier and quicker.
		 */
		if (bp->b_vp == vp || (flags & B_SYNC) == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	splx(s);
	if ((flags & B_SYNC) == 0)
		return;
	s = splbio();
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflshbuf", 0);
	}
	splx(s);
	if (vp->v_dirtyblkhd.lh_first != NULL) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_dirtyoff = bp->b_dirtyend = 0;
	if (vp = bp->b_vp) {
		vp->v_numoutput--;
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			if (vp->v_numoutput < 0)
				panic("vwakeup: neg numoutput");
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Invalidate in core blocks belonging to closed or unmounted filesystem
 *
 * Go through the list of vnodes associated with the file system;
 * for each vnode invalidate any buffers that it holds.  Normally
 * this routine is preceded by a bflush call, so that on a quiescent
 * filesystem there will be no dirty buffers when we are done.  Binval
 * returns the count of dirty buffers when it is finished.
 */
mntinvalbuf(mountp)
	struct mount *mountp;
{
	register struct vnode *vp;
	int dirty = 0;

	if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
		panic("mntinvalbuf: not busy");
loop:
	for (vp = mountp->mnt_vnodelist.lh_first; vp;
	     vp = vp->v_mntvnodes.le_next) {
		if (vget(vp, 1))
			goto loop;
		dirty += vinvalbuf(vp, 1);
		vput(vp);
		if (vp->v_mount != mountp)
			goto loop;
	}
	return (dirty);
}
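/*
 * The splbio()/splx() brackets in vflushbuf() (and in vinvalbuf()
 * below) raise the processor priority so the buffer lists cannot be
 * modified by block-I/O interrupt handlers while they are being
 * walked; the priority is dropped again (splx) before each potentially
 * blocking write, and tsleep() itself sleeps at base priority.
 */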
/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
vinvalbuf(vp, save)
	register struct vnode *vp;
	int save;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, dirty = 0;

	for (;;) {
		if (blist = vp->v_dirtyblkhd.lh_first)
			/* void */;
		else if (blist = vp->v_cleanblkhd.lh_first)
			/* void */;
		else
			break;
		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				tsleep((caddr_t)bp, PRIBIO + 1, "vinvalbuf", 0);
				splx(s);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			if (save && (bp->b_flags & B_DELWRI)) {
				dirty++;
				(void) bwrite(bp);
				break;
			}
			if (bp->b_vp != vp)
				reassignbuf(bp, bp->b_vp);
			else
				bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (vp->v_dirtyblkhd.lh_first != NULL ||
	    vp->v_cleanblkhd.lh_first != NULL)
		panic("vinvalbuf: flush failed");
	return (dirty);
}

/*
 * Associate a buffer with a vnode.
 */
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	register struct vnode *vq;
	register struct buf *bq;

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 */
brelvp(bp)
	register struct buf *bp;
{
	struct buf *bq;
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	struct buf *bq;
	struct buflists *listheadp;

	if (newvp == NULL)
		panic("reassignbuf: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	return (getdevvp(dev, vpp, VCHR));
}
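/*
 * Illustrative usage sketch (hypothetical caller; "rootdev" and
 * "rootvp" stand in for whatever the machine-dependent code uses):
 * mounting the root filesystem wants a vnode for the boot device,
 * obtained roughly as
 *
 *	struct vnode *rootvp;
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("no root device vnode");
 *
 * after which rootvp can be handed to the filesystem's mount routine.
 */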
/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *)0, &spec_vnodeops, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device).  If such an alias exists, deallocate
 * the existing contents and return the aliased vnode.  The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		if (vget(vp, 1))
			goto loop;
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it.  If the vnode lock bit is set,
 * the vnode is being eliminated in vgone.  The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
vget(vp, lockflag)
	register struct vnode *vp;
	int lockflag;
{
	register struct vnode *vq;

	if ((vp->v_flag & VXLOCK) ||
	    (vp->v_usecount == 0 &&
	     vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
		vp->v_flag |= VXWANT;
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (1);
	}
	if (vp->v_usecount == 0)
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
	vp->v_usecount++;
	if (lockflag)
		VOP_LOCK(vp);
	return (0);
}

/*
 * Vnode reference, just increment the count
 */
void
vref(vp)
	struct vnode *vp;
{

	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	register struct vnode *vp;
{
	VOP_UNLOCK(vp);
	vrele(vp);
}
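/*
 * Illustrative sketch of the reference protocol (hypothetical caller):
 * a vnode is pinned with vget() when taken from a list that may hold
 * unreferenced vnodes, with vref()/VREF() when the caller already
 * holds a reference, and is released with vput() (unlock + release)
 * or vrele() (release only):
 *
 *	if (vget(vp, 1))
 *		goto retry;	... vnode was being recycled ...
 *	... use the locked vnode ...
 *	vput(vp);
 */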
/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0)
		return;
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	VOP_INACTIVE(vp, p);
}

/*
 * Page or buffer structure gets a reference.
 */
vhold(vp)
	register struct vnode *vp;
{

	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
holdrele(vp)
	register struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error).  If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
int busyprt = 0;	/* patch to print out busy vnodes */

vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over any vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			continue;
		}
		/*
		 * For block or character devices, revert to an
		 * anonymous device.  For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgone(vp);
			} else {
				vclean(vp, 0);
				vp->v_op = &spec_vnodeops;
				insmntque(vp, (struct mount *)0);
			}
			continue;
		}
		if (busyprt)
			vprint("vflush: busy vnode", vp);
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}
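/*
 * Note the asymmetry under FORCECLOSE above: an active block or
 * character device vnode is not destroyed but demoted to an anonymous
 * device (vclean() plus spec_vnodeops, off any mount list), so an open
 * device node keeps working even after its filesystem is forcibly
 * unmounted; every other active vnode is simply vgone()'d.
 */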
/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	struct vnodeops *origops;
	int active;
	struct proc *p = curproc;	/* XXX */

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		VREF(vp);
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out.  The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the buffer list is being cleaned out.
	 */
	VOP_LOCK(vp);
	if (flags & DOCLOSE)
		vinvalbuf(vp, 1);
	/*
	 * Prevent any further operations on the vnode from
	 * being passed through to the old file system.
	 */
	origops = vp->v_op;
	vp->v_op = &dead_vnodeops;
	vp->v_tag = VT_NON;
	/*
	 * If purging an active vnode, it must be unlocked, closed,
	 * and deactivated before being reclaimed.
	 */
	(*(origops->vop_unlock))(vp);
	if (active) {
		if (flags & DOCLOSE)
			(*(origops->vop_close))(vp, IO_NDELAY, NOCRED, p);
		(*(origops->vop_inactive))(vp, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if ((*(origops->vop_reclaim))(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);
	/*
	 * Done with purge, notify sleepers in vget of the grim news.
	 */
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vgoneall(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			tsleep((caddr_t)vp, PINOD, "vgoneall", 0);
			return;
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
}
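/*
 * VXLOCK/VXWANT form a simple handshake: vclean() (and vgoneall())
 * set VXLOCK while a vnode is being torn down; any process that finds
 * VXLOCK set (see vget(), vgone(), vgoneall()) marks VXWANT and
 * tsleep()s on the vnode; the final wakeup() in vclean() releases all
 * such sleepers once the vnode is safely dead.
 */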
/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL) {
		LIST_REMOVE(vp, v_mntvnodes);
		vp->v_mount = NULL;
	}
	/*
	 * If special device, remove it from special device alias list.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx != NULL)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist, move it to the head of the list.
	 */
	if (vp->v_usecount == 0 &&
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
	    vnode_free_list.tqh_first != vp) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (0);
	}
	return (1);
}

/*
 * Calculate the total number of references to a special device.
 */
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);

	for (count = 0, vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	printf("\n\t");
	VOP_PRINT(vp);
}
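/*
 * Illustrative vprint() output (values invented for the example; the
 * indented second line comes from the filesystem's own VOP_PRINT):
 *
 *	vflush: busy vnode: type VDIR, usecount 2, writecount 0,
 *	    refcount 1, flags (VROOT)
 *		tag VT_UFS, ino 2, ...
 */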
#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
		for (vp = mp->mnt_vnodelist.lh_first; vp != NULL;
		     vp = vp->v_mntvnodes.le_next)
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
	}
}
#endif

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define	KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
sysctl_vnode(where, sizep)
	char *where;
	size_t *sizep;
{
	register struct mount *mp, *nmp;
	struct vnode *vp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

#define	VPTRSZ	sizeof (struct vnode *)
#define	VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
		nmp = mp->mnt_list.tqe_next;
		if (vfs_busy(mp))
			continue;
		savebp = bp;
again:
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				*sizep = bp - where;
				return (ENOMEM);
			}
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
		}
		vfs_unbusy(mp);
	}

	*sizep = bp - where;
	return (0);
}
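/*
 * The buffer filled in by sysctl_vnode() is a packed sequence of
 * (vnode address, vnode contents) pairs, VPTRSZ + VNODESZ bytes each;
 * the address lets userland (e.g. pstat-style tools) correlate the
 * copied vnode images with kernel pointers.  The size estimate
 * returned for a NULL buffer is padded by KINFO_VNODESLOP entries to
 * absorb vnodes created between the size query and the actual dump.
 */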