/*	$NetBSD: vfs_subr.c,v 1.34 1994/07/10 05:53:25 cgd Exp $	*/

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */
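
/*
 * Illustration (added annotation, not from the original source): these
 * tables back the IFTOVT()/VTTOIF() conversions between inode mode bits
 * and vnode types; e.g. iftovt_tab[(S_IFDIR & S_IFMT) >> 12] is
 * iftovt_tab[4], i.e. VDIR.
 */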

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {  \
	LIST_REMOVE(bp, b_vnbufs); \
	(bp)->b_vnbufs.le_next = NOLIST; \
}
TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct mntlist mountlist;			/* mounted filesystem list */

/*
 * Initialize the vnode management data structures.
 */
vntblinit()
{

	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&mountlist);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
vfs_lock(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MLOCK) {
		mp->mnt_flag |= MNT_MWAIT;
		tsleep((caddr_t)mp, PVFS, "vfslock", 0);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t)mp);
	}
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
vfs_busy(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0);
	}
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t)&mp->mnt_flag);
	}
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next)
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
			return (mp);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
getnewfsid(mp, mtype)
	struct mount *mp;
	int mtype;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;

	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + 11, 0);	/* XXX */
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.tqh_first != NULL) {
		while (getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(type)
	char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}
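
/*
 * Worked example (added annotation): makefstype("ufs") accumulates
 * 'u' (0x75) -> 0x75, then (0x75 << 2) ^ 'f' (0x66) -> 0x1b2, then
 * (0x1b2 << 2) ^ 's' (0x73) -> 0x6bb.  The result is only 'unique' for
 * short names, since earlier characters eventually shift out of the
 * width of a long.
 */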

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	/* XXX These next two used to be one line, but for a GCC bug. */
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)();
extern void vclean();
long numvnodes;
extern struct vattr va_null;

/*
 * Return the next vnode from the free list.
 */
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)();
	struct vnode **vpp;
{
	register struct vnode *vp;
	int s;

	if ((vnode_free_list.tqh_first == NULL &&
	     numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		if ((vp = vnode_free_list.tqh_first) == NULL) {
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgone(vp);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL)
		return;
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if (vp = bp->b_vp) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			if (vp->v_numoutput < 0)
				panic("vwakeup: neg numoutput");
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}
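
/*
 * Note (added annotation): vwakeup() is the wakeup half of the VBWAIT
 * protocol.  A thread that must drain writes sets VBWAIT and sleeps on
 * &vp->v_numoutput (see vflushbuf() below); vwakeup() runs at I/O
 * completion, drops the count, and posts the wakeup when it reaches zero.
 */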

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
					slpflag | (PRIBIO + 1), "vinvalbuf",
					slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

void
vflushbuf(vp, sync)
	register struct vnode *vp;
	int sync;
{
	register struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
		nbp = bp->b_vnbufs.le_next;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
	}
	splx(s);
	if (vp->v_dirtyblkhd.lh_first != NULL) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 */
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}
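
/*
 * Note (added annotation): bgetvp() above and brelvp() below bracket a
 * buffer's association with a vnode.  Each attached buffer takes a hold
 * reference via VHOLD(); brelvp() drops it with HOLDRELE(), so v_holdcnt
 * tracks how many buffers currently point at the vnode.
 */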

/*
 * Disassociate a buffer from a vnode.
 */
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 */
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
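
/*
 * Usage sketch (added annotation, not from the original source):
 * machine-dependent startup code typically obtains the root device
 * vnode this way, e.g.
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("cannot obtain root device vnode");
 *
 * where rootdev is the dev_t chosen at configuration time.
 */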

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		if (vget(vp, 1))
			goto loop;
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, lockflag)
	register struct vnode *vp;
	int lockflag;
{

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined either by checking
	 * that the VXLOCK flag is set, or that the use count is
	 * zero with the back pointer set to show that it has been
	 * removed from the free list by getnewvnode. The VXLOCK
	 * flag may not have been set yet because vclean is blocked in
	 * the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
	 */
	if ((vp->v_flag & VXLOCK) ||
	    (vp->v_usecount == 0 &&
	     vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
		vp->v_flag |= VXWANT;
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (1);
	}
	if (vp->v_usecount == 0)
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
	vp->v_usecount++;
	if (lockflag)
		VOP_LOCK(vp);
	return (0);
}

/*
 * Vnode reference, just increment the count
 */
void
vref(vp)
	struct vnode *vp;
{

	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	register struct vnode *vp;
{

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	register struct vnode *vp;
{

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0)
		return;
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	VOP_INACTIVE(vp);
}

/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}
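
/*
 * Note (added annotation): the routines above maintain two distinct
 * counts.  v_usecount (vget/vref/vrele) counts active users and keeps
 * the vnode off the free list; v_holdcnt (vhold/holdrele) counts passive
 * page and buffer references.  Only when v_usecount drops to zero does
 * vrele() return the vnode to the tail of the LRU free list.
 */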

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgone(vp);
			} else {
				vclean(vp, 0);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}
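
/*
 * Usage sketch (added annotation, not from the original source): a
 * filesystem's unmount entry point typically calls this as
 *
 *	error = vflush(mp, NULLVP, mntflags & MNT_FORCE ? FORCECLOSE : 0);
 *
 * and aborts the unmount if EBUSY comes back.
 */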

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		VREF(vp);
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp);
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
	/*
	 * Any other processes trying to obtain this lock must first
	 * wait for VXLOCK to clear, then call the new lock operation.
	 */
	VOP_UNLOCK(vp);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
		VOP_INACTIVE(vp);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vgoneall(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			tsleep((caddr_t)vp, PINOD, "vgoneall", 0);
			return;
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
}
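
/*
 * Note (added annotation): VXLOCK/VXWANT form the exclusion protocol for
 * vnode teardown.  vclean() holds VXLOCK for the duration of the purge;
 * anyone who finds VXLOCK set (vget(), vgone(), vgoneall()) marks VXWANT
 * and sleeps on the vnode address, and vclean() issues the wakeup once
 * the vnode has been switched to the dead vnode operations.
 */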

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL) {
		LIST_REMOVE(vp, v_mntvnodes);
		vp->v_mount = NULL;
	}
	/*
	 * If special device, remove it from special device alias list.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0 &&
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
	    vnode_free_list.tqh_first != vp) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (1);
	}
	return (0);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next)
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
	}
}
#endif
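
/*
 * Note (added annotation): the sysctl dump below sizes the user buffer
 * with KINFO_VNODESLOP extra entries of headroom, so that vnodes created
 * between the sizing call (where == NULL) and the copyout pass do not
 * immediately overflow a correctly sized buffer.
 */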

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
sysctl_vnode(where, sizep)
	char *where;
	size_t *sizep;
{
	register struct mount *mp, *nmp;
	struct vnode *vp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
		nmp = mp->mnt_list.tqe_next;
		if (vfs_busy(mp))
			continue;
		savebp = bp;
again:
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				*sizep = bp - where;
				return (ENOMEM);
			}
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
		}
		vfs_unbusy(mp);
	}

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON)
				return (EBUSY);
		}
	}
	return (0);
}
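
/*
 * Note (added annotation): vfs_mountedon() above is the check used when
 * mounting on a block device; because the same device can be reached
 * through several aliased vnodes, SI_MOUNTEDON must be tested on every
 * vnode in the alias chain, not just the one presented by the caller.
 */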

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t)np, i);
	saddr = (struct sockaddr *)(np + 1);
	if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
					dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
		np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) {	/* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	caddr_t w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}
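
/*
 * Usage sketch (added annotation, not from the original source):
 * vfs_export() below is the entry point filesystems use for these lists;
 * a mount routine handling export arguments typically ends with
 *
 *	return (vfs_export(mp, &ump->um_export, &args.export));
 *
 * where ump is the filesystem's private mount structure.
 */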

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if (rnh = nep->ne_rtable[i]) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred,
			    (caddr_t)rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (error = vfs_hang_addrlist(mp, nep, argp))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
					    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}