1 /* $NetBSD: vfs_subr.c,v 1.126 2000/05/28 04:13:56 mycroft Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1989, 1993 42 * The Regents of the University of California. All rights reserved. 43 * (c) UNIX System Laboratories, Inc. 44 * All or some portions of this file are derived from material licensed 45 * to the University of California by American Telephone and Telegraph 46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 47 * the permission of UNIX System Laboratories, Inc. 48 * 49 * Redistribution and use in source and binary forms, with or without 50 * modification, are permitted provided that the following conditions 51 * are met: 52 * 1. Redistributions of source code must retain the above copyright 53 * notice, this list of conditions and the following disclaimer. 54 * 2. Redistributions in binary form must reproduce the above copyright 55 * notice, this list of conditions and the following disclaimer in the 56 * documentation and/or other materials provided with the distribution. 57 * 3. All advertising materials mentioning features or use of this software 58 * must display the following acknowledgement: 59 * This product includes software developed by the University of 60 * California, Berkeley and its contributors. 61 * 4. Neither the name of the University nor the names of its contributors 62 * may be used to endorse or promote products derived from this software 63 * without specific prior written permission. 
64 * 65 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 66 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 68 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 69 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 70 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 71 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 72 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 73 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 74 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 75 * SUCH DAMAGE. 76 * 77 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 78 */ 79 80 /* 81 * External virtual filesystem routines 82 */ 83 84 #include "opt_ddb.h" 85 #include "opt_compat_netbsd.h" 86 #include "opt_compat_43.h" 87 88 #include <sys/param.h> 89 #include <sys/systm.h> 90 #include <sys/proc.h> 91 #include <sys/mount.h> 92 #include <sys/time.h> 93 #include <sys/fcntl.h> 94 #include <sys/vnode.h> 95 #include <sys/stat.h> 96 #include <sys/namei.h> 97 #include <sys/ucred.h> 98 #include <sys/buf.h> 99 #include <sys/errno.h> 100 #include <sys/malloc.h> 101 #include <sys/domain.h> 102 #include <sys/mbuf.h> 103 #include <sys/syscallargs.h> 104 #include <sys/device.h> 105 #include <sys/dirent.h> 106 107 #include <vm/vm.h> 108 #include <sys/sysctl.h> 109 110 #include <miscfs/specfs/specdev.h> 111 #include <miscfs/genfs/genfs.h> 112 #include <miscfs/syncfs/syncfs.h> 113 114 #include <uvm/uvm_extern.h> 115 #include <uvm/uvm.h> 116 #include <uvm/uvm_ddb.h> 117 118 enum vtype iftovt_tab[16] = { 119 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 120 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 121 }; 122 int vttoif_tab[9] = { 123 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 124 S_IFSOCK, S_IFIFO, 
	S_IFMT,			/* VBAD: no valid mapping; S_IFMT used as sentinel */
};

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */

/*
 * Insq/Remq for the vnode usage lists.
 * bufremvn marks the buffer as off-list by poisoning le_next with NOLIST,
 * which is what brelvp()/reassignbuf() test before removing.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
/* TAILQ_HEAD(freelst, vnode) vnode_free_list =	vnode free list (in vnode.h) */
struct freelst vnode_free_list =	/* recyclable vnodes with no held bufs */
    TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct freelst vnode_hold_list =	/* recyclable vnodes still holding bufs */
    TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct nfs_public nfs_pub;		/* publicly exported FS */

/* Locks: mountlist_slock guards mountlist; mntid_slock guards fsid
 * generation; mntvnode_slock guards per-mount vnode lists;
 * vnode_free_list_slock guards both free lists; spechash_slock guards
 * the special-device alias hash chains. */
struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
struct simplelock vnode_free_list_slock;
struct simplelock spechash_slock;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;		/* root device */

struct pool vnode_pool;			/* memory pool for vnodes */

/*
 * Local declarations.
 */
void insmntque __P((struct vnode *, struct mount *));
int getdevvp __P((dev_t, struct vnode **, enum vtype));
void vgoneall __P((struct vnode *));

static int vfs_hang_addrlist __P((struct mount *, struct netexport *,
    struct export_args *));
static int vfs_free_netcred __P((struct radix_node *, void *));
static void vfs_free_addrlist __P((struct netexport *));

#ifdef DEBUG
void printlockedvnodes __P((void));
#endif

/*
 * Initialize the vnode management data structures.
 * Called once at boot, before any filesystem activity.
 */
void
vntblinit()
{

	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	simple_lock_init(&vnode_free_list_slock);

	/* Vnodes come from a private pool, not malloc. */
	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
	    0, pool_page_alloc_nointr, pool_page_free_nointr, M_VNODE);

	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 *
 * Acquires a shared lockmgr lock on mp->mnt_lock; while an unmount is in
 * progress (MNT_UNMOUNT), sleeps and re-checks unless LK_NOWAIT is given.
 * Returns 0 on success, ENOENT if the mount went away (MNT_GONE) or
 * LK_NOWAIT was set, EDEADLK if LK_RECURSEFAIL and we are the unmounter.
 */
int
vfs_busy(mp, flags, interlkp)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
{
	int lkflags;

	while (mp->mnt_flag & MNT_UNMOUNT) {
		int gone;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
		    && mp->mnt_unmounter == curproc)
			return (EDEADLK);
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 *
		 * XXX MP: add spinlock protecting mnt_wcnt here once you
		 * can atomically unlock-and-sleep.
		 */
		mp->mnt_wcnt++;
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		mp->mnt_wcnt--;
		/* Sample MNT_GONE before waking the unmounter, which may
		 * free mp once mnt_wcnt drops to zero. */
		gone = mp->mnt_flag & MNT_GONE;

		if (mp->mnt_wcnt == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	/* A shared lock can only fail if the lock is poisoned. */
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 * Releases the shared busy lock taken by vfs_busy().
 */
void
vfs_unbusy(mp)
	struct mount *mp;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 *
 * On success the new mount is returned through *mpp, pre-busied
 * (vfs_busy), read-only, and with the vfsops refcount bumped.
 * Returns ENODEV if fstypename names no registered filesystem.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	for (vfsp = LIST_FIRST(&vfs_list); vfsp != NULL;
	     vfsp = LIST_NEXT(vfsp, vfs_list))
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	/* Cannot fail: the lock was just initialized and is uncontended. */
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfs_refcount++;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Lookup a mount point by filesystem identifier.
 * Returns the matching mount or NULL; takes and drops mountlist_slock.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid.
 *
 * val[0] is a fake device number derived from the fs type and a rolling
 * 16-bit counter; val[1] is the makefstype() hash. Probes vfs_getvfs()
 * until an unused id is found. Serialized by mntid_slock.
 */
void
vfs_getnewfsid(mp, fstypename)
	struct mount *mp;
	char *fstypename;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(fstypename);
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 * Simple shift-xor hash over the name bytes; collisions are possible
 * (hence the probing in vfs_getnewfsid).
 * NOTE(review): *type is plain char, so the xor value is implementation-
 * defined for bytes >= 0x80 — harmless for ASCII fs names.
 */
long
makefstype(type)
	char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}


/*
 * Set vnode attributes to VNOVAL.
 */
void
vattr_null(vap)
	struct vattr *vap;
{

	vap->va_type = VNON;

	/*
	 * Assign individually so that it is safe even if size and
	 * sign of each member are varied.
	 * (A memset to 0xff would not survive such changes.)
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_vaflags = 0;	/* flags field: cleared, not VNOVAL */
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;			/* total vnodes allocated from vnode_pool */

/*
 * Return the next vnode from the free list.
 *
 * Allocates a fresh vnode from the pool, or recycles one from
 * vnode_free_list / vnode_hold_list. On success *vpp holds a vnode with
 * usecount 1, type VNON, and vops/tag/mount set. Returns ENFILE when the
 * table is full and nothing is recyclable.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) __P((void *));
	struct vnode **vpp;
{
	struct proc *p = curproc;	/* XXX */
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int error = 0;
#ifdef DIAGNOSTIC
	int s;
#endif
	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error. If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock).
		 */
		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
		if (error && error != EDEADLK)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list. Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list.
 If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list. The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size. We are reticent to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */
	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	simple_lock(&vnode_free_list_slock);
	/* Note the side effect: listhd is set to whichever list the
	 * condition last examined, for use by the recycle branch. */
	if (numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(listhd = &vnode_free_list) == NULL &&
	    (TAILQ_FIRST(listhd = &vnode_hold_list) == NULL || toggle))) {
		simple_unlock(&vnode_free_list_slock);
		vp = pool_get(&vnode_pool, PR_WAITOK);
		memset((char *)vp, 0, sizeof(*vp));
		simple_lock_init(&vp->v_interlock);
		numvnodes++;
	} else {
		/* Scan for a vnode whose interlock we can take without
		 * blocking; layered (VLAYER) vnodes must also be unlocked. */
		for (vp = TAILQ_FIRST(listhd); vp != NULLVP;
		    vp = TAILQ_NEXT(vp, v_freelist)) {
			if (simple_lock_try(&vp->v_interlock)) {
				if ((vp->v_flag & VLAYER) == 0) {
					break;
				}
				if (VOP_ISLOCKED(vp) == 0)
					break;
				else
					simple_unlock(&vp->v_interlock);
			}
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			if (mp && error != EDEADLK)
				vfs_unbusy(mp);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't, vp %p", vp);
		TAILQ_REMOVE(listhd, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;
		/* Reclaim the old identity; vgonel consumes the interlock. */
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't, vp %p", vp);
		s = splbio();
		if (vp->v_numoutput)
			panic("clean vnode has pending I/O's, vp %p", vp);
		splx(s);
#endif
		/* Reset the read-ahead / clustering state of the recycled
		 * vnode so it starts life like a fresh allocation. */
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uvm.u_obj.vmobjlock);
	if (mp && error != EDEADLK)
		vfs_unbusy(mp);
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 * Both list manipulations are done under mntvnode_slock; mp may be NULL
 * to remove the vnode from any mount list.
 */
void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_flag & MNT_UNMOUNT) &&
	    !(mp->mnt_flag & MNT_SOFTDEP) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 * Called on write completion; wakes sleepers in vwaitbuf-style loops
 * (VBWAIT) once the last write drains.
 */
void
vwakeup(bp)
	struct buf *bp;
{
	struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if ((vp = bp->b_vp) != NULL) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput, vp %p", vp);
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 *
 * V_SAVE first fsyncs so no dirty data is lost. slpflag/slptimeo are
 * passed to tsleep when waiting on busy buffers; an interrupted sleep
 * aborts the flush with that error.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	struct buf *bp, *nbp;
	int s, error;

	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, p);
		if (error)
			return (error);
#ifdef DIAGNOSTIC
		s = splbio();
		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
			panic("vinvalbuf: dirty bufs, vp %p", vp);
		splx(s);
#endif
	}

	s = splbio();

restart:
	/* Any sleep may have changed both lists, so restart from scratch. */
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vinvalbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vinvalbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			bp->b_flags |= B_BUSY | B_VFLUSH;
			VOP_BWRITE(bp);
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	splx(s);

	return (0);
}

/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 *
 * Buffers with b_lblkno < lbn are kept; everything at or past lbn on
 * either list is invalidated. Same restart-on-sleep discipline as
 * vinvalbuf above.
 */
int
vtruncbuf(vp, lbn, slpflag, slptimeo)
	struct vnode *vp;
	daddr_t lbn;
	int slpflag, slptimeo;
{
	struct buf *bp, *nbp;
	int s, error;

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vtruncbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vtruncbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	splx(s);

	return (0);
}

/*
 * Flush all dirty buffers of a vnode. If sync is nonzero, also wait
 * for outstanding writes to drain (and retry if new dirty buffers
 * appeared meanwhile). Each write restarts the scan from the top
 * because splx()/bawrite() may let the list change under us.
 */
void
vflushbuf(vp, sync)
	struct vnode *vp;
	int sync;
{
	struct buf *bp, *nbp;
	int s;

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_flags |= B_BUSY | B_VFLUSH;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
	}
	splx(s);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 * Takes a hold reference (VHOLD) and puts the buffer on the vnode's
 * clean list; reassignbuf() moves it to the dirty list later if needed.
 */
void
bgetvp(vp, bp)
	struct vnode *vp;
	struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free, bp %p", bp);
	VHOLD(vp);
	s = splbio();
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 * Drops the hold reference taken by bgetvp(); if this empties the dirty
 * list, the vnode is also removed from the syncer worklist.
 */
void
brelvp(bp)
	struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == NULL)
		panic("brelvp: vp NULL, bp %p", bp);

	s = splbio();
	vp = bp->b_vp;
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	if ((vp->v_flag & VONWORKLST) && LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
	splx(s);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 *
 * This function must be called at splbio().
 */
void
reassignbuf(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;

	if (newvp == NULL) {
		/* NOTE(review): message lacks a trailing '\n' — console
		 * output will run together; confirm before changing. */
		printf("reassignbuf: NULL");
		return;
	}

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &newvp->v_cleanblkhd;
		/* Last dirty buffer went away: leave the syncer worklist. */
		if ((newvp->v_flag & VONWORKLST) &&
		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	} else {
		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			/* Pick the syncer delay by vnode kind: directories,
			 * filesystem metadata (mounted block devices), and
			 * regular files each get their own flush delay. */
			switch (newvp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
				break;
			}
			if (!newvp->v_mount ||
			    (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
				vn_syncer_add_to_worklist(newvp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 *
 * NODEV yields a NULL vnode and success. The new vnode is run through
 * checkalias() so an existing alias for the same device is reused.
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		/* An alias existed: drop ours and use the existing vnode. */
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 *
 * Returns NULLVP if nvp should keep its own (newly-allocated) specinfo,
 * or the reclaimed alias vnode otherwise.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		/* vget can fail if the vnode is being cleaned; the hash
		 * chain may have changed while we slept, so rescan. */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		/* No reusable alias: give nvp its own specinfo and link it
		 * onto the hash chain. */
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/* Reusable VT_NON/VBLK alias: clean it and hand it back with the
	 * caller's vnodeops and tag installed. */
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. If the vnode lock bit is set the
 * vnode is being eliminated in vgone.
 In that case, we can not
 * grab the vnode, so the process is awakened when the transition is
 * completed, and an error returned to indicate that the vnode is no
 * longer usable (possibly having been changed to a new file system type).
 */
int
vget(vp, flags)
	struct vnode *vp;
	int flags;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}
	/* First reference: pull the vnode off whichever free list it is
	 * on (hold list if buffers still reference it). */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vget", vp);
		panic("vget: usecount overflow, vp %p", vp);
	}
#endif
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active. We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (vp->v_usecount > 0) {
				simple_unlock(&vp->v_interlock);
				return (error);
			}
			/*
			 * insert at tail of LRU list
			 */
			simple_lock(&vnode_free_list_slock);
			if (vp->v_holdcnt > 0)
				TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
				    v_freelist);
			else
				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
				    v_freelist);
			simple_unlock(&vnode_free_list_slock);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * vput(), just unlock and vrele()
 * (combined so the vnode lock is dropped before VOP_INACTIVE on the
 * fast path; on the last-reference path VOP_INACTIVE is entered with
 * the vnode still locked, as that VOP expects.)
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, p);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		/* Still referenced elsewhere; nothing more to do. */
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	/*
	 * Unlike vput(), the vnode is not locked here, so the exclusive
	 * lock must be acquired (releasing the interlock) before the
	 * inactive routine may be called.
	 */
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
		VOP_INACTIVE(vp, p);
}

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	struct vnode *vp;
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	simple_lock(&vp->v_interlock);
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt vp %p", vp);
	vp->v_holdcnt--;
	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	/* vref() may only add to an already-nonzero use count. */
	if (vp->v_usecount <= 0)
		panic("vref used where vget required, vp %p", vp);
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vref", vp);
		panic("vref: usecount overflow, vp %p", vp);
	}
#endif
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error).  If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/*
		 * The list may have changed while the mount list lock was
		 * dropped (vgonel below); restart if vp migrated away.
		 */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		simple_lock(&vp->v_interlock);
		/*
		 * Skip over a vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);	/* consumes the interlock */
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device.  For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 *
 * Called with the vnode interlock held; it is released as a side
 * effect of the LK_DRAIN lock acquisition below.
 */
void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if ((active = vp->v_usecount) != 0) {
		/* We have the vnode interlock. */
		vp->v_usecount++;
#ifdef DIAGNOSTIC
		if (vp->v_usecount == 0) {
			vprint("vclean", vp);
			panic("vclean: usecount overflow");
		}
#endif
	}

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock, vp %p", vp);
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out.  The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);

	/*
	 * clean out any VM data associated with the vnode.
	 */
	uvm_vnp_terminate(vp);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.  Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim, vp %p", vp);

	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			/*
			 * Insert at tail of LRU list.
			 */
			simple_unlock(&vp->v_interlock);
			simple_lock(&vnode_free_list_slock);
#ifdef DIAGNOSTIC
			if (vp->v_vnlock) {
				if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
					vprint("vclean: lock not drained", vp);
			}
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean, vp %p", vp);
#endif
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			simple_unlock(&vnode_free_list_slock);
		} else
			simple_unlock(&vp->v_interlock);
	}

	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 *
 * Returns 1 if the vnode was recycled, 0 if it was in use.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list.
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (vp->v_hashchain != NULL) {
			/* Unlink vp from its device hash chain. */
			if (*vp->v_hashchain == vp) {
				*vp->v_hashchain = vp->v_specnext;
			} else {
				for (vq = *vp->v_hashchain; vq;
				    vq = vq->v_specnext) {
					if (vq->v_specnext != vp)
						continue;
					vq->v_specnext = vp->v_specnext;
					break;
				}
				if (vq == NULL)
					panic("missing bdev");
			}
			if (vp->v_flag & VALIASED) {
				/*
				 * If only one alias to the same device
				 * remains, it is no longer aliased.
				 */
				vx = NULL;
				for (vq = *vp->v_hashchain; vq;
				    vq = vq->v_specnext) {
					if (vq->v_rdev != vp->v_rdev ||
					    vq->v_type != vp->v_type)
						continue;
					if (vx)
						break;
					vx = vq;
				}
				if (vx == NULL)
					panic("missing alias");
				if (vq == NULL)
					vx->v_flag &= ~VALIASED;
				vp->v_flag &= ~VALIASED;
			}
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list.  The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean, vp %p", vp);
		if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
		    TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 *
 * Returns 1 (and sets *vpp) if a matching vnode exists, 0 otherwise.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(maj, minl, minh, type)
	int maj, minl, minh;
	enum vtype type;
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	/* Sum use counts over every alias of the same device. */
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;	/* list changed; start over */
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
    { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("tag %d type %s, usecount %ld, writecount %ld, refcount %ld,",
	    vp->v_tag, typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	/* Build a "|FLAG|FLAG" string, then print it without the lead '|'. */
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
#endif

extern const char *mountcompatnames[];
extern const int nmountcompatnames;

/*
 * Top level filesystem related information gathering.
 *
 * Non-generic requests are dispatched to the file system named by
 * name[0]; generic (VFS_GENERIC) requests are handled here.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	struct vfsconf vfc;
#endif
	struct vfsops *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */

	/* Not generic: goes to file system. */
	if (name[0] != VFS_GENERIC) {
		if (name[0] >= nmountcompatnames || name[0] < 0 ||
		    mountcompatnames[name[0]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[0]]);
		if (vfsp == NULL || vfsp->vfs_sysctl == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}

	/* The rest are generic vfs sysctls. */
	switch (name[1]) {
	case VFS_USERMOUNT:
		return sysctl_int(oldp, oldlenp, newp, newlen, &dovfsusermount);
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	case VFS_MAXTYPENUM:
		/*
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
	case VFS_CONF:
		/*
		 * Special: a node, next is a file system name.
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		if (name[2] >= nmountcompatnames || name[2] < 0 ||
		    mountcompatnames[name[2]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		vfc.vfc_vfsops = vfsp;
		strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
		vfc.vfc_typenum = name[2];
		vfc.vfc_refcount = vfsp->vfs_refcount;
		vfc.vfc_flags = 0;
		vfc.vfc_mountroot = vfsp->vfs_mountroot;
		vfc.vfc_next = NULL;
		return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
		    sizeof(struct vfsconf)));
#endif
	default:
		break;
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof(struct vnode *)
#define VNODESZ	sizeof(struct vnode)
	/* NULL buffer: just report the (slop-padded) size estimate. */
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		    vp != NULL;
		    vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;	/* rewind and rescan mount */
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			/* Drop the lock across copyout, which may fault. */
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		/* Check every alias of the same device as well. */
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	struct netcred *np, *enp;
	struct radix_node_head *rnh;
	int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	/* Zero address length means "default export" for this mount. */
	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	/* netcred, address, and mask are carved out of one allocation. */
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	memset((caddr_t)np, 0, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
	if (error)
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
		/*
		 * An entry for this address already exists; it is only
		 * an error if its credentials differ from the new ones.
		 */
		if (rn == 0) {
			enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
			    smask, rnh);
			if (enp == 0) {
				error = EPERM;
				goto out;
			}
		} else
			enp = (struct netcred *)rn;

		if (enp->netc_exflags != argp->ex_flags ||
		    enp->netc_anon.cr_uid != argp->ex_anon.cr_uid ||
		    enp->netc_anon.cr_gid != argp->ex_anon.cr_gid ||
		    enp->netc_anon.cr_ngroups != argp->ex_anon.cr_ngroups ||
		    memcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups,
		    enp->netc_anon.cr_ngroups))
			error = EPERM;
		else
			error = 0;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
2067 */ 2068 static void 2069 vfs_free_addrlist(nep) 2070 struct netexport *nep; 2071 { 2072 int i; 2073 struct radix_node_head *rnh; 2074 2075 for (i = 0; i <= AF_MAX; i++) 2076 if ((rnh = nep->ne_rtable[i]) != NULL) { 2077 (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh); 2078 free((caddr_t)rnh, M_RTABLE); 2079 nep->ne_rtable[i] = 0; 2080 } 2081 } 2082 2083 int 2084 vfs_export(mp, nep, argp) 2085 struct mount *mp; 2086 struct netexport *nep; 2087 struct export_args *argp; 2088 { 2089 int error; 2090 2091 if (argp->ex_flags & MNT_DELEXPORT) { 2092 if (mp->mnt_flag & MNT_EXPUBLIC) { 2093 vfs_setpublicfs(NULL, NULL, NULL); 2094 mp->mnt_flag &= ~MNT_EXPUBLIC; 2095 } 2096 vfs_free_addrlist(nep); 2097 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 2098 } 2099 if (argp->ex_flags & MNT_EXPORTED) { 2100 if (argp->ex_flags & MNT_EXPUBLIC) { 2101 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 2102 return (error); 2103 mp->mnt_flag |= MNT_EXPUBLIC; 2104 } 2105 if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0) 2106 return (error); 2107 mp->mnt_flag |= MNT_EXPORTED; 2108 } 2109 return (0); 2110 } 2111 2112 /* 2113 * Set the publicly exported filesystem (WebNFS). Currently, only 2114 * one public filesystem is possible in the spec (RFC 2054 and 2055) 2115 */ 2116 int 2117 vfs_setpublicfs(mp, nep, argp) 2118 struct mount *mp; 2119 struct netexport *nep; 2120 struct export_args *argp; 2121 { 2122 int error; 2123 struct vnode *rvp; 2124 char *cp; 2125 2126 /* 2127 * mp == NULL -> invalidate the current info, the FS is 2128 * no longer exported. May be called from either vfs_export 2129 * or unmount, so check if it hasn't already been done. 2130 */ 2131 if (mp == NULL) { 2132 if (nfs_pub.np_valid) { 2133 nfs_pub.np_valid = 0; 2134 if (nfs_pub.np_index != NULL) { 2135 FREE(nfs_pub.np_index, M_TEMP); 2136 nfs_pub.np_index = NULL; 2137 } 2138 } 2139 return (0); 2140 } 2141 2142 /* 2143 * Only one allowed at a time. 
2144 */ 2145 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 2146 return (EBUSY); 2147 2148 /* 2149 * Get real filehandle for root of exported FS. 2150 */ 2151 memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle)); 2152 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 2153 2154 if ((error = VFS_ROOT(mp, &rvp))) 2155 return (error); 2156 2157 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 2158 return (error); 2159 2160 vput(rvp); 2161 2162 /* 2163 * If an indexfile was specified, pull it in. 2164 */ 2165 if (argp->ex_indexfile != NULL) { 2166 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 2167 M_WAITOK); 2168 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 2169 MAXNAMLEN, (size_t *)0); 2170 if (!error) { 2171 /* 2172 * Check for illegal filenames. 2173 */ 2174 for (cp = nfs_pub.np_index; *cp; cp++) { 2175 if (*cp == '/') { 2176 error = EINVAL; 2177 break; 2178 } 2179 } 2180 } 2181 if (error) { 2182 FREE(nfs_pub.np_index, M_TEMP); 2183 return (error); 2184 } 2185 } 2186 2187 nfs_pub.np_mount = mp; 2188 nfs_pub.np_valid = 1; 2189 return (0); 2190 } 2191 2192 struct netcred * 2193 vfs_export_lookup(mp, nep, nam) 2194 struct mount *mp; 2195 struct netexport *nep; 2196 struct mbuf *nam; 2197 { 2198 struct netcred *np; 2199 struct radix_node_head *rnh; 2200 struct sockaddr *saddr; 2201 2202 np = NULL; 2203 if (mp->mnt_flag & MNT_EXPORTED) { 2204 /* 2205 * Lookup in the export list first. 2206 */ 2207 if (nam != NULL) { 2208 saddr = mtod(nam, struct sockaddr *); 2209 rnh = nep->ne_rtable[saddr->sa_family]; 2210 if (rnh != NULL) { 2211 np = (struct netcred *) 2212 (*rnh->rnh_matchaddr)((caddr_t)saddr, 2213 rnh); 2214 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 2215 np = NULL; 2216 } 2217 } 2218 /* 2219 * If no address match, use the default if it exists. 
2220 */ 2221 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 2222 np = &nep->ne_defexported; 2223 } 2224 return (np); 2225 } 2226 2227 /* 2228 * Do the usual access checking. 2229 * file_mode, uid and gid are from the vnode in question, 2230 * while acc_mode and cred are from the VOP_ACCESS parameter list 2231 */ 2232 int 2233 vaccess(type, file_mode, uid, gid, acc_mode, cred) 2234 enum vtype type; 2235 mode_t file_mode; 2236 uid_t uid; 2237 gid_t gid; 2238 mode_t acc_mode; 2239 struct ucred *cred; 2240 { 2241 mode_t mask; 2242 2243 /* 2244 * Super-user always gets read/write access, but execute access depends 2245 * on at least one execute bit being set. 2246 */ 2247 if (cred->cr_uid == 0) { 2248 if ((acc_mode & VEXEC) && type != VDIR && 2249 (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0) 2250 return (EACCES); 2251 return (0); 2252 } 2253 2254 mask = 0; 2255 2256 /* Otherwise, check the owner. */ 2257 if (cred->cr_uid == uid) { 2258 if (acc_mode & VEXEC) 2259 mask |= S_IXUSR; 2260 if (acc_mode & VREAD) 2261 mask |= S_IRUSR; 2262 if (acc_mode & VWRITE) 2263 mask |= S_IWUSR; 2264 return ((file_mode & mask) == mask ? 0 : EACCES); 2265 } 2266 2267 /* Otherwise, check the groups. */ 2268 if (cred->cr_gid == gid || groupmember(gid, cred)) { 2269 if (acc_mode & VEXEC) 2270 mask |= S_IXGRP; 2271 if (acc_mode & VREAD) 2272 mask |= S_IRGRP; 2273 if (acc_mode & VWRITE) 2274 mask |= S_IWGRP; 2275 return ((file_mode & mask) == mask ? 0 : EACCES); 2276 } 2277 2278 /* Otherwise, check everyone else. */ 2279 if (acc_mode & VEXEC) 2280 mask |= S_IXOTH; 2281 if (acc_mode & VREAD) 2282 mask |= S_IROTH; 2283 if (acc_mode & VWRITE) 2284 mask |= S_IWOTH; 2285 return ((file_mode & mask) == mask ? 0 : EACCES); 2286 } 2287 2288 /* 2289 * Unmount all file systems. 2290 * We traverse the list in reverse order under the assumption that doing so 2291 * will avoid needing to worry about dependencies. 
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp;
	int allerror, error;
	struct proc *p = curproc;	/* XXX */

	/*
	 * Unmounting a file system blocks the requesting process.
	 * However, it's possible for this routine to be called when
	 * curproc is NULL (e.g. panic situation, or via the debugger).
	 * If we get stuck in this situation, just abort, since any
	 * attempts to sleep will fault.
	 */
	if (p == NULL) {
		printf("vfs_unmountall: no context, aborting\n");
		return;
	}

	for (allerror = 0,
	    mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
		printf("unmounting %s (%s)...\n",
		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
		if (vfs_busy(mp, 0, 0))
			continue;
		if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}
	if (allerror)
		printf("WARNING: some file systems would not unmount\n");
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
	struct buf *bp;
	int iter, nbusy, dcount, s;

	printf("syncing disks... ");

	/* XXX Should suspend scheduling. */
	(void) spl0();

	sys_sync(&proc0, (void *)0, (register_t *)0);

	/* Wait for sync to finish. */
	dcount = 10000;
	for (iter = 0; iter < 20; iter++) {
		nbusy = 0;
		/* Scan the buffer pool for buffers still in flight. */
		for (bp = &buf[nbuf]; --bp >= buf; ) {
			if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
				nbusy++;
			/*
			 * With soft updates, some buffers that are
			 * written will be remarked as dirty until other
			 * buffers are written.
			 */
			if (bp->b_vp && bp->b_vp->v_mount
			    && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP)
			    && (bp->b_flags & B_DELWRI)) {
				s = splbio();
				bremfree(bp);
				bp->b_flags |= B_BUSY;
				splx(s);
				nbusy++;
				bawrite(bp);
				/* Bound the number of forced writes. */
				if (dcount-- <= 0) {
					printf("softdep ");
					goto fail;
				}
			}
		}
		if (nbusy == 0)
			break;
		printf("%d ", nbusy);
		DELAY(40000 * iter);
	}
	if (nbusy) {
fail:
#if defined(DEBUG) || defined(DEBUG_HALT_BUSY)
		printf("giving up\nPrinting vnodes for busy buffers\n");
		for (bp = &buf[nbuf]; --bp >= buf; )
			if ((bp->b_flags & (B_BUSY|B_INVAL)) == B_BUSY)
				vprint(NULL, bp->b_vp);

#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif

#else /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
		printf("giving up\n");
#endif /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
		return;
	} else
		printf("done\n");

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems. */
	vfs_unmountall();
}

/*
 * Mount the root file system.  If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
2417 */ 2418 int 2419 vfs_mountroot() 2420 { 2421 extern int (*mountroot) __P((void)); 2422 struct vfsops *v; 2423 2424 if (root_device == NULL) 2425 panic("vfs_mountroot: root device unknown"); 2426 2427 switch (root_device->dv_class) { 2428 case DV_IFNET: 2429 if (rootdev != NODEV) 2430 panic("vfs_mountroot: rootdev set for DV_IFNET"); 2431 break; 2432 2433 case DV_DISK: 2434 if (rootdev == NODEV) 2435 panic("vfs_mountroot: rootdev not set for DV_DISK"); 2436 break; 2437 2438 default: 2439 printf("%s: inappropriate for root file system\n", 2440 root_device->dv_xname); 2441 return (ENODEV); 2442 } 2443 2444 /* 2445 * If user specified a file system, use it. 2446 */ 2447 if (mountroot != NULL) 2448 return ((*mountroot)()); 2449 2450 /* 2451 * Try each file system currently configured into the kernel. 2452 */ 2453 for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) { 2454 if (v->vfs_mountroot == NULL) 2455 continue; 2456 #ifdef DEBUG 2457 printf("mountroot: trying %s...\n", v->vfs_name); 2458 #endif 2459 if ((*v->vfs_mountroot)() == 0) { 2460 printf("root file system type: %s\n", v->vfs_name); 2461 break; 2462 } 2463 } 2464 2465 if (v == NULL) { 2466 printf("no file system for %s", root_device->dv_xname); 2467 if (root_device->dv_class == DV_DISK) 2468 printf(" (dev 0x%x)", rootdev); 2469 printf("\n"); 2470 return (EFTYPE); 2471 } 2472 return (0); 2473 } 2474 2475 /* 2476 * Given a file system name, look up the vfsops for that 2477 * file system, or return NULL if file system isn't present 2478 * in the kernel. 2479 */ 2480 struct vfsops * 2481 vfs_getopsbyname(name) 2482 const char *name; 2483 { 2484 struct vfsops *v; 2485 2486 for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) { 2487 if (strcmp(v->vfs_name, name) == 0) 2488 break; 2489 } 2490 2491 return (v); 2492 } 2493 2494 /* 2495 * Establish a file system and initialize it. 
 */
int
vfs_attach(vfs)
	struct vfsops *vfs;
{
	struct vfsops *v;
	int error = 0;

	/*
	 * Make sure this file system doesn't already exist.
	 */
	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
			error = EEXIST;
			goto out;
		}
	}

	/*
	 * Initialize the vnode operations for this file system.
	 */
	vfs_opv_init(vfs->vfs_opv_descs);

	/*
	 * Now initialize the file system itself.
	 */
	(*vfs->vfs_init)();

	/*
	 * ...and link it into the kernel's list.
	 */
	LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);

	/*
	 * Sanity: make sure the reference count is 0.
	 */
	vfs->vfs_refcount = 0;

 out:
	/* Returns 0 on success, EEXIST if the name is already registered. */
	return (error);
}

/*
 * Remove a file system from the kernel.
 * Returns 0 on success, EBUSY if still referenced, ESRCH if not found.
 */
int
vfs_detach(vfs)
	struct vfsops *vfs;
{
	struct vfsops *v;

	/*
	 * Make sure no one is using the filesystem.
	 */
	if (vfs->vfs_refcount != 0)
		return (EBUSY);

	/*
	 * ...and remove it from the kernel's list.
	 */
	for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) {
		if (v == vfs) {
			LIST_REMOVE(v, vfs_list);
			break;
		}
	}

	if (v == NULL)
		return (ESRCH);

	/*
	 * Now run the file system-specific cleanups.
	 */
	(*vfs->vfs_done)();

	/*
	 * Free the vnode operations vector.
	 */
	vfs_opv_free(vfs->vfs_opv_descs);
	return (0);
}

#ifdef DDB
/* bitmask_snprintf(9) format string naming each buf b_flags bit. */
const char buf_flagbits[] =
	"\20\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6CACHE\7CALL\10DELWRI"
	"\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE"
	"\21PAGET\22PGIN\23PHYS\24RAW\25READ\26TAPE\27UAREA\30WANTED"
	"\31WRITEINPROG\32XXX\33VFLUSH";

/*
 * Print the interesting fields of a struct buf through the supplied
 * printf-style callback (used from the DDB kernel debugger).
 * "full" is currently unused here.
 */
void
vfs_buf_print(bp, full, pr)
	struct buf *bp;
	int full;
	void (*pr) __P((const char *, ...));
{
	char buf[1024];

	(*pr)("  vp %p lblkno 0x%x blkno 0x%x dev 0x%x\n",
	    bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);

	/* Render b_flags symbolically via the format string above. */
	bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf));
	(*pr)("  error %d flags 0x%s\n", bp->b_error, buf);

	(*pr)("  bufsize 0x%x bcount 0x%x resid 0x%x\n",
	    bp->b_bufsize, bp->b_bcount, bp->b_resid);
	(*pr)("  data %p saveaddr %p\n",
	    bp->b_data, bp->b_saveaddr);
	(*pr)("  iodone %p\n", bp->b_iodone);

	(*pr)("  dirtyoff 0x%x dirtyend 0x%x validoff 0x%x validend 0x%x\n",
	    bp->b_dirtyoff, bp->b_dirtyend,
	    bp->b_validoff, bp->b_validend);

	(*pr)("  rcred %p wcred %p\n", bp->b_rcred, bp->b_wcred);
}

/* bitmask_snprintf(9) format string naming each vnode v_flag bit. */
const char vnode_flagbits[] =
	"\20\1ROOT\2TEXT\3SYSTEM\4ISTTY\11XLOCK\12XWANT\13BWAIT\14ALIASED"
	"\15DIROP\17DIRTY";

/*
 * Printable names indexed by vnode type (v_type); order is assumed to
 * match enum vtype in sys/vnode.h -- TODO confirm against that header.
 */
const char *vnode_types[] = {
	"VNON",
	"VREG",
	"VDIR",
	"VBLK",
	"VCHR",
	"VLNK",
	"VSOCK",
	"VFIFO",
	"VBAD",
};

/*
 * Printable names indexed by vnode tag (v_tag); order is assumed to
 * match enum vtagtype in sys/vnode.h -- TODO confirm against that header.
 */
const char *vnode_tags[] = {
	"VT_NON",
	"VT_UFS",
	"VT_NFS",
	"VT_MFS",
	"VT_MSDOSFS",
	"VT_LFS",
	"VT_LOFS",
	"VT_FDESC",
	"VT_PORTAL",
	"VT_NULL",
	"VT_UMAP",
	"VT_KERNFS",
	"VT_PROCFS",
	"VT_AFS",
	"VT_ISOFS",
	"VT_UNION",
	"VT_ADOSFS",
	"VT_EXT2FS",
	"VT_CODA",
	"VT_FILECORE",
	"VT_NTFS",
	"VT_VFS",
	"VT_OVERLAY"
};

/*
 * Print the interesting fields of a struct vnode through the supplied
 * printf-style callback (used from the DDB kernel debugger).  When
 * "full" is nonzero, also dump each buffer on the vnode's clean and
 * dirty lists.
 */
void
vfs_vnode_print(vp, full, pr)
	struct vnode *vp;
	int full;
	void (*pr) __P((const char *, ...));
{
	char buf[1024];

	const char *vtype, *vtag;

	/* First dump the embedded UVM object, then the vnode itself. */
	uvm_object_printit(&vp->v_uvm.u_obj, full, pr);
	bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
	(*pr)("\nVNODE flags %s\n", buf);
	(*pr)("nio %d size 0x%x wlist %s\n",
	    vp->v_uvm.u_nio, vp->v_uvm.u_size,
	    vp->v_uvm.u_wlist.le_next ? "YES" : "NO");

	(*pr)("data %p usecount %d writecount %d holdcnt %d numoutput %d\n",
	    vp->v_data, vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt, vp->v_numoutput);

	/* Bounds-check before indexing the name tables above. */
	vtype = (vp->v_type >= 0 &&
	    vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ?
	    vnode_types[vp->v_type] : "UNKNOWN";
	vtag = (vp->v_tag >= 0 &&
	    vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ?
	    vnode_tags[vp->v_tag] : "UNKNOWN";

	(*pr)("type %s(%d) tag %s(%d) id 0x%x mount %p typedata %p\n",
	    vtype, vp->v_type, vtag, vp->v_tag,
	    vp->v_id, vp->v_mount, vp->v_mountedhere);
	(*pr)("lastr 0x%x lastw 0x%x lasta 0x%x\n",
	    vp->v_lastr, vp->v_lastw, vp->v_lasta);
	(*pr)("cstart 0x%x clen 0x%x ralen 0x%x maxra 0x%x\n",
	    vp->v_cstart, vp->v_clen, vp->v_ralen, vp->v_maxra);

	if (full) {
		struct buf *bp;

		(*pr)("clean bufs:\n");
		for (bp = LIST_FIRST(&vp->v_cleanblkhd);
		    bp != NULL;
		    bp = LIST_NEXT(bp, b_vnbufs)) {
			vfs_buf_print(bp, full, pr);
		}

		(*pr)("dirty bufs:\n");
		for (bp = LIST_FIRST(&vp->v_dirtyblkhd);
		    bp != NULL;
		    bp = LIST_NEXT(bp, b_vnbufs)) {
			vfs_buf_print(bp, full, pr);
		}
	}
}
#endif