1 /* $NetBSD: vfs_subr.c,v 1.445 2014/09/05 05:57:21 matt Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998, 2004, 2005, 2007, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1989, 1993 35 * The Regents of the University of California. All rights reserved. 36 * (c) UNIX System Laboratories, Inc. 37 * All or some portions of this file are derived from material licensed 38 * to the University of California by American Telephone and Telegraph 39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 40 * the permission of UNIX System Laboratories, Inc. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 67 */ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.445 2014/09/05 05:57:21 matt Exp $"); 71 72 #include "opt_ddb.h" 73 #include "opt_compat_netbsd.h" 74 #include "opt_compat_43.h" 75 76 #include <sys/param.h> 77 #include <sys/systm.h> 78 #include <sys/conf.h> 79 #include <sys/dirent.h> 80 #include <sys/filedesc.h> 81 #include <sys/kernel.h> 82 #include <sys/mount.h> 83 #include <sys/vnode.h> 84 #include <sys/stat.h> 85 #include <sys/sysctl.h> 86 #include <sys/namei.h> 87 #include <sys/buf.h> 88 #include <sys/errno.h> 89 #include <sys/kmem.h> 90 #include <sys/syscallargs.h> 91 #include <sys/kauth.h> 92 #include <sys/module.h> 93 94 #include <miscfs/genfs/genfs.h> 95 #include <miscfs/syncfs/syncfs.h> 96 #include <miscfs/specfs/specdev.h> 97 #include <uvm/uvm_ddb.h> 98 99 const enum vtype iftovt_tab[16] = { 100 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 101 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 102 }; 103 const int vttoif_tab[9] = { 104 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 105 S_IFSOCK, S_IFIFO, S_IFMT, 106 }; 107 108 /* 109 * Insq/Remq for the vnode usage lists. 110 */ 111 #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) 112 #define bufremvn(bp) { \ 113 LIST_REMOVE(bp, b_vnbufs); \ 114 (bp)->b_vnbufs.le_next = NOLIST; \ 115 } 116 117 int doforce = 1; /* 1 => permit forcible unmounting */ 118 int prtactive = 0; /* 1 => print out reclaim of active vnodes */ 119 120 /* 121 * Local declarations. 122 */ 123 124 static int getdevvp(dev_t, vnode_t **, enum vtype); 125 126 /* 127 * Initialize the vnode management data structures. 128 */ 129 void 130 vntblinit(void) 131 { 132 133 vn_initialize_syncerd(); 134 vfs_mount_sysinit(); 135 vfs_vnode_sysinit(); 136 } 137 138 /* 139 * Flush out and invalidate all buffers associated with a vnode. 140 * Called with the underlying vnode locked, which should prevent new dirty 141 * buffers from being queued. 142 */ 143 int 144 vinvalbuf(struct vnode *vp, int flags, kauth_cred_t cred, struct lwp *l, 145 bool catch_p, int slptimeo) 146 { 147 struct buf *bp, *nbp; 148 int error; 149 int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO | 150 (flags & V_SAVE ? PGO_CLEANIT | PGO_RECLAIM : 0); 151 152 /* XXXUBC this doesn't look at flags or slp* */ 153 mutex_enter(vp->v_interlock); 154 error = VOP_PUTPAGES(vp, 0, 0, flushflags); 155 if (error) { 156 return error; 157 } 158 159 if (flags & V_SAVE) { 160 error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0); 161 if (error) 162 return (error); 163 KASSERT(LIST_EMPTY(&vp->v_dirtyblkhd)); 164 } 165 166 mutex_enter(&bufcache_lock); 167 restart: 168 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 169 KASSERT(bp->b_vp == vp); 170 nbp = LIST_NEXT(bp, b_vnbufs); 171 error = bbusy(bp, catch_p, slptimeo, NULL); 172 if (error != 0) { 173 if (error == EPASSTHROUGH) 174 goto restart; 175 mutex_exit(&bufcache_lock); 176 return (error); 177 } 178 brelsel(bp, BC_INVAL | BC_VFLUSH); 179 } 180 181 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 182 KASSERT(bp->b_vp == vp); 183 nbp = LIST_NEXT(bp, b_vnbufs); 184 error = bbusy(bp, catch_p, slptimeo, NULL); 185 if (error != 0) { 186 if (error == EPASSTHROUGH) 187 goto restart; 188 mutex_exit(&bufcache_lock); 189 return (error); 190 } 191 /* 192 * XXX Since there are no node locks for NFS, I believe 193 * there is a slight chance that a delayed write will 194 * occur while sleeping just above, so check for it. 195 */ 196 if ((bp->b_oflags & BO_DELWRI) && (flags & V_SAVE)) { 197 #ifdef DEBUG 198 printf("buffer still DELWRI\n"); 199 #endif 200 bp->b_cflags |= BC_BUSY | BC_VFLUSH; 201 mutex_exit(&bufcache_lock); 202 VOP_BWRITE(bp->b_vp, bp); 203 mutex_enter(&bufcache_lock); 204 goto restart; 205 } 206 brelsel(bp, BC_INVAL | BC_VFLUSH); 207 } 208 209 #ifdef DIAGNOSTIC 210 if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd)) 211 panic("vinvalbuf: flush failed, vp %p", vp); 212 #endif 213 214 mutex_exit(&bufcache_lock); 215 216 return (0); 217 } 218 219 /* 220 * Destroy any in core blocks past the truncation length. 221 * Called with the underlying vnode locked, which should prevent new dirty 222 * buffers from being queued. 223 */ 224 int 225 vtruncbuf(struct vnode *vp, daddr_t lbn, bool catch_p, int slptimeo) 226 { 227 struct buf *bp, *nbp; 228 int error; 229 voff_t off; 230 231 off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift); 232 mutex_enter(vp->v_interlock); 233 error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO); 234 if (error) { 235 return error; 236 } 237 238 mutex_enter(&bufcache_lock); 239 restart: 240 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 241 KASSERT(bp->b_vp == vp); 242 nbp = LIST_NEXT(bp, b_vnbufs); 243 if (bp->b_lblkno < lbn) 244 continue; 245 error = bbusy(bp, catch_p, slptimeo, NULL); 246 if (error != 0) { 247 if (error == EPASSTHROUGH) 248 goto restart; 249 mutex_exit(&bufcache_lock); 250 return (error); 251 } 252 brelsel(bp, BC_INVAL | BC_VFLUSH); 253 } 254 255 for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { 256 KASSERT(bp->b_vp == vp); 257 nbp = LIST_NEXT(bp, b_vnbufs); 258 if (bp->b_lblkno < lbn) 259 continue; 260 error = bbusy(bp, catch_p, slptimeo, NULL); 261 if (error != 0) { 262 if (error == EPASSTHROUGH) 263 goto restart; 264 mutex_exit(&bufcache_lock); 265 return (error); 266 } 267 brelsel(bp, BC_INVAL | BC_VFLUSH); 268 } 269 mutex_exit(&bufcache_lock); 270 271 return (0); 272 } 273 274 /* 275 * Flush all dirty buffers from a vnode. 276 * Called with the underlying vnode locked, which should prevent new dirty 277 * buffers from being queued. 278 */ 279 int 280 vflushbuf(struct vnode *vp, int flags) 281 { 282 struct buf *bp, *nbp; 283 int error, pflags; 284 bool dirty, sync; 285 286 sync = (flags & FSYNC_WAIT) != 0; 287 pflags = PGO_CLEANIT | PGO_ALLPAGES | 288 (sync ? PGO_SYNCIO : 0) | 289 ((flags & FSYNC_LAZY) ? PGO_LAZY : 0); 290 mutex_enter(vp->v_interlock); 291 (void) VOP_PUTPAGES(vp, 0, 0, pflags); 292 293 loop: 294 mutex_enter(&bufcache_lock); 295 for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { 296 KASSERT(bp->b_vp == vp); 297 nbp = LIST_NEXT(bp, b_vnbufs); 298 if ((bp->b_cflags & BC_BUSY)) 299 continue; 300 if ((bp->b_oflags & BO_DELWRI) == 0) 301 panic("vflushbuf: not dirty, bp %p", bp); 302 bp->b_cflags |= BC_BUSY | BC_VFLUSH; 303 mutex_exit(&bufcache_lock); 304 /* 305 * Wait for I/O associated with indirect blocks to complete, 306 * since there is no way to quickly wait for them below. 307 */ 308 if (bp->b_vp == vp || !sync) 309 (void) bawrite(bp); 310 else { 311 error = bwrite(bp); 312 if (error) 313 return error; 314 } 315 goto loop; 316 } 317 mutex_exit(&bufcache_lock); 318 319 if (!sync) 320 return 0; 321 322 mutex_enter(vp->v_interlock); 323 while (vp->v_numoutput != 0) 324 cv_wait(&vp->v_cv, vp->v_interlock); 325 dirty = !LIST_EMPTY(&vp->v_dirtyblkhd); 326 mutex_exit(vp->v_interlock); 327 328 if (dirty) { 329 vprint("vflushbuf: dirty", vp); 330 goto loop; 331 } 332 333 return 0; 334 } 335 336 /* 337 * Create a vnode for a block device. 338 * Used for root filesystem and swap areas. 339 * Also used for memory file system special devices. 340 */ 341 int 342 bdevvp(dev_t dev, vnode_t **vpp) 343 { 344 345 return (getdevvp(dev, vpp, VBLK)); 346 } 347 348 /* 349 * Create a vnode for a character device. 350 * Used for kernfs and some console handling. 351 */ 352 int 353 cdevvp(dev_t dev, vnode_t **vpp) 354 { 355 356 return (getdevvp(dev, vpp, VCHR)); 357 } 358 359 /* 360 * Associate a buffer with a vnode. There must already be a hold on 361 * the vnode. 362 */ 363 void 364 bgetvp(struct vnode *vp, struct buf *bp) 365 { 366 367 KASSERT(bp->b_vp == NULL); 368 KASSERT(bp->b_objlock == &buffer_lock); 369 KASSERT(mutex_owned(vp->v_interlock)); 370 KASSERT(mutex_owned(&bufcache_lock)); 371 KASSERT((bp->b_cflags & BC_BUSY) != 0); 372 KASSERT(!cv_has_waiters(&bp->b_done)); 373 374 vholdl(vp); 375 bp->b_vp = vp; 376 if (vp->v_type == VBLK || vp->v_type == VCHR) 377 bp->b_dev = vp->v_rdev; 378 else 379 bp->b_dev = NODEV; 380 381 /* 382 * Insert onto list for new vnode. 383 */ 384 bufinsvn(bp, &vp->v_cleanblkhd); 385 bp->b_objlock = vp->v_interlock; 386 } 387 388 /* 389 * Disassociate a buffer from a vnode. 390 */ 391 void 392 brelvp(struct buf *bp) 393 { 394 struct vnode *vp = bp->b_vp; 395 396 KASSERT(vp != NULL); 397 KASSERT(bp->b_objlock == vp->v_interlock); 398 KASSERT(mutex_owned(vp->v_interlock)); 399 KASSERT(mutex_owned(&bufcache_lock)); 400 KASSERT((bp->b_cflags & BC_BUSY) != 0); 401 KASSERT(!cv_has_waiters(&bp->b_done)); 402 403 /* 404 * Delete from old vnode list, if on one. 405 */ 406 if (LIST_NEXT(bp, b_vnbufs) != NOLIST) 407 bufremvn(bp); 408 409 if (vp->v_uobj.uo_npages == 0 && (vp->v_iflag & VI_ONWORKLST) && 410 LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { 411 vp->v_iflag &= ~VI_WRMAPDIRTY; 412 vn_syncer_remove_from_worklist(vp); 413 } 414 415 bp->b_objlock = &buffer_lock; 416 bp->b_vp = NULL; 417 holdrelel(vp); 418 } 419 420 /* 421 * Reassign a buffer from one vnode list to another. 422 * The list reassignment must be within the same vnode. 423 * Used to assign file specific control information 424 * (indirect blocks) to the list to which they belong. 425 */ 426 void 427 reassignbuf(struct buf *bp, struct vnode *vp) 428 { 429 struct buflists *listheadp; 430 int delayx; 431 432 KASSERT(mutex_owned(&bufcache_lock)); 433 KASSERT(bp->b_objlock == vp->v_interlock); 434 KASSERT(mutex_owned(vp->v_interlock)); 435 KASSERT((bp->b_cflags & BC_BUSY) != 0); 436 437 /* 438 * Delete from old vnode list, if on one. 439 */ 440 if (LIST_NEXT(bp, b_vnbufs) != NOLIST) 441 bufremvn(bp); 442 443 /* 444 * If dirty, put on list of dirty buffers; 445 * otherwise insert onto list of clean buffers. 446 */ 447 if ((bp->b_oflags & BO_DELWRI) == 0) { 448 listheadp = &vp->v_cleanblkhd; 449 if (vp->v_uobj.uo_npages == 0 && 450 (vp->v_iflag & VI_ONWORKLST) && 451 LIST_FIRST(&vp->v_dirtyblkhd) == NULL) { 452 vp->v_iflag &= ~VI_WRMAPDIRTY; 453 vn_syncer_remove_from_worklist(vp); 454 } 455 } else { 456 listheadp = &vp->v_dirtyblkhd; 457 if ((vp->v_iflag & VI_ONWORKLST) == 0) { 458 switch (vp->v_type) { 459 case VDIR: 460 delayx = dirdelay; 461 break; 462 case VBLK: 463 if (spec_node_getmountedfs(vp) != NULL) { 464 delayx = metadelay; 465 break; 466 } 467 /* fall through */ 468 default: 469 delayx = filedelay; 470 break; 471 } 472 if (!vp->v_mount || 473 (vp->v_mount->mnt_flag & MNT_ASYNC) == 0) 474 vn_syncer_add_to_worklist(vp, delayx); 475 } 476 } 477 bufinsvn(bp, listheadp); 478 } 479 480 /* 481 * Create a vnode for a device. 482 * Used by bdevvp (block device) for root file system etc., 483 * and by cdevvp (character device) for console and kernfs. 484 */ 485 static int 486 getdevvp(dev_t dev, vnode_t **vpp, enum vtype type) 487 { 488 vnode_t *vp; 489 vnode_t *nvp; 490 int error; 491 492 if (dev == NODEV) { 493 *vpp = NULL; 494 return (0); 495 } 496 error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, NULL, &nvp); 497 if (error) { 498 *vpp = NULL; 499 return (error); 500 } 501 vp = nvp; 502 vp->v_type = type; 503 vp->v_vflag |= VV_MPSAFE; 504 uvm_vnp_setsize(vp, 0); 505 spec_node_init(vp, dev); 506 *vpp = vp; 507 return (0); 508 } 509 510 /* 511 * Lookup a vnode by device number and return it referenced. 512 */ 513 int 514 vfinddev(dev_t dev, enum vtype type, vnode_t **vpp) 515 { 516 517 return (spec_node_lookup_by_dev(type, dev, vpp) == 0); 518 } 519 520 /* 521 * Revoke all the vnodes corresponding to the specified minor number 522 * range (endpoints inclusive) of the specified major. 523 */ 524 void 525 vdevgone(int maj, int minl, int minh, enum vtype type) 526 { 527 vnode_t *vp; 528 dev_t dev; 529 int mn; 530 531 for (mn = minl; mn <= minh; mn++) { 532 dev = makedev(maj, mn); 533 while (spec_node_lookup_by_dev(type, dev, &vp) == 0) { 534 VOP_REVOKE(vp, REVOKEALL); 535 vrele(vp); 536 } 537 } 538 } 539 540 /* 541 * sysctl helper routine to return list of supported fstypes 542 */ 543 int 544 sysctl_vfs_generic_fstypes(SYSCTLFN_ARGS) 545 { 546 char bf[sizeof(((struct statvfs *)NULL)->f_fstypename)]; 547 char *where = oldp; 548 struct vfsops *v; 549 size_t needed, left, slen; 550 int error, first; 551 552 if (newp != NULL) 553 return (EPERM); 554 if (namelen != 0) 555 return (EINVAL); 556 557 first = 1; 558 error = 0; 559 needed = 0; 560 left = *oldlenp; 561 562 sysctl_unlock(); 563 mutex_enter(&vfs_list_lock); 564 LIST_FOREACH(v, &vfs_list, vfs_list) { 565 if (where == NULL) 566 needed += strlen(v->vfs_name) + 1; 567 else { 568 memset(bf, 0, sizeof(bf)); 569 if (first) { 570 strncpy(bf, v->vfs_name, sizeof(bf)); 571 first = 0; 572 } else { 573 bf[0] = ' '; 574 strncpy(bf + 1, v->vfs_name, sizeof(bf) - 1); 575 } 576 bf[sizeof(bf)-1] = '\0'; 577 slen = strlen(bf); 578 if (left < slen + 1) 579 break; 580 v->vfs_refcount++; 581 mutex_exit(&vfs_list_lock); 582 /* +1 to copy out the trailing NUL byte */ 583 error = copyout(bf, where, slen + 1); 584 mutex_enter(&vfs_list_lock); 585 v->vfs_refcount--; 586 if (error) 587 break; 588 where += slen; 589 needed += slen; 590 left -= slen; 591 } 592 } 593 mutex_exit(&vfs_list_lock); 594 sysctl_relock(); 595 *oldlenp = needed; 596 return (error); 597 } 598 599 int kinfo_vdebug = 1; 600 int kinfo_vgetfailed; 601 602 #define KINFO_VNODESLOP 10 603 604 /* 605 * Dump vnode list (via sysctl). 606 * Copyout address of vnode followed by vnode. 607 */ 608 int 609 sysctl_kern_vnode(SYSCTLFN_ARGS) 610 { 611 char *where = oldp; 612 size_t *sizep = oldlenp; 613 struct mount *mp, *nmp; 614 vnode_t *vp, vbuf; 615 struct vnode_iterator *marker; 616 char *bp = where; 617 char *ewhere; 618 int error; 619 620 if (namelen != 0) 621 return (EOPNOTSUPP); 622 if (newp != NULL) 623 return (EPERM); 624 625 #define VPTRSZ sizeof(vnode_t *) 626 #define VNODESZ sizeof(vnode_t) 627 if (where == NULL) { 628 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 629 return (0); 630 } 631 ewhere = where + *sizep; 632 633 sysctl_unlock(); 634 mutex_enter(&mountlist_lock); 635 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 636 if (vfs_busy(mp, &nmp)) { 637 continue; 638 } 639 vfs_vnode_iterator_init(mp, &marker); 640 while ((vp = vfs_vnode_iterator_next(marker, NULL, NULL))) { 641 if (bp + VPTRSZ + VNODESZ > ewhere) { 642 vrele(vp); 643 vfs_vnode_iterator_destroy(marker); 644 vfs_unbusy(mp, false, NULL); 645 sysctl_relock(); 646 *sizep = bp - where; 647 return (ENOMEM); 648 } 649 memcpy(&vbuf, vp, VNODESZ); 650 if ((error = copyout(&vp, bp, VPTRSZ)) || 651 (error = copyout(&vbuf, bp + VPTRSZ, VNODESZ))) { 652 vrele(vp); 653 vfs_vnode_iterator_destroy(marker); 654 vfs_unbusy(mp, false, NULL); 655 sysctl_relock(); 656 return (error); 657 } 658 vrele(vp); 659 bp += VPTRSZ + VNODESZ; 660 } 661 vfs_vnode_iterator_destroy(marker); 662 vfs_unbusy(mp, false, &nmp); 663 } 664 mutex_exit(&mountlist_lock); 665 sysctl_relock(); 666 667 *sizep = bp - where; 668 return (0); 669 } 670 671 /* 672 * Set vnode attributes to VNOVAL 673 */ 674 void 675 vattr_null(struct vattr *vap) 676 { 677 678 memset(vap, 0, sizeof(*vap)); 679 680 vap->va_type = VNON; 681 682 /* 683 * Assign individually so that it is safe even if size and 684 * sign of each member are varied. 685 */ 686 vap->va_mode = VNOVAL; 687 vap->va_nlink = VNOVAL; 688 vap->va_uid = VNOVAL; 689 vap->va_gid = VNOVAL; 690 vap->va_fsid = VNOVAL; 691 vap->va_fileid = VNOVAL; 692 vap->va_size = VNOVAL; 693 vap->va_blocksize = VNOVAL; 694 vap->va_atime.tv_sec = 695 vap->va_mtime.tv_sec = 696 vap->va_ctime.tv_sec = 697 vap->va_birthtime.tv_sec = VNOVAL; 698 vap->va_atime.tv_nsec = 699 vap->va_mtime.tv_nsec = 700 vap->va_ctime.tv_nsec = 701 vap->va_birthtime.tv_nsec = VNOVAL; 702 vap->va_gen = VNOVAL; 703 vap->va_flags = VNOVAL; 704 vap->va_rdev = VNOVAL; 705 vap->va_bytes = VNOVAL; 706 } 707 708 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) 709 #define ARRAY_PRINT(idx, arr) \ 710 ((unsigned int)(idx) < ARRAY_SIZE(arr) ? (arr)[(idx)] : "UNKNOWN") 711 712 const char * const vnode_tags[] = { VNODE_TAGS }; 713 const char * const vnode_types[] = { VNODE_TYPES }; 714 const char vnode_flagbits[] = VNODE_FLAGBITS; 715 716 /* 717 * Print out a description of a vnode. 718 */ 719 void 720 vprint(const char *label, struct vnode *vp) 721 { 722 char bf[96]; 723 int flag; 724 725 flag = vp->v_iflag | vp->v_vflag | vp->v_uflag; 726 snprintb(bf, sizeof(bf), vnode_flagbits, flag); 727 728 if (label != NULL) 729 printf("%s: ", label); 730 printf("vnode @ %p, flags (%s)\n\ttag %s(%d), type %s(%d), " 731 "usecount %d, writecount %d, holdcount %d\n" 732 "\tfreelisthd %p, mount %p, data %p lock %p\n", 733 vp, bf, ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag, 734 ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type, 735 vp->v_usecount, vp->v_writecount, vp->v_holdcnt, 736 vp->v_freelisthd, vp->v_mount, vp->v_data, &vp->v_lock); 737 if (vp->v_data != NULL) { 738 printf("\t"); 739 VOP_PRINT(vp); 740 } 741 } 742 743 /* Deprecated. Kept for KPI compatibility. */ 744 int 745 vaccess(enum vtype type, mode_t file_mode, uid_t uid, gid_t gid, 746 mode_t acc_mode, kauth_cred_t cred) 747 { 748 749 #ifdef DIAGNOSTIC 750 printf("vaccess: deprecated interface used.\n"); 751 #endif /* DIAGNOSTIC */ 752 753 return kauth_authorize_vnode(cred, KAUTH_ACCESS_ACTION(acc_mode, 754 type, file_mode), NULL /* This may panic. */, NULL, 755 genfs_can_access(type, file_mode, uid, gid, acc_mode, cred)); 756 } 757 758 /* 759 * Given a file system name, look up the vfsops for that 760 * file system, or return NULL if file system isn't present 761 * in the kernel. 762 */ 763 struct vfsops * 764 vfs_getopsbyname(const char *name) 765 { 766 struct vfsops *v; 767 768 mutex_enter(&vfs_list_lock); 769 LIST_FOREACH(v, &vfs_list, vfs_list) { 770 if (strcmp(v->vfs_name, name) == 0) 771 break; 772 } 773 if (v != NULL) 774 v->vfs_refcount++; 775 mutex_exit(&vfs_list_lock); 776 777 return (v); 778 } 779 780 void 781 copy_statvfs_info(struct statvfs *sbp, const struct mount *mp) 782 { 783 const struct statvfs *mbp; 784 785 if (sbp == (mbp = &mp->mnt_stat)) 786 return; 787 788 (void)memcpy(&sbp->f_fsidx, &mbp->f_fsidx, sizeof(sbp->f_fsidx)); 789 sbp->f_fsid = mbp->f_fsid; 790 sbp->f_owner = mbp->f_owner; 791 sbp->f_flag = mbp->f_flag; 792 sbp->f_syncwrites = mbp->f_syncwrites; 793 sbp->f_asyncwrites = mbp->f_asyncwrites; 794 sbp->f_syncreads = mbp->f_syncreads; 795 sbp->f_asyncreads = mbp->f_asyncreads; 796 (void)memcpy(sbp->f_spare, mbp->f_spare, sizeof(mbp->f_spare)); 797 (void)memcpy(sbp->f_fstypename, mbp->f_fstypename, 798 sizeof(sbp->f_fstypename)); 799 (void)memcpy(sbp->f_mntonname, mbp->f_mntonname, 800 sizeof(sbp->f_mntonname)); 801 (void)memcpy(sbp->f_mntfromname, mp->mnt_stat.f_mntfromname, 802 sizeof(sbp->f_mntfromname)); 803 sbp->f_namemax = mbp->f_namemax; 804 } 805 806 int 807 set_statvfs_info(const char *onp, int ukon, const char *fromp, int ukfrom, 808 const char *vfsname, struct mount *mp, struct lwp *l) 809 { 810 int error; 811 size_t size; 812 struct statvfs *sfs = &mp->mnt_stat; 813 int (*fun)(const void *, void *, size_t, size_t *); 814 815 (void)strlcpy(mp->mnt_stat.f_fstypename, vfsname, 816 sizeof(mp->mnt_stat.f_fstypename)); 817 818 if (onp) { 819 struct cwdinfo *cwdi = l->l_proc->p_cwdi; 820 fun = (ukon == UIO_SYSSPACE) ? copystr : copyinstr; 821 if (cwdi->cwdi_rdir != NULL) { 822 size_t len; 823 char *bp; 824 char *path = PNBUF_GET(); 825 826 bp = path + MAXPATHLEN; 827 *--bp = '\0'; 828 rw_enter(&cwdi->cwdi_lock, RW_READER); 829 error = getcwd_common(cwdi->cwdi_rdir, rootvnode, &bp, 830 path, MAXPATHLEN / 2, 0, l); 831 rw_exit(&cwdi->cwdi_lock); 832 if (error) { 833 PNBUF_PUT(path); 834 return error; 835 } 836 837 len = strlen(bp); 838 if (len > sizeof(sfs->f_mntonname) - 1) 839 len = sizeof(sfs->f_mntonname) - 1; 840 (void)strncpy(sfs->f_mntonname, bp, len); 841 PNBUF_PUT(path); 842 843 if (len < sizeof(sfs->f_mntonname) - 1) { 844 error = (*fun)(onp, &sfs->f_mntonname[len], 845 sizeof(sfs->f_mntonname) - len - 1, &size); 846 if (error) 847 return error; 848 size += len; 849 } else { 850 size = len; 851 } 852 } else { 853 error = (*fun)(onp, &sfs->f_mntonname, 854 sizeof(sfs->f_mntonname) - 1, &size); 855 if (error) 856 return error; 857 } 858 (void)memset(sfs->f_mntonname + size, 0, 859 sizeof(sfs->f_mntonname) - size); 860 } 861 862 if (fromp) { 863 fun = (ukfrom == UIO_SYSSPACE) ? copystr : copyinstr; 864 error = (*fun)(fromp, sfs->f_mntfromname, 865 sizeof(sfs->f_mntfromname) - 1, &size); 866 if (error) 867 return error; 868 (void)memset(sfs->f_mntfromname + size, 0, 869 sizeof(sfs->f_mntfromname) - size); 870 } 871 return 0; 872 } 873 874 void 875 vfs_timestamp(struct timespec *ts) 876 { 877 878 nanotime(ts); 879 } 880 881 time_t rootfstime; /* recorded root fs time, if known */ 882 void 883 setrootfstime(time_t t) 884 { 885 rootfstime = t; 886 } 887 888 static const uint8_t vttodt_tab[ ] = { 889 [VNON] = DT_UNKNOWN, 890 [VREG] = DT_REG, 891 [VDIR] = DT_DIR, 892 [VBLK] = DT_BLK, 893 [VCHR] = DT_CHR, 894 [VLNK] = DT_LNK, 895 [VSOCK] = DT_SOCK, 896 [VFIFO] = DT_FIFO, 897 [VBAD] = DT_UNKNOWN 898 }; 899 900 uint8_t 901 vtype2dt(enum vtype vt) 902 { 903 904 CTASSERT(VBAD == __arraycount(vttodt_tab) - 1); 905 return vttodt_tab[vt]; 906 } 907 908 int 909 VFS_MOUNT(struct mount *mp, const char *a, void *b, size_t *c) 910 { 911 int error; 912 913 KERNEL_LOCK(1, NULL); 914 error = (*(mp->mnt_op->vfs_mount))(mp, a, b, c); 915 KERNEL_UNLOCK_ONE(NULL); 916 917 return error; 918 } 919 920 int 921 VFS_START(struct mount *mp, int a) 922 { 923 int error; 924 925 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 926 KERNEL_LOCK(1, NULL); 927 } 928 error = (*(mp->mnt_op->vfs_start))(mp, a); 929 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 930 KERNEL_UNLOCK_ONE(NULL); 931 } 932 933 return error; 934 } 935 936 int 937 VFS_UNMOUNT(struct mount *mp, int a) 938 { 939 int error; 940 941 KERNEL_LOCK(1, NULL); 942 error = (*(mp->mnt_op->vfs_unmount))(mp, a); 943 KERNEL_UNLOCK_ONE(NULL); 944 945 return error; 946 } 947 948 int 949 VFS_ROOT(struct mount *mp, struct vnode **a) 950 { 951 int error; 952 953 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 954 KERNEL_LOCK(1, NULL); 955 } 956 error = (*(mp->mnt_op->vfs_root))(mp, a); 957 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 958 KERNEL_UNLOCK_ONE(NULL); 959 } 960 961 return error; 962 } 963 964 int 965 VFS_QUOTACTL(struct mount *mp, struct quotactl_args *args) 966 { 967 int error; 968 969 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 970 KERNEL_LOCK(1, NULL); 971 } 972 error = (*(mp->mnt_op->vfs_quotactl))(mp, args); 973 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 974 KERNEL_UNLOCK_ONE(NULL); 975 } 976 977 return error; 978 } 979 980 int 981 VFS_STATVFS(struct mount *mp, struct statvfs *a) 982 { 983 int error; 984 985 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 986 KERNEL_LOCK(1, NULL); 987 } 988 error = (*(mp->mnt_op->vfs_statvfs))(mp, a); 989 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 990 KERNEL_UNLOCK_ONE(NULL); 991 } 992 993 return error; 994 } 995 996 int 997 VFS_SYNC(struct mount *mp, int a, struct kauth_cred *b) 998 { 999 int error; 1000 1001 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 1002 KERNEL_LOCK(1, NULL); 1003 } 1004 error = (*(mp->mnt_op->vfs_sync))(mp, a, b); 1005 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 1006 KERNEL_UNLOCK_ONE(NULL); 1007 } 1008 1009 return error; 1010 } 1011 1012 int 1013 VFS_FHTOVP(struct mount *mp, struct fid *a, struct vnode **b) 1014 { 1015 int error; 1016 1017 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 1018 KERNEL_LOCK(1, NULL); 1019 } 1020 error = (*(mp->mnt_op->vfs_fhtovp))(mp, a, b); 1021 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 1022 KERNEL_UNLOCK_ONE(NULL); 1023 } 1024 1025 return error; 1026 } 1027 1028 int 1029 VFS_VPTOFH(struct vnode *vp, struct fid *a, size_t *b) 1030 { 1031 int error; 1032 1033 if ((vp->v_vflag & VV_MPSAFE) == 0) { 1034 KERNEL_LOCK(1, NULL); 1035 } 1036 error = (*(vp->v_mount->mnt_op->vfs_vptofh))(vp, a, b); 1037 if ((vp->v_vflag & VV_MPSAFE) == 0) { 1038 KERNEL_UNLOCK_ONE(NULL); 1039 } 1040 1041 return error; 1042 } 1043 1044 int 1045 VFS_SNAPSHOT(struct mount *mp, struct vnode *a, struct timespec *b) 1046 { 1047 int error; 1048 1049 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 1050 KERNEL_LOCK(1, NULL); 1051 } 1052 error = (*(mp->mnt_op->vfs_snapshot))(mp, a, b); 1053 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 1054 KERNEL_UNLOCK_ONE(NULL); 1055 } 1056 1057 return error; 1058 } 1059 1060 int 1061 VFS_EXTATTRCTL(struct mount *mp, int a, struct vnode *b, int c, const char *d) 1062 { 1063 int error; 1064 1065 KERNEL_LOCK(1, NULL); /* XXXSMP check ffs */ 1066 error = (*(mp->mnt_op->vfs_extattrctl))(mp, a, b, c, d); 1067 KERNEL_UNLOCK_ONE(NULL); /* XXX */ 1068 1069 return error; 1070 } 1071 1072 int 1073 VFS_SUSPENDCTL(struct mount *mp, int a) 1074 { 1075 int error; 1076 1077 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 1078 KERNEL_LOCK(1, NULL); 1079 } 1080 error = (*(mp->mnt_op->vfs_suspendctl))(mp, a); 1081 if ((mp->mnt_iflag & IMNT_MPSAFE) == 0) { 1082 KERNEL_UNLOCK_ONE(NULL); 1083 } 1084 1085 return error; 1086 } 1087 1088 #if defined(DDB) || defined(DEBUGPRINT) 1089 static const char buf_flagbits[] = BUF_FLAGBITS; 1090 1091 void 1092 vfs_buf_print(struct buf *bp, int full, void (*pr)(const char *, ...)) 1093 { 1094 char bf[1024]; 1095 1096 (*pr)(" vp %p lblkno 0x%"PRIx64" blkno 0x%"PRIx64" rawblkno 0x%" 1097 PRIx64 " dev 0x%x\n", 1098 bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_rawblkno, bp->b_dev); 1099 1100 snprintb(bf, sizeof(bf), 1101 buf_flagbits, bp->b_flags | bp->b_oflags | bp->b_cflags); 1102 (*pr)(" error %d flags 0x%s\n", bp->b_error, bf); 1103 1104 (*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n", 1105 bp->b_bufsize, bp->b_bcount, bp->b_resid); 1106 (*pr)(" data %p saveaddr %p\n", 1107 bp->b_data, bp->b_saveaddr); 1108 (*pr)(" iodone %p objlock %p\n", bp->b_iodone, bp->b_objlock); 1109 } 1110 1111 void 1112 vfs_vnode_print(struct vnode *vp, int full, void (*pr)(const char *, ...)) 1113 { 1114 char bf[256]; 1115 1116 uvm_object_printit(&vp->v_uobj, full, pr); 1117 snprintb(bf, sizeof(bf), 1118 vnode_flagbits, vp->v_iflag | vp->v_vflag | vp->v_uflag); 1119 (*pr)("\nVNODE flags %s\n", bf); 1120 (*pr)("mp %p numoutput %d size 0x%llx writesize 0x%llx\n", 1121 vp->v_mount, vp->v_numoutput, vp->v_size, vp->v_writesize); 1122 1123 (*pr)("data %p writecount %ld holdcnt %ld\n", 1124 vp->v_data, vp->v_writecount, vp->v_holdcnt); 1125 1126 (*pr)("tag %s(%d) type %s(%d) mount %p typedata %p\n", 1127 ARRAY_PRINT(vp->v_tag, vnode_tags), vp->v_tag, 1128 ARRAY_PRINT(vp->v_type, vnode_types), vp->v_type, 1129 vp->v_mount, vp->v_mountedhere); 1130 1131 (*pr)("v_lock %p\n", &vp->v_lock); 1132 1133 if (full) { 1134 struct buf *bp; 1135 1136 (*pr)("clean bufs:\n"); 1137 LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) { 1138 (*pr)(" bp %p\n", bp); 1139 vfs_buf_print(bp, full, pr); 1140 } 1141 1142 (*pr)("dirty bufs:\n"); 1143 LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) { 1144 (*pr)(" bp %p\n", bp); 1145 vfs_buf_print(bp, full, pr); 1146 } 1147 } 1148 } 1149 1150 void 1151 vfs_mount_print(struct mount *mp, int full, void (*pr)(const char *, ...)) 1152 { 1153 char sbuf[256]; 1154 1155 (*pr)("vnodecovered = %p syncer = %p data = %p\n", 1156 mp->mnt_vnodecovered,mp->mnt_syncer,mp->mnt_data); 1157 1158 (*pr)("fs_bshift %d dev_bshift = %d\n", 1159 mp->mnt_fs_bshift,mp->mnt_dev_bshift); 1160 1161 snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_flag); 1162 (*pr)("flag = %s\n", sbuf); 1163 1164 snprintb(sbuf, sizeof(sbuf), __IMNT_FLAG_BITS, mp->mnt_iflag); 1165 (*pr)("iflag = %s\n", sbuf); 1166 1167 (*pr)("refcnt = %d unmounting @ %p updating @ %p\n", mp->mnt_refcnt, 1168 &mp->mnt_unmounting, &mp->mnt_updating); 1169 1170 (*pr)("statvfs cache:\n"); 1171 (*pr)("\tbsize = %lu\n",mp->mnt_stat.f_bsize); 1172 (*pr)("\tfrsize = %lu\n",mp->mnt_stat.f_frsize); 1173 (*pr)("\tiosize = %lu\n",mp->mnt_stat.f_iosize); 1174 1175 (*pr)("\tblocks = %"PRIu64"\n",mp->mnt_stat.f_blocks); 1176 (*pr)("\tbfree = %"PRIu64"\n",mp->mnt_stat.f_bfree); 1177 (*pr)("\tbavail = %"PRIu64"\n",mp->mnt_stat.f_bavail); 1178 (*pr)("\tbresvd = %"PRIu64"\n",mp->mnt_stat.f_bresvd); 1179 1180 (*pr)("\tfiles = %"PRIu64"\n",mp->mnt_stat.f_files); 1181 (*pr)("\tffree = %"PRIu64"\n",mp->mnt_stat.f_ffree); 1182 (*pr)("\tfavail = %"PRIu64"\n",mp->mnt_stat.f_favail); 1183 (*pr)("\tfresvd = %"PRIu64"\n",mp->mnt_stat.f_fresvd); 1184 1185 (*pr)("\tf_fsidx = { 0x%"PRIx32", 0x%"PRIx32" }\n", 1186 mp->mnt_stat.f_fsidx.__fsid_val[0], 1187 mp->mnt_stat.f_fsidx.__fsid_val[1]); 1188 1189 (*pr)("\towner = %"PRIu32"\n",mp->mnt_stat.f_owner); 1190 (*pr)("\tnamemax = %lu\n",mp->mnt_stat.f_namemax); 1191 1192 snprintb(sbuf, sizeof(sbuf), __MNT_FLAG_BITS, mp->mnt_stat.f_flag); 1193 1194 (*pr)("\tflag = %s\n",sbuf); 1195 (*pr)("\tsyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_syncwrites); 1196 (*pr)("\tasyncwrites = %" PRIu64 "\n",mp->mnt_stat.f_asyncwrites); 1197 (*pr)("\tsyncreads = %" PRIu64 "\n",mp->mnt_stat.f_syncreads); 1198 (*pr)("\tasyncreads = %" PRIu64 "\n",mp->mnt_stat.f_asyncreads); 1199 (*pr)("\tfstypename = %s\n",mp->mnt_stat.f_fstypename); 1200 (*pr)("\tmntonname = %s\n",mp->mnt_stat.f_mntonname); 1201 (*pr)("\tmntfromname = %s\n",mp->mnt_stat.f_mntfromname); 1202 1203 { 1204 int cnt = 0; 1205 struct vnode *vp; 1206 (*pr)("locked vnodes ="); 1207 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 1208 if (VOP_ISLOCKED(vp)) { 1209 if ((++cnt % 6) == 0) { 1210 (*pr)(" %p,\n\t", vp); 1211 } else { 1212 (*pr)(" %p,", vp); 1213 } 1214 } 1215 } 1216 (*pr)("\n"); 1217 } 1218 1219 if (full) { 1220 int cnt = 0; 1221 struct vnode *vp; 1222 (*pr)("all vnodes ="); 1223 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 1224 if (!TAILQ_NEXT(vp, v_mntvnodes)) { 1225 (*pr)(" %p", vp); 1226 } else if ((++cnt % 6) == 0) { 1227 (*pr)(" %p,\n\t", vp); 1228 } else { 1229 (*pr)(" %p,", vp); 1230 } 1231 } 1232 (*pr)("\n", vp); 1233 } 1234 } 1235 1236 /* 1237 * List all of the locked vnodes in the system. 1238 */ 1239 void printlockedvnodes(void); 1240 1241 void 1242 printlockedvnodes(void) 1243 { 1244 struct mount *mp, *nmp; 1245 struct vnode *vp; 1246 1247 printf("Locked vnodes\n"); 1248 mutex_enter(&mountlist_lock); 1249 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 1250 if (vfs_busy(mp, &nmp)) { 1251 continue; 1252 } 1253 TAILQ_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) { 1254 if (VOP_ISLOCKED(vp)) 1255 vprint(NULL, vp); 1256 } 1257 mutex_enter(&mountlist_lock); 1258 vfs_unbusy(mp, false, &nmp); 1259 } 1260 mutex_exit(&mountlist_lock); 1261 } 1262 1263 #endif /* DDB || DEBUGPRINT */ 1264