1 /* $NetBSD: vfs_mount.c,v 1.101 2022/12/09 10:33:18 hannken Exp $ */ 2 3 /*- 4 * Copyright (c) 1997-2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1989, 1993 35 * The Regents of the University of California. All rights reserved. 36 * (c) UNIX System Laboratories, Inc. 
37 * All or some portions of this file are derived from material licensed 38 * to the University of California by American Telephone and Telegraph 39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 40 * the permission of UNIX System Laboratories, Inc. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 
65 * 66 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 67 */ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.101 2022/12/09 10:33:18 hannken Exp $"); 71 72 #include <sys/param.h> 73 #include <sys/kernel.h> 74 75 #include <sys/atomic.h> 76 #include <sys/buf.h> 77 #include <sys/conf.h> 78 #include <sys/fcntl.h> 79 #include <sys/filedesc.h> 80 #include <sys/device.h> 81 #include <sys/kauth.h> 82 #include <sys/kmem.h> 83 #include <sys/module.h> 84 #include <sys/mount.h> 85 #include <sys/fstrans.h> 86 #include <sys/namei.h> 87 #include <sys/extattr.h> 88 #include <sys/syscallargs.h> 89 #include <sys/sysctl.h> 90 #include <sys/systm.h> 91 #include <sys/vfs_syscalls.h> 92 #include <sys/vnode_impl.h> 93 94 #include <miscfs/deadfs/deadfs.h> 95 #include <miscfs/genfs/genfs.h> 96 #include <miscfs/specfs/specdev.h> 97 98 #include <uvm/uvm_swap.h> 99 100 enum mountlist_type { 101 ME_MOUNT, 102 ME_MARKER 103 }; 104 struct mountlist_entry { 105 TAILQ_ENTRY(mountlist_entry) me_list; /* Mount list. */ 106 struct mount *me_mount; /* Actual mount if ME_MOUNT, 107 current mount else. */ 108 enum mountlist_type me_type; /* Mount or marker. */ 109 }; 110 struct mount_iterator { 111 struct mountlist_entry mi_entry; 112 }; 113 114 static struct vnode *vfs_vnode_iterator_next1(struct vnode_iterator *, 115 bool (*)(void *, struct vnode *), void *, bool); 116 117 /* Root filesystem. */ 118 vnode_t * rootvnode; 119 120 /* Mounted filesystem list. 
 */
static TAILQ_HEAD(mountlist, mountlist_entry) mountlist;
static kmutex_t mountlist_lock __cacheline_aligned;
/* Byte offset of vi_lrulist.tqe_next inside vnode_impl_t. */
int vnode_offset_next_by_lru	/* XXX: ugly hack for pstat.c */
    = offsetof(vnode_impl_t, vi_lrulist.tqe_next);

kmutex_t vfs_list_lock __cacheline_aligned;

static specificdata_domain_t mount_specificdata_domain;
static kmutex_t mntid_lock;

/* Monotonic generation number handed out to each new mount. */
static kmutex_t mountgen_lock __cacheline_aligned;
static uint64_t mountgen;

/*
 * Initialize the mount subsystem: the global mount list and its lock,
 * the mount specificdata domain, and the fsid/generation locks.
 * Called once during kernel startup.
 */
void
vfs_mount_sysinit(void)
{

	TAILQ_INIT(&mountlist);
	mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);

	mount_specificdata_domain = specificdata_domain_create();
	mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE);
	mountgen = 0;
}

/*
 * Allocate and minimally initialize a new mount structure.
 *
 * => vfsops is the file system's operations vector.
 * => vp is the vnode this mount will cover (NULL for root mounts).
 * => Returns the mount with one reference, donated to the caller.
 */
struct mount *
vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp)
{
	struct mount *mp;
	int error __diagused;

	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
	mp->mnt_op = vfsops;
	mp->mnt_refcnt = 1;		/* reference donated to the caller */
	TAILQ_INIT(&mp->mnt_vnodelist);
	mp->mnt_renamelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_vnodelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_updating = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_vnodecovered = vp;
	mount_initspecific(mp);

	/* Attach fstrans state; asserted not to fail for a fresh mount. */
	error = fstrans_mount(mp);
	KASSERT(error == 0);

	/* Assign a unique, monotonically increasing generation number. */
	mutex_enter(&mountgen_lock);
	mp->mnt_gen = mountgen++;
	mutex_exit(&mountgen_lock);

	return mp;
}

/*
 * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and
 * initialize a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(const char *fstypename, const char *devname,
    struct mount **mpp)
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;
	int error __diagused;

	/* Find the named file system type on the global vfs list. */
	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename,
		    sizeof(mp->mnt_stat.f_fstypename)))
			break;
	if (vfsp == NULL) {
		mutex_exit(&vfs_list_lock);
		return (ENODEV);
	}
	/* Hold the vfsops while this mount uses them. */
	vfsp->vfs_refcount++;
	mutex_exit(&vfs_list_lock);

	if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL)
		return ENOMEM;
	/* A fresh mount cannot be unmounting, so vfs_busy() must succeed. */
	error = vfs_busy(mp);
	KASSERT(error == 0);
	mp->mnt_flag = MNT_RDONLY;	/* root is mounted read-only first */
	(void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
	    sizeof(mp->mnt_stat.f_fstypename));
	mp->mnt_stat.f_mntonname[0] = '/';
	mp->mnt_stat.f_mntonname[1] = '\0';
	/* Guarantee NUL termination of f_mntfromname after the copy below. */
	mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
	    '\0';
	(void)copystr(devname, mp->mnt_stat.f_mntfromname,
	    sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
	*mpp = mp;
	return 0;
}

/*
 * vfs_getnewfsid: get a new unique fsid.
 */
void
vfs_getnewfsid(struct mount *mp)
{
	static u_short xxxfs_mntid;
	struct mountlist_entry *me;
	fsid_t tfsid;
	int mtype;

	mutex_enter(&mntid_lock);
	/* Mount id 0 is never handed out; skip it on first use and wrap. */
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	mtype = makefstype(mp->mnt_op->vfs_name);
	/* Candidate fsid: <fs type, mount id> packed as a device number. */
	tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
	tfsid.__fsid_val[1] = mtype;
	/* Always increment to not return the same fsid to parallel mounts. */
	xxxfs_mntid++;

	/*
	 * Directly walk mountlist to prevent deadlock through
	 * mountlist_iterator_next() -> vfs_busy().
	 */
	mutex_enter(&mountlist_lock);
	/* On any collision bump the candidate and restart from the head. */
	for (me = TAILQ_FIRST(&mountlist); me != TAILQ_END(&mountlist); ) {
		if (me->me_type == ME_MOUNT &&
		    me->me_mount->mnt_stat.f_fsidx.__fsid_val[0] ==
		    tfsid.__fsid_val[0] &&
		    me->me_mount->mnt_stat.f_fsidx.__fsid_val[1] ==
		    tfsid.__fsid_val[1]) {
			tfsid.__fsid_val[0]++;
			xxxfs_mntid++;
			me = TAILQ_FIRST(&mountlist);
		} else {
			me = TAILQ_NEXT(me, me_list);
		}
	}
	mutex_exit(&mountlist_lock);

	mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
	mp->mnt_stat.f_fsidx.__fsid_val[1] = tfsid.__fsid_val[1];
	/* The short fsid is the first word of the full fsid. */
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	mutex_exit(&mntid_lock);
}

/*
 * Lookup a mount point by filesystem identifier.
 *
 * XXX Needs to add a reference to the mount point.
 */
struct mount *
vfs_getvfs(fsid_t *fsid)
{
	mount_iterator_t *iter;
	struct mount *mp;

	/* Linear scan of the mount list for a matching fsid. */
	mountlist_iterator_init(&iter);
	while ((mp = mountlist_iterator_next(iter)) != NULL) {
		if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
		    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
			mountlist_iterator_destroy(iter);
			return mp;
		}
	}
	mountlist_iterator_destroy(iter);
	return NULL;
}

/*
 * Take a reference to a mount structure.
 */
void
vfs_ref(struct mount *mp)
{

	/*
	 * A new reference requires either an existing reference or
	 * holding mountlist_lock (mount visible only via the list).
	 */
	KASSERT(mp->mnt_refcnt > 0 || mutex_owned(&mountlist_lock));

	atomic_inc_uint(&mp->mnt_refcnt);
}

/*
 * Drop a reference to a mount structure, freeing if the last reference.
 */
void
vfs_rele(struct mount *mp)
{

	/* Release fence: our prior stores are visible before the drop. */
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_release();
#endif
	if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
		return;
	}
	/* Acquire fence: observe stores made by other releasing threads. */
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_acquire();
#endif

	/*
	 * Nothing else has visibility of the mount: we can now
	 * free the data structures.
	 */
	KASSERT(mp->mnt_refcnt == 0);
	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
	mutex_obj_free(mp->mnt_updating);
	mutex_obj_free(mp->mnt_renamelock);
	mutex_obj_free(mp->mnt_vnodelock);
	if (mp->mnt_op != NULL) {
		vfs_delref(mp->mnt_op);
	}
	fstrans_unmount(mp);
	/*
	 * Final free of mp gets done from fstrans_mount_dtor().
	 *
	 * Prevents this memory to be reused as a mount before
	 * fstrans releases all references to it.
	 */
}

/*
 * Mark a mount point as busy, and gain a new reference to it.  Used to
 * prevent the file system from being unmounted during critical sections.
 *
 * vfs_busy can be called multiple times and by multiple threads
 * and must be accompanied by the same number of vfs_unbusy calls.
 *
 * => The caller must hold a pre-existing reference to the mount.
 * => Will fail if the file system is being unmounted, or is unmounted.
 */
static inline int
_vfs_busy(struct mount *mp, bool wait)
{

	KASSERT(mp->mnt_refcnt > 0);

	/* Enter the file system transaction, optionally without sleeping. */
	if (wait) {
		fstrans_start(mp);
	} else {
		if (fstrans_start_nowait(mp))
			return EBUSY;
	}
	/* The unmount may have completed while fstrans_start() slept. */
	if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
		fstrans_done(mp);
		return ENOENT;
	}
	vfs_ref(mp);
	return 0;
}

/* Busy a mount, waiting as long as needed. */
int
vfs_busy(struct mount *mp)
{

	return _vfs_busy(mp, true);
}

/* Busy a mount without sleeping; returns EBUSY if that would block. */
int
vfs_trybusy(struct mount *mp)
{

	return _vfs_busy(mp, false);
}

/*
 * Unbusy a busy filesystem.
 *
 * Every successful vfs_busy() call must be undone by a vfs_unbusy() call.
 */
void
vfs_unbusy(struct mount *mp)
{

	KASSERT(mp->mnt_refcnt > 0);

	fstrans_done(mp);
	vfs_rele(mp);
}

/*
 * Change a file systems lower mount.
 * Both the current and the new lower mount may be NULL.
The caller 398 * guarantees exclusive access to the mount and holds a pre-existing 399 * reference to the new lower mount. 400 */ 401 int 402 vfs_set_lowermount(struct mount *mp, struct mount *lowermp) 403 { 404 struct mount *oldlowermp; 405 int error; 406 407 #ifdef DEBUG 408 /* 409 * Limit the depth of file system stack so kernel sanitizers 410 * may stress mount/unmount without exhausting the kernel stack. 411 */ 412 int depth; 413 struct mount *mp2; 414 415 for (depth = 0, mp2 = lowermp; mp2; depth++, mp2 = mp2->mnt_lower) { 416 if (depth == 23) 417 return EINVAL; 418 } 419 #endif 420 421 if (lowermp) { 422 if (lowermp == dead_rootmount) 423 return ENOENT; 424 error = vfs_busy(lowermp); 425 if (error) 426 return error; 427 vfs_ref(lowermp); 428 } 429 430 oldlowermp = mp->mnt_lower; 431 mp->mnt_lower = lowermp; 432 433 if (lowermp) 434 vfs_unbusy(lowermp); 435 436 if (oldlowermp) 437 vfs_rele(oldlowermp); 438 439 return 0; 440 } 441 442 struct vnode_iterator { 443 vnode_impl_t vi_vnode; 444 }; 445 446 void 447 vfs_vnode_iterator_init(struct mount *mp, struct vnode_iterator **vnip) 448 { 449 vnode_t *vp; 450 vnode_impl_t *vip; 451 452 vp = vnalloc_marker(mp); 453 vip = VNODE_TO_VIMPL(vp); 454 455 mutex_enter(mp->mnt_vnodelock); 456 TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vip, vi_mntvnodes); 457 vp->v_usecount = 1; 458 mutex_exit(mp->mnt_vnodelock); 459 460 *vnip = (struct vnode_iterator *)vip; 461 } 462 463 void 464 vfs_vnode_iterator_destroy(struct vnode_iterator *vni) 465 { 466 vnode_impl_t *mvip = &vni->vi_vnode; 467 vnode_t *mvp = VIMPL_TO_VNODE(mvip); 468 kmutex_t *lock; 469 470 KASSERT(vnis_marker(mvp)); 471 if (vrefcnt(mvp) != 0) { 472 lock = mvp->v_mount->mnt_vnodelock; 473 mutex_enter(lock); 474 TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvip, vi_mntvnodes); 475 mvp->v_usecount = 0; 476 mutex_exit(lock); 477 } 478 vnfree_marker(mvp); 479 } 480 481 static struct vnode * 482 vfs_vnode_iterator_next1(struct vnode_iterator *vni, 483 bool (*f)(void *, struct 
vnode *), void *cl, bool do_wait) 484 { 485 vnode_impl_t *mvip = &vni->vi_vnode; 486 struct mount *mp = VIMPL_TO_VNODE(mvip)->v_mount; 487 vnode_t *vp; 488 vnode_impl_t *vip; 489 kmutex_t *lock; 490 int error; 491 492 KASSERT(vnis_marker(VIMPL_TO_VNODE(mvip))); 493 494 lock = mp->mnt_vnodelock; 495 do { 496 mutex_enter(lock); 497 vip = TAILQ_NEXT(mvip, vi_mntvnodes); 498 TAILQ_REMOVE(&mp->mnt_vnodelist, mvip, vi_mntvnodes); 499 VIMPL_TO_VNODE(mvip)->v_usecount = 0; 500 again: 501 if (vip == NULL) { 502 mutex_exit(lock); 503 return NULL; 504 } 505 vp = VIMPL_TO_VNODE(vip); 506 KASSERT(vp != NULL); 507 mutex_enter(vp->v_interlock); 508 if (vnis_marker(vp) || 509 vdead_check(vp, (do_wait ? 0 : VDEAD_NOWAIT)) || 510 (f && !(*f)(cl, vp))) { 511 mutex_exit(vp->v_interlock); 512 vip = TAILQ_NEXT(vip, vi_mntvnodes); 513 goto again; 514 } 515 516 TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vip, mvip, vi_mntvnodes); 517 VIMPL_TO_VNODE(mvip)->v_usecount = 1; 518 mutex_exit(lock); 519 error = vcache_vget(vp); 520 KASSERT(error == 0 || error == ENOENT); 521 } while (error != 0); 522 523 return vp; 524 } 525 526 struct vnode * 527 vfs_vnode_iterator_next(struct vnode_iterator *vni, 528 bool (*f)(void *, struct vnode *), void *cl) 529 { 530 531 return vfs_vnode_iterator_next1(vni, f, cl, false); 532 } 533 534 /* 535 * Move a vnode from one mount queue to another. 536 */ 537 void 538 vfs_insmntque(vnode_t *vp, struct mount *mp) 539 { 540 vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 541 struct mount *omp; 542 kmutex_t *lock; 543 544 KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 || 545 vp->v_tag == VT_VFS); 546 547 /* 548 * Delete from old mount point vnode list, if on one. 549 */ 550 if ((omp = vp->v_mount) != NULL) { 551 lock = omp->mnt_vnodelock; 552 mutex_enter(lock); 553 TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vip, vi_mntvnodes); 554 mutex_exit(lock); 555 } 556 557 /* 558 * Insert into list of vnodes for the new mount point, if 559 * available. 
The caller must take a reference on the mount 560 * structure and donate to the vnode. 561 */ 562 if ((vp->v_mount = mp) != NULL) { 563 lock = mp->mnt_vnodelock; 564 mutex_enter(lock); 565 TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vip, vi_mntvnodes); 566 mutex_exit(lock); 567 } 568 569 if (omp != NULL) { 570 /* Release reference to old mount. */ 571 vfs_rele(omp); 572 } 573 } 574 575 /* 576 * Remove any vnodes in the vnode table belonging to mount point mp. 577 * 578 * If FORCECLOSE is not specified, there should not be any active ones, 579 * return error if any are found (nb: this is a user error, not a 580 * system error). If FORCECLOSE is specified, detach any active vnodes 581 * that are found. 582 * 583 * If WRITECLOSE is set, only flush out regular file vnodes open for 584 * writing. 585 * 586 * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped. 587 */ 588 #ifdef DEBUG 589 int busyprt = 0; /* print out busy vnodes */ 590 struct ctldebug debug1 = { "busyprt", &busyprt }; 591 #endif 592 593 static vnode_t * 594 vflushnext(struct vnode_iterator *marker, int *when) 595 { 596 if (getticks() > *when) { 597 yield(); 598 *when = getticks() + hz / 10; 599 } 600 preempt_point(); 601 return vfs_vnode_iterator_next1(marker, NULL, NULL, true); 602 } 603 604 /* 605 * Flush one vnode. Referenced on entry, unreferenced on return. 606 */ 607 static int 608 vflush_one(vnode_t *vp, vnode_t *skipvp, int flags) 609 { 610 int error; 611 struct vattr vattr; 612 613 if (vp == skipvp || 614 ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM))) { 615 vrele(vp); 616 return 0; 617 } 618 /* 619 * If WRITECLOSE is set, only flush out regular file 620 * vnodes open for writing or open and unlinked. 
621 */ 622 if ((flags & WRITECLOSE)) { 623 if (vp->v_type != VREG) { 624 vrele(vp); 625 return 0; 626 } 627 error = vn_lock(vp, LK_EXCLUSIVE); 628 if (error) { 629 KASSERT(error == ENOENT); 630 vrele(vp); 631 return 0; 632 } 633 error = VOP_FSYNC(vp, curlwp->l_cred, FSYNC_WAIT, 0, 0); 634 if (error == 0) 635 error = VOP_GETATTR(vp, &vattr, curlwp->l_cred); 636 VOP_UNLOCK(vp); 637 if (error) { 638 vrele(vp); 639 return error; 640 } 641 if (vp->v_writecount == 0 && vattr.va_nlink > 0) { 642 vrele(vp); 643 return 0; 644 } 645 } 646 /* 647 * First try to recycle the vnode. 648 */ 649 if (vrecycle(vp)) 650 return 0; 651 /* 652 * If FORCECLOSE is set, forcibly close the vnode. 653 * For block or character devices, revert to an 654 * anonymous device. For all other files, just 655 * kill them. 656 */ 657 if (flags & FORCECLOSE) { 658 if (vrefcnt(vp) > 1 && 659 (vp->v_type == VBLK || vp->v_type == VCHR)) 660 vcache_make_anon(vp); 661 else 662 vgone(vp); 663 return 0; 664 } 665 vrele(vp); 666 return EBUSY; 667 } 668 669 int 670 vflush(struct mount *mp, vnode_t *skipvp, int flags) 671 { 672 vnode_t *vp; 673 struct vnode_iterator *marker; 674 int busy, error, when, retries = 2; 675 676 do { 677 busy = error = when = 0; 678 679 /* 680 * First, flush out any vnode references from the 681 * deferred vrele list. 682 */ 683 vrele_flush(mp); 684 685 vfs_vnode_iterator_init(mp, &marker); 686 687 while ((vp = vflushnext(marker, &when)) != NULL) { 688 error = vflush_one(vp, skipvp, flags); 689 if (error == EBUSY) { 690 error = 0; 691 busy++; 692 #ifdef DEBUG 693 if (busyprt && retries == 0) 694 vprint("vflush: busy vnode", vp); 695 #endif 696 } else if (error != 0) { 697 break; 698 } 699 } 700 701 vfs_vnode_iterator_destroy(marker); 702 } while (error == 0 && busy > 0 && retries-- > 0); 703 704 if (error) 705 return error; 706 if (busy) 707 return EBUSY; 708 return 0; 709 } 710 711 /* 712 * Mount a file system. 
713 */ 714 715 /* 716 * Scan all active processes to see if any of them have a current or root 717 * directory onto which the new filesystem has just been mounted. If so, 718 * replace them with the new mount point. 719 */ 720 static void 721 mount_checkdirs(vnode_t *olddp) 722 { 723 vnode_t *newdp, *rele1, *rele2; 724 struct cwdinfo *cwdi; 725 struct proc *p; 726 bool retry; 727 728 if (vrefcnt(olddp) == 1) { 729 return; 730 } 731 if (VFS_ROOT(olddp->v_mountedhere, LK_EXCLUSIVE, &newdp)) 732 panic("mount: lost mount"); 733 734 do { 735 retry = false; 736 mutex_enter(&proc_lock); 737 PROCLIST_FOREACH(p, &allproc) { 738 if ((cwdi = p->p_cwdi) == NULL) 739 continue; 740 /* 741 * Cannot change to the old directory any more, 742 * so even if we see a stale value it is not a 743 * problem. 744 */ 745 if (cwdi->cwdi_cdir != olddp && 746 cwdi->cwdi_rdir != olddp) 747 continue; 748 retry = true; 749 rele1 = NULL; 750 rele2 = NULL; 751 atomic_inc_uint(&cwdi->cwdi_refcnt); 752 mutex_exit(&proc_lock); 753 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 754 if (cwdi->cwdi_cdir == olddp) { 755 rele1 = cwdi->cwdi_cdir; 756 vref(newdp); 757 cwdi->cwdi_cdir = newdp; 758 } 759 if (cwdi->cwdi_rdir == olddp) { 760 rele2 = cwdi->cwdi_rdir; 761 vref(newdp); 762 cwdi->cwdi_rdir = newdp; 763 } 764 rw_exit(&cwdi->cwdi_lock); 765 cwdfree(cwdi); 766 if (rele1 != NULL) 767 vrele(rele1); 768 if (rele2 != NULL) 769 vrele(rele2); 770 mutex_enter(&proc_lock); 771 break; 772 } 773 mutex_exit(&proc_lock); 774 } while (retry); 775 776 if (rootvnode == olddp) { 777 vrele(rootvnode); 778 vref(newdp); 779 rootvnode = newdp; 780 } 781 vput(newdp); 782 } 783 784 /* 785 * Start extended attributes 786 */ 787 static int 788 start_extattr(struct mount *mp) 789 { 790 int error; 791 792 error = VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, NULL, 0, NULL); 793 if (error) 794 printf("%s: failed to start extattr: error = %d\n", 795 mp->mnt_stat.f_mntonname, error); 796 797 return error; 798 } 799 800 int 801 
mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops, 802 const char *path, int flags, void *data, size_t *data_len) 803 { 804 vnode_t *vp = *vpp; 805 struct mount *mp; 806 struct pathbuf *pb; 807 struct nameidata nd; 808 int error, error2; 809 810 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 811 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 812 if (error) { 813 vfs_delref(vfsops); 814 return error; 815 } 816 817 /* Cannot make a non-dir a mount-point (from here anyway). */ 818 if (vp->v_type != VDIR) { 819 vfs_delref(vfsops); 820 return ENOTDIR; 821 } 822 823 if (flags & MNT_EXPORTED) { 824 vfs_delref(vfsops); 825 return EINVAL; 826 } 827 828 if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) { 829 vfs_delref(vfsops); 830 return ENOMEM; 831 } 832 833 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 834 835 /* 836 * The underlying file system may refuse the mount for 837 * various reasons. Allow the user to force it to happen. 838 * 839 * Set the mount level flags. 840 */ 841 mp->mnt_flag = flags & (MNT_BASIC_FLAGS | MNT_FORCE | MNT_IGNORE); 842 843 error = VFS_MOUNT(mp, path, data, data_len); 844 mp->mnt_flag &= ~MNT_OP_FLAGS; 845 846 if (error != 0) { 847 vfs_rele(mp); 848 return error; 849 } 850 851 /* Suspend new file system before taking mnt_updating. */ 852 do { 853 error2 = vfs_suspend(mp, 0); 854 } while (error2 == EINTR || error2 == ERESTART); 855 KASSERT(error2 == 0 || error2 == EOPNOTSUPP); 856 mutex_enter(mp->mnt_updating); 857 858 /* 859 * Validate and prepare the mount point. 
860 */ 861 error = pathbuf_copyin(path, &pb); 862 if (error != 0) { 863 goto err_mounted; 864 } 865 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 866 error = namei(&nd); 867 pathbuf_destroy(pb); 868 if (error != 0) { 869 goto err_mounted; 870 } 871 if (nd.ni_vp != vp) { 872 vput(nd.ni_vp); 873 error = EINVAL; 874 goto err_mounted; 875 } 876 if (vp->v_mountedhere != NULL) { 877 vput(nd.ni_vp); 878 error = EBUSY; 879 goto err_mounted; 880 } 881 error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0); 882 if (error != 0) { 883 vput(nd.ni_vp); 884 goto err_mounted; 885 } 886 887 /* 888 * Put the new filesystem on the mount list after root. 889 */ 890 cache_purge(vp); 891 mp->mnt_iflag &= ~IMNT_WANTRDWR; 892 893 mountlist_append(mp); 894 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 895 vfs_syncer_add_to_worklist(mp); 896 vp->v_mountedhere = mp; 897 vput(nd.ni_vp); 898 899 mount_checkdirs(vp); 900 mutex_exit(mp->mnt_updating); 901 if (error2 == 0) 902 vfs_resume(mp); 903 904 /* Hold an additional reference to the mount across VFS_START(). */ 905 vfs_ref(mp); 906 (void) VFS_STATVFS(mp, &mp->mnt_stat); 907 error = VFS_START(mp, 0); 908 if (error) { 909 vrele(vp); 910 } else if (flags & MNT_EXTATTR) { 911 if (start_extattr(mp) != 0) 912 mp->mnt_flag &= ~MNT_EXTATTR; 913 } 914 /* Drop reference held for VFS_START(). */ 915 vfs_rele(mp); 916 *vpp = NULL; 917 return error; 918 919 err_mounted: 920 if (VFS_UNMOUNT(mp, MNT_FORCE) != 0) 921 panic("Unmounting fresh file system failed"); 922 mutex_exit(mp->mnt_updating); 923 if (error2 == 0) 924 vfs_resume(mp); 925 vfs_set_lowermount(mp, NULL); 926 vfs_rele(mp); 927 928 return error; 929 } 930 931 /* 932 * Do the actual file system unmount. File system is assumed to have 933 * been locked by the caller. 934 * 935 * => Caller hold reference to the mount, explicitly for dounmount(). 
936 */ 937 int 938 dounmount(struct mount *mp, int flags, struct lwp *l) 939 { 940 vnode_t *coveredvp; 941 int error, async, used_syncer, used_extattr; 942 const bool was_suspended = fstrans_is_owner(mp); 943 944 #if NVERIEXEC > 0 945 error = veriexec_unmountchk(mp); 946 if (error) 947 return (error); 948 #endif /* NVERIEXEC > 0 */ 949 950 if (!was_suspended) { 951 error = vfs_suspend(mp, 0); 952 if (error) { 953 return error; 954 } 955 } 956 957 KASSERT((mp->mnt_iflag & IMNT_GONE) == 0); 958 959 used_syncer = (mp->mnt_iflag & IMNT_ONWORKLIST) != 0; 960 used_extattr = mp->mnt_flag & MNT_EXTATTR; 961 962 mp->mnt_iflag |= IMNT_UNMOUNT; 963 mutex_enter(mp->mnt_updating); 964 async = mp->mnt_flag & MNT_ASYNC; 965 mp->mnt_flag &= ~MNT_ASYNC; 966 cache_purgevfs(mp); /* remove cache entries for this file sys */ 967 if (used_syncer) 968 vfs_syncer_remove_from_worklist(mp); 969 error = 0; 970 if (((mp->mnt_flag & MNT_RDONLY) == 0) && ((flags & MNT_FORCE) == 0)) { 971 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 972 } 973 if (error == 0 || (flags & MNT_FORCE)) { 974 error = VFS_UNMOUNT(mp, flags); 975 } 976 if (error) { 977 mp->mnt_iflag &= ~IMNT_UNMOUNT; 978 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 979 vfs_syncer_add_to_worklist(mp); 980 mp->mnt_flag |= async; 981 mutex_exit(mp->mnt_updating); 982 if (!was_suspended) 983 vfs_resume(mp); 984 if (used_extattr) { 985 if (start_extattr(mp) != 0) 986 mp->mnt_flag &= ~MNT_EXTATTR; 987 else 988 mp->mnt_flag |= MNT_EXTATTR; 989 } 990 return (error); 991 } 992 mutex_exit(mp->mnt_updating); 993 994 /* 995 * mark filesystem as gone to prevent further umounts 996 * after mnt_umounting lock is gone, this also prevents 997 * vfs_busy() from succeeding. 
 */
	mp->mnt_iflag |= IMNT_GONE;
	/* Detach from the covered vnode; it no longer has a mount on it. */
	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
		coveredvp->v_mountedhere = NULL;
	}
	if (!was_suspended)
		vfs_resume(mp);

	/* Take the dead mount off the global list; no vnodes may remain. */
	mountlist_remove(mp);
	if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
		panic("unmount: dangling vnode");
	vfs_hooks_unmount(mp);

	/* Drop the lower-mount linkage and the reference held since mount. */
	vfs_set_lowermount(mp, NULL);
	vfs_rele(mp);		/* reference from mount() */
	if (coveredvp != NULLVP) {
		vrele(coveredvp);
	}
	return (0);
}

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
bool
vfs_unmountall(struct lwp *l)
{

	printf("unmounting file systems...\n");
	return vfs_unmountall1(l, true, true);
}

/* Report one successfully unmounted file system, with optional prefix. */
static void
vfs_unmount_print(struct mount *mp, const char *pfx)
{

	aprint_verbose("%sunmounted %s on %s type %s\n", pfx,
	    mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname,
	    mp->mnt_stat.f_fstypename);
}

/*
 * Return the mount with the highest generation less than "gen".
 */
static struct mount *
vfs_unmount_next(uint64_t gen)
{
	mount_iterator_t *iter;
	struct mount *mp, *nmp;

	nmp = NULL;

	/* Scan the whole list, keeping a reference on the best candidate. */
	mountlist_iterator_init(&iter);
	while ((mp = mountlist_iterator_next(iter)) != NULL) {
		if ((nmp == NULL || mp->mnt_gen > nmp->mnt_gen) &&
		    mp->mnt_gen < gen) {
			if (nmp != NULL)
				vfs_rele(nmp);
			nmp = mp;
			vfs_ref(nmp);
		}
	}
	mountlist_iterator_destroy(iter);

	return nmp;
}

/*
 * Forcibly unmount the most recently mounted file system, if any.
 * Returns true if an unmount succeeded.
 */
bool
vfs_unmount_forceone(struct lwp *l)
{
	struct mount *mp;
	int error;

	mp = vfs_unmount_next(mountgen);
	if (mp == NULL) {
		return false;
	}

#ifdef DEBUG
	printf("forcefully unmounting %s (%s)...\n",
	    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
	/* Our reference is dropped here only if dounmount() failed. */
	if ((error = dounmount(mp, MNT_FORCE, l)) == 0) {
		vfs_unmount_print(mp, "forcefully ");
		return true;
	} else {
		vfs_rele(mp);
	}

#ifdef DEBUG
	printf("forceful unmount of %s failed with error %d\n",
	    mp->mnt_stat.f_mntonname, error);
#endif

	return false;
}

/*
 * Unmount file systems, newest generation first, until no candidate
 * below the starting generation remains.  Returns true if at least
 * one unmount succeeded.
 */
bool
vfs_unmountall1(struct lwp *l, bool force, bool verbose)
{
	struct mount *mp;
	mount_iterator_t *iter;
	bool any_error = false, progress = false;
	uint64_t gen;
	int error;

	/* Walk mounts from the highest generation downwards. */
	gen = mountgen;
	for (;;) {
		mp = vfs_unmount_next(gen);
		if (mp == NULL)
			break;
		gen = mp->mnt_gen;

#ifdef DEBUG
		printf("unmounting %p %s (%s)...\n",
		    (void *)mp, mp->mnt_stat.f_mntonname,
		    mp->mnt_stat.f_mntfromname);
#endif
		if ((error = dounmount(mp, force ?
MNT_FORCE : 0, l)) == 0) {
			vfs_unmount_print(mp, "");
			progress = true;
		} else {
			/*
			 * Unmount failed: drop our hold on the mount
			 * (NOTE(review): the matching vfs_ref() is taken
			 * above this fragment) and remember the failure
			 * for the warning below.
			 */
			vfs_rele(mp);
			if (verbose) {
				printf("unmount of %s failed with error %d\n",
				    mp->mnt_stat.f_mntonname, error);
			}
			any_error = true;
		}
	}
	if (verbose) {
		printf("unmounting done\n");
	}
	if (any_error && verbose) {
		printf("WARNING: some file systems would not unmount\n");
	}
	/* If the mountlist is empty it is time to remove swap. */
	mountlist_iterator_init(&iter);
	if (mountlist_iterator_next(iter) == NULL) {
		uvm_swap_shutdown(l);
	}
	mountlist_iterator_destroy(iter);

	return progress;
}

/*
 * Sync all disks in preparation for shutdown: take user processes off
 * the run queue, flush every file system with do_sys_sync() and wait
 * for the writes to drain (giving up if vfs_syncwait() reports that
 * buffers remain busy).
 */
void
vfs_sync_all(struct lwp *l)
{
	printf("syncing disks... ");

	/* remove user processes from run queue */
	suspendsched();
	(void)spl0();

	/* avoid coming back this way again if we panic. */
	doing_shutdown = 1;

	do_sys_sync(l);

	/* Wait for sync to finish. */
	if (vfs_syncwait() != 0) {
#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif
		printf("giving up\n");
		return;
	} else
		printf("done\n");
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown(void)
{
	lwp_t *l = curlwp;

	vfs_sync_all(l);

	/*
	 * If we have panicked - do not make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL) {
		return;
	}

	/* Unmount file systems. */
	vfs_unmountall(l);
}

/*
 * Print a list of supported file system types (used by vfs_mountroot)
 */
static void
vfs_print_fstypes(void)
{
	struct vfsops *v;
	int cnt = 0;

	/* First pass: just count the registered file systems. */
	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(v, &vfs_list, vfs_list)
		++cnt;
	mutex_exit(&vfs_list_lock);

	if (cnt == 0) {
		printf("WARNING: No file system modules have been loaded.\n");
		return;
	}

	/* Second pass: print each registered name. */
	printf("Supported file systems:");
	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		printf(" %s", v->vfs_name);
	}
	mutex_exit(&vfs_list_lock);
	printf("\n");
}

/*
 * Mount the root file system. If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot(void)
{
	struct vfsops *v;
	int error = ENODEV;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	/* Sanity-check the root device configuration per device class. */
	switch (device_class(root_device)) {
	case DV_IFNET:
		/* Network root: there must be no block device configured. */
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET "
			    "(0x%llx -> %llu,%llu)",
			    (unsigned long long)rootdev,
			    (unsigned long long)major(rootdev),
			    (unsigned long long)minor(rootdev));
		break;

	case DV_DISK:
		/*
		 * Disk root: get a vnode for the device and open it for
		 * reading.  It stays open unless mounting fails (see the
		 * cleanup at "done:" below).
		 */
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
		if (bdevvp(rootdev, &rootvp))
			panic("vfs_mountroot: can't get vnode for rootdev");
		vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_OPEN(rootvp, FREAD, FSCRED);
		VOP_UNLOCK(rootvp);
		if (error) {
			printf("vfs_mountroot: can't open root device\n");
			return (error);
		}
		break;

	case DV_VIRTUAL:
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    device_xname(root_device));
		return (ENODEV);
	}

	/*
	 * If user specified a root fs type, use it. Make sure the
	 * specified type exists and has a mount_root()
	 */
	if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) {
		v = vfs_getopsbyname(rootfstype);
		error = EFTYPE;
		if (v != NULL) {
			if (v->vfs_mountroot != NULL) {
				error = (v->vfs_mountroot)();
			}
			/* Drop the reference vfs_getopsbyname() took. */
			v->vfs_refcount--;
		}
		goto done;
	}

	/*
	 * Try each file system currently configured into the kernel.
	 */
	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (v->vfs_mountroot == NULL)
			continue;
#ifdef DEBUG
		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
#endif
		/*
		 * Keep the vfsops referenced while the list lock is
		 * dropped for the (potentially sleeping) mountroot call.
		 */
		v->vfs_refcount++;
		mutex_exit(&vfs_list_lock);
		error = (*v->vfs_mountroot)();
		mutex_enter(&vfs_list_lock);
		v->vfs_refcount--;
		if (!error) {
			aprint_normal("root file system type: %s\n",
			    v->vfs_name);
			break;
		}
	}
	mutex_exit(&vfs_list_lock);

	/* v == NULL means the loop ran off the end: nothing mounted. */
	if (v == NULL) {
		vfs_print_fstypes();
		printf("no file system for %s", device_xname(root_device));
		if (device_class(root_device) == DV_DISK)
			printf(" (dev 0x%llx)", (unsigned long long)rootdev);
		printf("\n");
		error = EFTYPE;
	}

done:
	/* On failure, undo the VOP_OPEN() done for the DV_DISK case. */
	if (error && device_class(root_device) == DV_DISK) {
		vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(rootvp, FREAD, FSCRED);
		VOP_UNLOCK(rootvp);
		vrele(rootvp);
	}
	if (error == 0) {
		mount_iterator_t *iter;
		struct mount *mp;

		/* The freshly mounted root is the first mountlist entry. */
		mountlist_iterator_init(&iter);
		mp = mountlist_iterator_next(iter);
		KASSERT(mp != NULL);
		mountlist_iterator_destroy(iter);

		mp->mnt_flag |= MNT_ROOTFS;
		mp->mnt_op->vfs_refcount++;

		/*
		 * Get the vnode for '/'. Set cwdi0.cwdi_cdir to
		 * reference it, and donate it the reference grabbed
		 * with VFS_ROOT().
		 */
		error = VFS_ROOT(mp, LK_NONE, &rootvnode);
		if (error)
			panic("cannot find root vnode, error=%d", error);
		cwdi0.cwdi_cdir = rootvnode;
		cwdi0.cwdi_rdir = NULL;

		/*
		 * Now that root is mounted, we can fixup initproc's CWD
		 * info. All other processes are kthreads, which merely
		 * share proc0's CWD info.
		 */
		initproc->p_cwdi->cwdi_cdir = rootvnode;
		vref(initproc->p_cwdi->cwdi_cdir);
		initproc->p_cwdi->cwdi_rdir = NULL;
		/*
		 * Enable loading of modules from the filesystem
		 */
		module_load_vfs_init();

	}
	return (error);
}

/*
 * mount_specific_key_create --
 *	Create a key for subsystem mount-specific data.
 */
int
mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
{

	return specificdata_key_create(mount_specificdata_domain, keyp, dtor);
}

/*
 * mount_specific_key_delete --
 *	Delete a key for subsystem mount-specific data.
 */
void
mount_specific_key_delete(specificdata_key_t key)
{

	specificdata_key_delete(mount_specificdata_domain, key);
}

/*
 * mount_initspecific --
 *	Initialize a mount's specificdata container.
 */
void
mount_initspecific(struct mount *mp)
{
	/* Only consumed by the KASSERT (DIAGNOSTIC kernels). */
	int error __diagused;

	error = specificdata_init(mount_specificdata_domain,
	    &mp->mnt_specdataref);
	KASSERT(error == 0);
}

/*
 * mount_finispecific --
 *	Finalize a mount's specificdata container.
 */
void
mount_finispecific(struct mount *mp)
{

	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
}

/*
 * mount_getspecific --
 *	Return mount-specific data corresponding to the specified key.
1413 */ 1414 void * 1415 mount_getspecific(struct mount *mp, specificdata_key_t key) 1416 { 1417 1418 return specificdata_getspecific(mount_specificdata_domain, 1419 &mp->mnt_specdataref, key); 1420 } 1421 1422 /* 1423 * mount_setspecific -- 1424 * Set mount-specific data corresponding to the specified key. 1425 */ 1426 void 1427 mount_setspecific(struct mount *mp, specificdata_key_t key, void *data) 1428 { 1429 1430 specificdata_setspecific(mount_specificdata_domain, 1431 &mp->mnt_specdataref, key, data); 1432 } 1433 1434 /* 1435 * Check to see if a filesystem is mounted on a block device. 1436 */ 1437 int 1438 vfs_mountedon(vnode_t *vp) 1439 { 1440 vnode_t *vq; 1441 int error = 0; 1442 1443 if (vp->v_type != VBLK) 1444 return ENOTBLK; 1445 if (spec_node_getmountedfs(vp) != NULL) 1446 return EBUSY; 1447 if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, VDEAD_NOWAIT, &vq) 1448 == 0) { 1449 if (spec_node_getmountedfs(vq) != NULL) 1450 error = EBUSY; 1451 vrele(vq); 1452 } 1453 1454 return error; 1455 } 1456 1457 /* 1458 * Check if a device pointed to by vp is mounted. 1459 * 1460 * Returns: 1461 * EINVAL if it's not a disk 1462 * EBUSY if it's a disk and mounted 1463 * 0 if it's a disk and not mounted 1464 */ 1465 int 1466 rawdev_mounted(vnode_t *vp, vnode_t **bvpp) 1467 { 1468 vnode_t *bvp; 1469 dev_t dev; 1470 int d_type; 1471 1472 bvp = NULL; 1473 d_type = D_OTHER; 1474 1475 if (iskmemvp(vp)) 1476 return EINVAL; 1477 1478 switch (vp->v_type) { 1479 case VCHR: { 1480 const struct cdevsw *cdev; 1481 1482 dev = vp->v_rdev; 1483 cdev = cdevsw_lookup(dev); 1484 if (cdev != NULL) { 1485 dev_t blkdev; 1486 1487 blkdev = devsw_chr2blk(dev); 1488 if (blkdev != NODEV) { 1489 if (vfinddev(blkdev, VBLK, &bvp) != 0) { 1490 d_type = (cdev->d_flag & D_TYPEMASK); 1491 /* XXX: what if bvp disappears? 
*/ 1492 vrele(bvp); 1493 } 1494 } 1495 } 1496 1497 break; 1498 } 1499 1500 case VBLK: { 1501 const struct bdevsw *bdev; 1502 1503 dev = vp->v_rdev; 1504 bdev = bdevsw_lookup(dev); 1505 if (bdev != NULL) 1506 d_type = (bdev->d_flag & D_TYPEMASK); 1507 1508 bvp = vp; 1509 1510 break; 1511 } 1512 1513 default: 1514 break; 1515 } 1516 1517 if (d_type != D_DISK) 1518 return EINVAL; 1519 1520 if (bvpp != NULL) 1521 *bvpp = bvp; 1522 1523 /* 1524 * XXX: This is bogus. We should be failing the request 1525 * XXX: not only if this specific slice is mounted, but 1526 * XXX: if it's on a disk with any other mounted slice. 1527 */ 1528 if (vfs_mountedon(bvp)) 1529 return EBUSY; 1530 1531 return 0; 1532 } 1533 1534 /* 1535 * Make a 'unique' number from a mount type name. 1536 */ 1537 long 1538 makefstype(const char *type) 1539 { 1540 long rv; 1541 1542 for (rv = 0; *type; type++) { 1543 rv <<= 2; 1544 rv ^= *type; 1545 } 1546 return rv; 1547 } 1548 1549 static struct mountlist_entry * 1550 mountlist_alloc(enum mountlist_type type, struct mount *mp) 1551 { 1552 struct mountlist_entry *me; 1553 1554 me = kmem_zalloc(sizeof(*me), KM_SLEEP); 1555 me->me_mount = mp; 1556 me->me_type = type; 1557 1558 return me; 1559 } 1560 1561 static void 1562 mountlist_free(struct mountlist_entry *me) 1563 { 1564 1565 kmem_free(me, sizeof(*me)); 1566 } 1567 1568 void 1569 mountlist_iterator_init(mount_iterator_t **mip) 1570 { 1571 struct mountlist_entry *me; 1572 1573 me = mountlist_alloc(ME_MARKER, NULL); 1574 mutex_enter(&mountlist_lock); 1575 TAILQ_INSERT_HEAD(&mountlist, me, me_list); 1576 mutex_exit(&mountlist_lock); 1577 *mip = (mount_iterator_t *)me; 1578 } 1579 1580 void 1581 mountlist_iterator_destroy(mount_iterator_t *mi) 1582 { 1583 struct mountlist_entry *marker = &mi->mi_entry; 1584 1585 if (marker->me_mount != NULL) 1586 vfs_unbusy(marker->me_mount); 1587 1588 mutex_enter(&mountlist_lock); 1589 TAILQ_REMOVE(&mountlist, marker, me_list); 1590 mutex_exit(&mountlist_lock); 1591 1592 
mountlist_free(marker); 1593 1594 } 1595 1596 /* 1597 * Return the next mount or NULL for this iterator. 1598 * Mark it busy on success. 1599 */ 1600 static inline struct mount * 1601 _mountlist_iterator_next(mount_iterator_t *mi, bool wait) 1602 { 1603 struct mountlist_entry *me, *marker = &mi->mi_entry; 1604 struct mount *mp; 1605 int error; 1606 1607 if (marker->me_mount != NULL) { 1608 vfs_unbusy(marker->me_mount); 1609 marker->me_mount = NULL; 1610 } 1611 1612 mutex_enter(&mountlist_lock); 1613 for (;;) { 1614 KASSERT(marker->me_type == ME_MARKER); 1615 1616 me = TAILQ_NEXT(marker, me_list); 1617 if (me == NULL) { 1618 /* End of list: keep marker and return. */ 1619 mutex_exit(&mountlist_lock); 1620 return NULL; 1621 } 1622 TAILQ_REMOVE(&mountlist, marker, me_list); 1623 TAILQ_INSERT_AFTER(&mountlist, me, marker, me_list); 1624 1625 /* Skip other markers. */ 1626 if (me->me_type != ME_MOUNT) 1627 continue; 1628 1629 /* Take an initial reference for vfs_busy() below. */ 1630 mp = me->me_mount; 1631 KASSERT(mp != NULL); 1632 vfs_ref(mp); 1633 mutex_exit(&mountlist_lock); 1634 1635 /* Try to mark this mount busy and return on success. */ 1636 if (wait) 1637 error = vfs_busy(mp); 1638 else 1639 error = vfs_trybusy(mp); 1640 if (error == 0) { 1641 vfs_rele(mp); 1642 marker->me_mount = mp; 1643 return mp; 1644 } 1645 vfs_rele(mp); 1646 mutex_enter(&mountlist_lock); 1647 } 1648 } 1649 1650 struct mount * 1651 mountlist_iterator_next(mount_iterator_t *mi) 1652 { 1653 1654 return _mountlist_iterator_next(mi, true); 1655 } 1656 1657 struct mount * 1658 mountlist_iterator_trynext(mount_iterator_t *mi) 1659 { 1660 1661 return _mountlist_iterator_next(mi, false); 1662 } 1663 1664 /* 1665 * Attach new mount to the end of the mount list. 
1666 */ 1667 void 1668 mountlist_append(struct mount *mp) 1669 { 1670 struct mountlist_entry *me; 1671 1672 me = mountlist_alloc(ME_MOUNT, mp); 1673 mutex_enter(&mountlist_lock); 1674 TAILQ_INSERT_TAIL(&mountlist, me, me_list); 1675 mutex_exit(&mountlist_lock); 1676 } 1677 1678 /* 1679 * Remove mount from mount list. 1680 */void 1681 mountlist_remove(struct mount *mp) 1682 { 1683 struct mountlist_entry *me; 1684 1685 mutex_enter(&mountlist_lock); 1686 TAILQ_FOREACH(me, &mountlist, me_list) 1687 if (me->me_type == ME_MOUNT && me->me_mount == mp) 1688 break; 1689 KASSERT(me != NULL); 1690 TAILQ_REMOVE(&mountlist, me, me_list); 1691 mutex_exit(&mountlist_lock); 1692 mountlist_free(me); 1693 } 1694 1695 /* 1696 * Unlocked variant to traverse the mountlist. 1697 * To be used from DDB only. 1698 */ 1699 struct mount * 1700 _mountlist_next(struct mount *mp) 1701 { 1702 struct mountlist_entry *me; 1703 1704 if (mp == NULL) { 1705 me = TAILQ_FIRST(&mountlist); 1706 } else { 1707 TAILQ_FOREACH(me, &mountlist, me_list) 1708 if (me->me_type == ME_MOUNT && me->me_mount == mp) 1709 break; 1710 if (me != NULL) 1711 me = TAILQ_NEXT(me, me_list); 1712 } 1713 1714 while (me != NULL && me->me_type != ME_MOUNT) 1715 me = TAILQ_NEXT(me, me_list); 1716 1717 return (me ? me->me_mount : NULL); 1718 } 1719