1 /* $NetBSD: vfs_mount.c,v 1.104 2024/01/17 10:17:29 hannken Exp $ */ 2 3 /*- 4 * Copyright (c) 1997-2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1989, 1993 35 * The Regents of the University of California. All rights reserved. 36 * (c) UNIX System Laboratories, Inc. 37 * All or some portions of this file are derived from material licensed 38 * to the University of California by American Telephone and Telegraph 39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 40 * the permission of UNIX System Laboratories, Inc. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 67 */ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.104 2024/01/17 10:17:29 hannken Exp $"); 71 72 #include "veriexec.h" 73 74 #include <sys/param.h> 75 #include <sys/kernel.h> 76 77 #include <sys/atomic.h> 78 #include <sys/buf.h> 79 #include <sys/conf.h> 80 #include <sys/fcntl.h> 81 #include <sys/filedesc.h> 82 #include <sys/device.h> 83 #include <sys/kauth.h> 84 #include <sys/kmem.h> 85 #include <sys/module.h> 86 #include <sys/mount.h> 87 #include <sys/fstrans.h> 88 #include <sys/namei.h> 89 #include <sys/extattr.h> 90 #include <sys/verified_exec.h> 91 #include <sys/syscallargs.h> 92 #include <sys/sysctl.h> 93 #include <sys/systm.h> 94 #include <sys/vfs_syscalls.h> 95 #include <sys/vnode_impl.h> 96 97 #include <miscfs/deadfs/deadfs.h> 98 #include <miscfs/genfs/genfs.h> 99 #include <miscfs/specfs/specdev.h> 100 101 #include <uvm/uvm_swap.h> 102 103 enum mountlist_type { 104 ME_MOUNT, 105 ME_MARKER 106 }; 107 struct mountlist_entry { 108 TAILQ_ENTRY(mountlist_entry) me_list; /* Mount list. */ 109 struct mount *me_mount; /* Actual mount if ME_MOUNT, 110 current mount else. */ 111 enum mountlist_type me_type; /* Mount or marker. */ 112 }; 113 struct mount_iterator { 114 struct mountlist_entry mi_entry; 115 }; 116 117 static struct vnode *vfs_vnode_iterator_next1(struct vnode_iterator *, 118 bool (*)(void *, struct vnode *), void *, bool); 119 120 /* Root filesystem. */ 121 vnode_t * rootvnode; 122 123 /* Mounted filesystem list. */ 124 static TAILQ_HEAD(mountlist, mountlist_entry) mountlist; 125 static kmutex_t mountlist_lock __cacheline_aligned; 126 int vnode_offset_next_by_lru /* XXX: ugly hack for pstat.c */ 127 = offsetof(vnode_impl_t, vi_lrulist.tqe_next); 128 129 kmutex_t vfs_list_lock __cacheline_aligned; 130 131 static specificdata_domain_t mount_specificdata_domain; 132 static kmutex_t mntid_lock; 133 134 static kmutex_t mountgen_lock __cacheline_aligned; 135 static uint64_t mountgen; 136 137 void 138 vfs_mount_sysinit(void) 139 { 140 141 TAILQ_INIT(&mountlist); 142 mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE); 143 mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE); 144 145 mount_specificdata_domain = specificdata_domain_create(); 146 mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE); 147 mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE); 148 mountgen = 0; 149 } 150 151 struct mount * 152 vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp) 153 { 154 struct mount *mp; 155 int error __diagused; 156 157 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP); 158 mp->mnt_op = vfsops; 159 mp->mnt_refcnt = 1; 160 TAILQ_INIT(&mp->mnt_vnodelist); 161 mp->mnt_renamelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 162 mp->mnt_vnodelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 163 mp->mnt_updating = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 164 mp->mnt_vnodecovered = vp; 165 mount_initspecific(mp); 166 167 error = fstrans_mount(mp); 168 KASSERT(error == 0); 169 170 mutex_enter(&mountgen_lock); 171 mp->mnt_gen = mountgen++; 172 mutex_exit(&mountgen_lock); 173 174 return mp; 175 } 176 177 /* 178 * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and 179 * initialize a mount structure for it. 180 * 181 * Devname is usually updated by mount(8) after booting. 182 */ 183 int 184 vfs_rootmountalloc(const char *fstypename, const char *devname, 185 struct mount **mpp) 186 { 187 struct vfsops *vfsp = NULL; 188 struct mount *mp; 189 int error __diagused; 190 191 mutex_enter(&vfs_list_lock); 192 LIST_FOREACH(vfsp, &vfs_list, vfs_list) 193 if (!strncmp(vfsp->vfs_name, fstypename, 194 sizeof(mp->mnt_stat.f_fstypename))) 195 break; 196 if (vfsp == NULL) { 197 mutex_exit(&vfs_list_lock); 198 return (ENODEV); 199 } 200 vfsp->vfs_refcount++; 201 mutex_exit(&vfs_list_lock); 202 203 if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL) 204 return ENOMEM; 205 error = vfs_busy(mp); 206 KASSERT(error == 0); 207 mp->mnt_flag = MNT_RDONLY; 208 (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, 209 sizeof(mp->mnt_stat.f_fstypename)); 210 mp->mnt_stat.f_mntonname[0] = '/'; 211 mp->mnt_stat.f_mntonname[1] = '\0'; 212 mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] = 213 '\0'; 214 (void)copystr(devname, mp->mnt_stat.f_mntfromname, 215 sizeof(mp->mnt_stat.f_mntfromname) - 1, 0); 216 *mpp = mp; 217 return 0; 218 } 219 220 /* 221 * vfs_getnewfsid: get a new unique fsid. 222 */ 223 void 224 vfs_getnewfsid(struct mount *mp) 225 { 226 static u_short xxxfs_mntid; 227 struct mountlist_entry *me; 228 fsid_t tfsid; 229 int mtype; 230 231 mutex_enter(&mntid_lock); 232 if (xxxfs_mntid == 0) 233 ++xxxfs_mntid; 234 mtype = makefstype(mp->mnt_op->vfs_name); 235 tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid); 236 tfsid.__fsid_val[1] = mtype; 237 /* Always increment to not return the same fsid to parallel mounts. */ 238 xxxfs_mntid++; 239 240 /* 241 * Directly walk mountlist to prevent deadlock through 242 * mountlist_iterator_next() -> vfs_busy(). 243 */ 244 mutex_enter(&mountlist_lock); 245 for (me = TAILQ_FIRST(&mountlist); me != TAILQ_END(&mountlist); ) { 246 if (me->me_type == ME_MOUNT && 247 me->me_mount->mnt_stat.f_fsidx.__fsid_val[0] == 248 tfsid.__fsid_val[0] && 249 me->me_mount->mnt_stat.f_fsidx.__fsid_val[1] == 250 tfsid.__fsid_val[1]) { 251 tfsid.__fsid_val[0]++; 252 xxxfs_mntid++; 253 me = TAILQ_FIRST(&mountlist); 254 } else { 255 me = TAILQ_NEXT(me, me_list); 256 } 257 } 258 mutex_exit(&mountlist_lock); 259 260 mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0]; 261 mp->mnt_stat.f_fsidx.__fsid_val[1] = tfsid.__fsid_val[1]; 262 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 263 mutex_exit(&mntid_lock); 264 } 265 266 /* 267 * Lookup a mount point by filesystem identifier. 268 * 269 * XXX Needs to add a reference to the mount point. 270 */ 271 struct mount * 272 vfs_getvfs(fsid_t *fsid) 273 { 274 mount_iterator_t *iter; 275 struct mount *mp; 276 277 mountlist_iterator_init(&iter); 278 while ((mp = mountlist_iterator_next(iter)) != NULL) { 279 if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] && 280 mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) { 281 mountlist_iterator_destroy(iter); 282 return mp; 283 } 284 } 285 mountlist_iterator_destroy(iter); 286 return NULL; 287 } 288 289 /* 290 * Take a reference to a mount structure. 291 */ 292 void 293 vfs_ref(struct mount *mp) 294 { 295 296 KASSERT(mp->mnt_refcnt > 0 || mutex_owned(&mountlist_lock)); 297 298 atomic_inc_uint(&mp->mnt_refcnt); 299 } 300 301 /* 302 * Drop a reference to a mount structure, freeing if the last reference. 303 */ 304 void 305 vfs_rele(struct mount *mp) 306 { 307 308 membar_release(); 309 if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) { 310 return; 311 } 312 membar_acquire(); 313 314 /* 315 * Nothing else has visibility of the mount: we can now 316 * free the data structures. 317 */ 318 KASSERT(mp->mnt_refcnt == 0); 319 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 320 mutex_obj_free(mp->mnt_updating); 321 mutex_obj_free(mp->mnt_renamelock); 322 mutex_obj_free(mp->mnt_vnodelock); 323 if (mp->mnt_op != NULL) { 324 vfs_delref(mp->mnt_op); 325 } 326 fstrans_unmount(mp); 327 /* 328 * Final free of mp gets done from fstrans_mount_dtor(). 329 * 330 * Prevents this memory to be reused as a mount before 331 * fstrans releases all references to it. 332 */ 333 } 334 335 /* 336 * Mark a mount point as busy, and gain a new reference to it. Used to 337 * prevent the file system from being unmounted during critical sections. 338 * 339 * vfs_busy can be called multiple times and by multiple threads 340 * and must be accompanied by the same number of vfs_unbusy calls. 341 * 342 * => The caller must hold a pre-existing reference to the mount. 343 * => Will fail if the file system is being unmounted, or is unmounted. 344 */ 345 static inline int 346 _vfs_busy(struct mount *mp, bool wait) 347 { 348 349 KASSERT(mp->mnt_refcnt > 0); 350 351 if (wait) { 352 fstrans_start(mp); 353 } else { 354 if (fstrans_start_nowait(mp)) 355 return EBUSY; 356 } 357 if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) { 358 fstrans_done(mp); 359 return ENOENT; 360 } 361 vfs_ref(mp); 362 return 0; 363 } 364 365 int 366 vfs_busy(struct mount *mp) 367 { 368 369 return _vfs_busy(mp, true); 370 } 371 372 int 373 vfs_trybusy(struct mount *mp) 374 { 375 376 return _vfs_busy(mp, false); 377 } 378 379 /* 380 * Unbusy a busy filesystem. 381 * 382 * Every successful vfs_busy() call must be undone by a vfs_unbusy() call. 383 */ 384 void 385 vfs_unbusy(struct mount *mp) 386 { 387 388 KASSERT(mp->mnt_refcnt > 0); 389 390 fstrans_done(mp); 391 vfs_rele(mp); 392 } 393 394 /* 395 * Change a file systems lower mount. 396 * Both the current and the new lower mount may be NULL. The caller 397 * guarantees exclusive access to the mount and holds a pre-existing 398 * reference to the new lower mount. 399 */ 400 int 401 vfs_set_lowermount(struct mount *mp, struct mount *lowermp) 402 { 403 struct mount *oldlowermp; 404 int error; 405 406 #ifdef DEBUG 407 /* 408 * Limit the depth of file system stack so kernel sanitizers 409 * may stress mount/unmount without exhausting the kernel stack. 410 */ 411 int depth; 412 struct mount *mp2; 413 414 for (depth = 0, mp2 = lowermp; mp2; depth++, mp2 = mp2->mnt_lower) { 415 if (depth == 23) 416 return EINVAL; 417 } 418 #endif 419 420 if (lowermp) { 421 if (lowermp == dead_rootmount) 422 return ENOENT; 423 error = vfs_busy(lowermp); 424 if (error) 425 return error; 426 vfs_ref(lowermp); 427 } 428 429 oldlowermp = mp->mnt_lower; 430 mp->mnt_lower = lowermp; 431 432 if (lowermp) 433 vfs_unbusy(lowermp); 434 435 if (oldlowermp) 436 vfs_rele(oldlowermp); 437 438 return 0; 439 } 440 441 struct vnode_iterator { 442 vnode_impl_t vi_vnode; 443 }; 444 445 void 446 vfs_vnode_iterator_init(struct mount *mp, struct vnode_iterator **vnip) 447 { 448 vnode_t *vp; 449 vnode_impl_t *vip; 450 451 vp = vnalloc_marker(mp); 452 vip = VNODE_TO_VIMPL(vp); 453 454 mutex_enter(mp->mnt_vnodelock); 455 TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vip, vi_mntvnodes); 456 vp->v_usecount = 1; 457 mutex_exit(mp->mnt_vnodelock); 458 459 *vnip = (struct vnode_iterator *)vip; 460 } 461 462 void 463 vfs_vnode_iterator_destroy(struct vnode_iterator *vni) 464 { 465 vnode_impl_t *mvip = &vni->vi_vnode; 466 vnode_t *mvp = VIMPL_TO_VNODE(mvip); 467 kmutex_t *lock; 468 469 KASSERT(vnis_marker(mvp)); 470 if (vrefcnt(mvp) != 0) { 471 lock = mvp->v_mount->mnt_vnodelock; 472 mutex_enter(lock); 473 TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvip, vi_mntvnodes); 474 mvp->v_usecount = 0; 475 mutex_exit(lock); 476 } 477 vnfree_marker(mvp); 478 } 479 480 static struct vnode * 481 vfs_vnode_iterator_next1(struct vnode_iterator *vni, 482 bool (*f)(void *, struct vnode *), void *cl, bool do_wait) 483 { 484 vnode_impl_t *mvip = &vni->vi_vnode; 485 struct mount *mp = VIMPL_TO_VNODE(mvip)->v_mount; 486 vnode_t *vp; 487 vnode_impl_t *vip; 488 kmutex_t *lock; 489 int error; 490 491 KASSERT(vnis_marker(VIMPL_TO_VNODE(mvip))); 492 493 lock = mp->mnt_vnodelock; 494 do { 495 mutex_enter(lock); 496 vip = TAILQ_NEXT(mvip, vi_mntvnodes); 497 TAILQ_REMOVE(&mp->mnt_vnodelist, mvip, vi_mntvnodes); 498 VIMPL_TO_VNODE(mvip)->v_usecount = 0; 499 again: 500 if (vip == NULL) { 501 mutex_exit(lock); 502 return NULL; 503 } 504 vp = VIMPL_TO_VNODE(vip); 505 KASSERT(vp != NULL); 506 mutex_enter(vp->v_interlock); 507 if (vnis_marker(vp) || 508 vdead_check(vp, (do_wait ? 0 : VDEAD_NOWAIT)) || 509 (f && !(*f)(cl, vp))) { 510 mutex_exit(vp->v_interlock); 511 vip = TAILQ_NEXT(vip, vi_mntvnodes); 512 goto again; 513 } 514 515 TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vip, mvip, vi_mntvnodes); 516 VIMPL_TO_VNODE(mvip)->v_usecount = 1; 517 mutex_exit(lock); 518 error = vcache_vget(vp); 519 KASSERT(error == 0 || error == ENOENT); 520 } while (error != 0); 521 522 return vp; 523 } 524 525 struct vnode * 526 vfs_vnode_iterator_next(struct vnode_iterator *vni, 527 bool (*f)(void *, struct vnode *), void *cl) 528 { 529 530 return vfs_vnode_iterator_next1(vni, f, cl, false); 531 } 532 533 /* 534 * Move a vnode from one mount queue to another. 535 */ 536 void 537 vfs_insmntque(vnode_t *vp, struct mount *mp) 538 { 539 vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 540 struct mount *omp; 541 kmutex_t *lock; 542 543 KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 || 544 vp->v_tag == VT_VFS); 545 546 /* 547 * Delete from old mount point vnode list, if on one. 548 */ 549 if ((omp = vp->v_mount) != NULL) { 550 lock = omp->mnt_vnodelock; 551 mutex_enter(lock); 552 TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vip, vi_mntvnodes); 553 mutex_exit(lock); 554 } 555 556 /* 557 * Insert into list of vnodes for the new mount point, if 558 * available. The caller must take a reference on the mount 559 * structure and donate to the vnode. 560 */ 561 if ((vp->v_mount = mp) != NULL) { 562 lock = mp->mnt_vnodelock; 563 mutex_enter(lock); 564 TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vip, vi_mntvnodes); 565 mutex_exit(lock); 566 } 567 568 if (omp != NULL) { 569 /* Release reference to old mount. */ 570 vfs_rele(omp); 571 } 572 } 573 574 /* 575 * Remove any vnodes in the vnode table belonging to mount point mp. 576 * 577 * If FORCECLOSE is not specified, there should not be any active ones, 578 * return error if any are found (nb: this is a user error, not a 579 * system error). If FORCECLOSE is specified, detach any active vnodes 580 * that are found. 581 * 582 * If WRITECLOSE is set, only flush out regular file vnodes open for 583 * writing. 584 * 585 * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped. 586 */ 587 #ifdef DEBUG 588 int busyprt = 0; /* print out busy vnodes */ 589 struct ctldebug debug1 = { "busyprt", &busyprt }; 590 #endif 591 592 static vnode_t * 593 vflushnext(struct vnode_iterator *marker, int *when) 594 { 595 if (getticks() > *when) { 596 yield(); 597 *when = getticks() + hz / 10; 598 } 599 preempt_point(); 600 return vfs_vnode_iterator_next1(marker, NULL, NULL, true); 601 } 602 603 /* 604 * Flush one vnode. Referenced on entry, unreferenced on return. 605 */ 606 static int 607 vflush_one(vnode_t *vp, vnode_t *skipvp, int flags) 608 { 609 int error; 610 struct vattr vattr; 611 612 if (vp == skipvp || 613 ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM))) { 614 vrele(vp); 615 return 0; 616 } 617 /* 618 * If WRITECLOSE is set, only flush out regular file 619 * vnodes open for writing or open and unlinked. 620 */ 621 if ((flags & WRITECLOSE)) { 622 if (vp->v_type != VREG) { 623 vrele(vp); 624 return 0; 625 } 626 error = vn_lock(vp, LK_EXCLUSIVE); 627 if (error) { 628 KASSERT(error == ENOENT); 629 vrele(vp); 630 return 0; 631 } 632 error = VOP_FSYNC(vp, curlwp->l_cred, FSYNC_WAIT, 0, 0); 633 if (error == 0) 634 error = VOP_GETATTR(vp, &vattr, curlwp->l_cred); 635 VOP_UNLOCK(vp); 636 if (error) { 637 vrele(vp); 638 return error; 639 } 640 if (vp->v_writecount == 0 && vattr.va_nlink > 0) { 641 vrele(vp); 642 return 0; 643 } 644 } 645 /* 646 * First try to recycle the vnode. 647 */ 648 if (vrecycle(vp)) 649 return 0; 650 /* 651 * If FORCECLOSE is set, forcibly close the vnode. 652 * For block or character devices, revert to an 653 * anonymous device. For all other files, just 654 * kill them. 655 */ 656 if (flags & FORCECLOSE) { 657 if (vrefcnt(vp) > 1 && 658 (vp->v_type == VBLK || vp->v_type == VCHR)) 659 vcache_make_anon(vp); 660 else 661 vgone(vp); 662 return 0; 663 } 664 vrele(vp); 665 return EBUSY; 666 } 667 668 int 669 vflush(struct mount *mp, vnode_t *skipvp, int flags) 670 { 671 vnode_t *vp; 672 struct vnode_iterator *marker; 673 int busy, error, when, retries = 2; 674 675 do { 676 busy = error = when = 0; 677 678 /* 679 * First, flush out any vnode references from the 680 * deferred vrele list. 681 */ 682 vrele_flush(mp); 683 684 vfs_vnode_iterator_init(mp, &marker); 685 686 while ((vp = vflushnext(marker, &when)) != NULL) { 687 error = vflush_one(vp, skipvp, flags); 688 if (error == EBUSY) { 689 error = 0; 690 busy++; 691 #ifdef DEBUG 692 if (busyprt && retries == 0) 693 vprint("vflush: busy vnode", vp); 694 #endif 695 } else if (error != 0) { 696 break; 697 } 698 } 699 700 vfs_vnode_iterator_destroy(marker); 701 } while (error == 0 && busy > 0 && retries-- > 0); 702 703 if (error) 704 return error; 705 if (busy) 706 return EBUSY; 707 return 0; 708 } 709 710 /* 711 * Mount a file system. 712 */ 713 714 /* 715 * Scan all active processes to see if any of them have a current or root 716 * directory onto which the new filesystem has just been mounted. If so, 717 * replace them with the new mount point. 718 */ 719 static void 720 mount_checkdirs(vnode_t *olddp) 721 { 722 vnode_t *newdp, *rele1, *rele2; 723 struct cwdinfo *cwdi; 724 struct proc *p; 725 bool retry; 726 727 if (vrefcnt(olddp) == 1) { 728 return; 729 } 730 if (VFS_ROOT(olddp->v_mountedhere, LK_EXCLUSIVE, &newdp)) 731 panic("mount: lost mount"); 732 733 do { 734 retry = false; 735 mutex_enter(&proc_lock); 736 PROCLIST_FOREACH(p, &allproc) { 737 if ((cwdi = p->p_cwdi) == NULL) 738 continue; 739 /* 740 * Cannot change to the old directory any more, 741 * so even if we see a stale value it is not a 742 * problem. 743 */ 744 if (cwdi->cwdi_cdir != olddp && 745 cwdi->cwdi_rdir != olddp) 746 continue; 747 retry = true; 748 rele1 = NULL; 749 rele2 = NULL; 750 atomic_inc_uint(&cwdi->cwdi_refcnt); 751 mutex_exit(&proc_lock); 752 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 753 if (cwdi->cwdi_cdir == olddp) { 754 rele1 = cwdi->cwdi_cdir; 755 vref(newdp); 756 cwdi->cwdi_cdir = newdp; 757 } 758 if (cwdi->cwdi_rdir == olddp) { 759 rele2 = cwdi->cwdi_rdir; 760 vref(newdp); 761 cwdi->cwdi_rdir = newdp; 762 } 763 rw_exit(&cwdi->cwdi_lock); 764 cwdfree(cwdi); 765 if (rele1 != NULL) 766 vrele(rele1); 767 if (rele2 != NULL) 768 vrele(rele2); 769 mutex_enter(&proc_lock); 770 break; 771 } 772 mutex_exit(&proc_lock); 773 } while (retry); 774 775 if (rootvnode == olddp) { 776 vrele(rootvnode); 777 vref(newdp); 778 rootvnode = newdp; 779 } 780 vput(newdp); 781 } 782 783 /* 784 * Start extended attributes 785 */ 786 static int 787 start_extattr(struct mount *mp) 788 { 789 int error; 790 791 error = VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, NULL, 0, NULL); 792 if (error) 793 printf("%s: failed to start extattr: error = %d\n", 794 mp->mnt_stat.f_mntonname, error); 795 796 return error; 797 } 798 799 int 800 mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops, 801 const char *path, int flags, void *data, size_t *data_len) 802 { 803 vnode_t *vp = *vpp; 804 struct mount *mp; 805 struct pathbuf *pb; 806 struct nameidata nd; 807 int error, error2; 808 809 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 810 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 811 if (error) { 812 vfs_delref(vfsops); 813 return error; 814 } 815 816 /* Cannot make a non-dir a mount-point (from here anyway). */ 817 if (vp->v_type != VDIR) { 818 vfs_delref(vfsops); 819 return ENOTDIR; 820 } 821 822 if (flags & MNT_EXPORTED) { 823 vfs_delref(vfsops); 824 return EINVAL; 825 } 826 827 if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) { 828 vfs_delref(vfsops); 829 return ENOMEM; 830 } 831 832 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 833 834 /* 835 * The underlying file system may refuse the mount for 836 * various reasons. Allow the user to force it to happen. 837 * 838 * Set the mount level flags. 839 */ 840 mp->mnt_flag = flags & (MNT_BASIC_FLAGS | MNT_FORCE | MNT_IGNORE); 841 842 error = VFS_MOUNT(mp, path, data, data_len); 843 mp->mnt_flag &= ~MNT_OP_FLAGS; 844 845 if (error != 0) { 846 vfs_rele(mp); 847 return error; 848 } 849 850 /* Suspend new file system before taking mnt_updating. */ 851 do { 852 error2 = vfs_suspend(mp, 0); 853 } while (error2 == EINTR || error2 == ERESTART); 854 KASSERT(error2 == 0 || error2 == EOPNOTSUPP); 855 mutex_enter(mp->mnt_updating); 856 857 /* 858 * Validate and prepare the mount point. 859 */ 860 error = pathbuf_copyin(path, &pb); 861 if (error != 0) { 862 goto err_mounted; 863 } 864 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 865 error = namei(&nd); 866 pathbuf_destroy(pb); 867 if (error != 0) { 868 goto err_mounted; 869 } 870 if (nd.ni_vp != vp) { 871 vput(nd.ni_vp); 872 error = EINVAL; 873 goto err_mounted; 874 } 875 if (vp->v_mountedhere != NULL) { 876 vput(nd.ni_vp); 877 error = EBUSY; 878 goto err_mounted; 879 } 880 error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0); 881 if (error != 0) { 882 vput(nd.ni_vp); 883 goto err_mounted; 884 } 885 886 /* 887 * Put the new filesystem on the mount list after root. 888 */ 889 cache_purge(vp); 890 mp->mnt_iflag &= ~IMNT_WANTRDWR; 891 892 mountlist_append(mp); 893 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 894 vfs_syncer_add_to_worklist(mp); 895 vp->v_mountedhere = mp; 896 vput(nd.ni_vp); 897 898 mount_checkdirs(vp); 899 mutex_exit(mp->mnt_updating); 900 if (error2 == 0) 901 vfs_resume(mp); 902 903 /* Hold an additional reference to the mount across VFS_START(). */ 904 vfs_ref(mp); 905 (void) VFS_STATVFS(mp, &mp->mnt_stat); 906 error = VFS_START(mp, 0); 907 if (error) { 908 vrele(vp); 909 } else if (flags & MNT_EXTATTR) { 910 if (start_extattr(mp) != 0) 911 mp->mnt_flag &= ~MNT_EXTATTR; 912 } 913 /* Drop reference held for VFS_START(). */ 914 vfs_rele(mp); 915 *vpp = NULL; 916 return error; 917 918 err_mounted: 919 if (VFS_UNMOUNT(mp, MNT_FORCE) != 0) 920 panic("Unmounting fresh file system failed"); 921 mutex_exit(mp->mnt_updating); 922 if (error2 == 0) 923 vfs_resume(mp); 924 vfs_set_lowermount(mp, NULL); 925 vfs_rele(mp); 926 927 return error; 928 } 929 930 /* 931 * Do the actual file system unmount. File system is assumed to have 932 * been locked by the caller. 933 * 934 * => Caller hold reference to the mount, explicitly for dounmount(). 935 */ 936 int 937 dounmount(struct mount *mp, int flags, struct lwp *l) 938 { 939 vnode_t *coveredvp, *vp; 940 int error, async, used_syncer, used_extattr; 941 const bool was_suspended = fstrans_is_owner(mp); 942 943 #if NVERIEXEC > 0 944 error = veriexec_unmountchk(mp); 945 if (error) 946 return (error); 947 #endif /* NVERIEXEC > 0 */ 948 949 if (!was_suspended) { 950 error = vfs_suspend(mp, 0); 951 if (error) { 952 return error; 953 } 954 } 955 956 KASSERT((mp->mnt_iflag & IMNT_GONE) == 0); 957 958 used_syncer = (mp->mnt_iflag & IMNT_ONWORKLIST) != 0; 959 used_extattr = mp->mnt_flag & MNT_EXTATTR; 960 961 mp->mnt_iflag |= IMNT_UNMOUNT; 962 mutex_enter(mp->mnt_updating); 963 async = mp->mnt_flag & MNT_ASYNC; 964 mp->mnt_flag &= ~MNT_ASYNC; 965 cache_purgevfs(mp); /* remove cache entries for this file sys */ 966 if (used_syncer) 967 vfs_syncer_remove_from_worklist(mp); 968 error = 0; 969 if (((mp->mnt_flag & MNT_RDONLY) == 0) && ((flags & MNT_FORCE) == 0)) { 970 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 971 } 972 if (error == 0 || (flags & MNT_FORCE)) { 973 error = VFS_UNMOUNT(mp, flags); 974 } 975 if (error) { 976 mp->mnt_iflag &= ~IMNT_UNMOUNT; 977 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 978 vfs_syncer_add_to_worklist(mp); 979 mp->mnt_flag |= async; 980 mutex_exit(mp->mnt_updating); 981 if (!was_suspended) 982 vfs_resume(mp); 983 if (used_extattr) { 984 if (start_extattr(mp) != 0) 985 mp->mnt_flag &= ~MNT_EXTATTR; 986 else 987 mp->mnt_flag |= MNT_EXTATTR; 988 } 989 return (error); 990 } 991 mutex_exit(mp->mnt_updating); 992 993 /* 994 * mark filesystem as gone to prevent further umounts 995 * after mnt_umounting lock is gone, this also prevents 996 * vfs_busy() from succeeding. 997 */ 998 mp->mnt_iflag |= IMNT_GONE; 999 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { 1000 coveredvp->v_mountedhere = NULL; 1001 } 1002 if (!was_suspended) 1003 vfs_resume(mp); 1004 1005 mountlist_remove(mp); 1006 if ((vp = VIMPL_TO_VNODE(TAILQ_FIRST(&mp->mnt_vnodelist))) != NULL) { 1007 vprint("dangling", vp); 1008 panic("unmount: dangling vnode"); 1009 } 1010 vfs_hooks_unmount(mp); 1011 1012 vfs_set_lowermount(mp, NULL); 1013 vfs_rele(mp); /* reference from mount() */ 1014 if (coveredvp != NULLVP) { 1015 vrele(coveredvp); 1016 } 1017 return (0); 1018 } 1019 1020 /* 1021 * Unmount all file systems. 1022 * We traverse the list in reverse order under the assumption that doing so 1023 * will avoid needing to worry about dependencies. 1024 */ 1025 bool 1026 vfs_unmountall(struct lwp *l) 1027 { 1028 1029 printf("unmounting file systems...\n"); 1030 return vfs_unmountall1(l, true, true); 1031 } 1032 1033 static void 1034 vfs_unmount_print(struct mount *mp, const char *pfx) 1035 { 1036 1037 aprint_verbose("%sunmounted %s on %s type %s\n", pfx, 1038 mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, 1039 mp->mnt_stat.f_fstypename); 1040 } 1041 1042 /* 1043 * Return the mount with the highest generation less than "gen". 1044 */ 1045 static struct mount * 1046 vfs_unmount_next(uint64_t gen) 1047 { 1048 mount_iterator_t *iter; 1049 struct mount *mp, *nmp; 1050 1051 nmp = NULL; 1052 1053 mountlist_iterator_init(&iter); 1054 while ((mp = mountlist_iterator_next(iter)) != NULL) { 1055 if ((nmp == NULL || mp->mnt_gen > nmp->mnt_gen) && 1056 mp->mnt_gen < gen) { 1057 if (nmp != NULL) 1058 vfs_rele(nmp); 1059 nmp = mp; 1060 vfs_ref(nmp); 1061 } 1062 } 1063 mountlist_iterator_destroy(iter); 1064 1065 return nmp; 1066 } 1067 1068 bool 1069 vfs_unmount_forceone(struct lwp *l) 1070 { 1071 struct mount *mp; 1072 int error; 1073 1074 mp = vfs_unmount_next(mountgen); 1075 if (mp == NULL) { 1076 return false; 1077 } 1078 1079 #ifdef DEBUG 1080 printf("forcefully unmounting %s (%s)...\n", 1081 mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname); 1082 #endif 1083 if ((error = dounmount(mp, MNT_FORCE, l)) == 0) { 1084 vfs_unmount_print(mp, "forcefully "); 1085 return true; 1086 } else { 1087 vfs_rele(mp); 1088 } 1089 1090 #ifdef DEBUG 1091 printf("forceful unmount of %s failed with error %d\n", 1092 mp->mnt_stat.f_mntonname, error); 1093 #endif 1094 1095 return false; 1096 } 1097 1098 bool 1099 vfs_unmountall1(struct lwp *l, bool force, bool verbose) 1100 { 1101 struct mount *mp; 1102 mount_iterator_t *iter; 1103 bool any_error = false, progress = false; 1104 uint64_t gen; 1105 int error; 1106 1107 gen = mountgen; 1108 for (;;) { 1109 mp = vfs_unmount_next(gen); 1110 if (mp == NULL) 1111 break; 1112 gen = mp->mnt_gen; 1113 1114 #ifdef DEBUG 1115 printf("unmounting %p %s (%s)...\n", 1116 (void *)mp, mp->mnt_stat.f_mntonname, 1117 mp->mnt_stat.f_mntfromname); 1118 #endif 1119 if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0) { 1120 vfs_unmount_print(mp, ""); 1121 progress = true; 1122 } else { 1123 vfs_rele(mp); 1124 if (verbose) { 1125 printf("unmount of %s failed with error %d\n", 1126 mp->mnt_stat.f_mntonname, error); 1127 } 1128 any_error = true; 1129 } 1130 } 1131 if (verbose) { 1132 printf("unmounting done\n"); 1133 } 1134 if (any_error && verbose) { 1135 printf("WARNING: some file systems would not unmount\n"); 1136 } 1137 /* If the mountlist is empty it is time to remove swap. */ 1138 mountlist_iterator_init(&iter); 1139 if (mountlist_iterator_next(iter) == NULL) { 1140 uvm_swap_shutdown(l); 1141 } 1142 mountlist_iterator_destroy(iter); 1143 1144 return progress; 1145 } 1146 1147 void 1148 vfs_sync_all(struct lwp *l) 1149 { 1150 printf("syncing disks... "); 1151 1152 /* remove user processes from run queue */ 1153 suspendsched(); 1154 (void)spl0(); 1155 1156 /* avoid coming back this way again if we panic. */ 1157 doing_shutdown = 1; 1158 1159 do_sys_sync(l); 1160 1161 /* Wait for sync to finish. */ 1162 if (vfs_syncwait() != 0) { 1163 #if defined(DDB) && defined(DEBUG_HALT_BUSY) 1164 Debugger(); 1165 #endif 1166 printf("giving up\n"); 1167 return; 1168 } else 1169 printf("done\n"); 1170 } 1171 1172 /* 1173 * Sync and unmount file systems before shutting down. 1174 */ 1175 void 1176 vfs_shutdown(void) 1177 { 1178 lwp_t *l = curlwp; 1179 1180 vfs_sync_all(l); 1181 1182 /* 1183 * If we have panicked - do not make the situation potentially 1184 * worse by unmounting the file systems. 1185 */ 1186 if (panicstr != NULL) { 1187 return; 1188 } 1189 1190 /* Unmount file systems. */ 1191 vfs_unmountall(l); 1192 } 1193 1194 /* 1195 * Print a list of supported file system types (used by vfs_mountroot) 1196 */ 1197 static void 1198 vfs_print_fstypes(void) 1199 { 1200 struct vfsops *v; 1201 int cnt = 0; 1202 1203 mutex_enter(&vfs_list_lock); 1204 LIST_FOREACH(v, &vfs_list, vfs_list) 1205 ++cnt; 1206 mutex_exit(&vfs_list_lock); 1207 1208 if (cnt == 0) { 1209 printf("WARNING: No file system modules have been loaded.\n"); 1210 return; 1211 } 1212 1213 printf("Supported file systems:"); 1214 mutex_enter(&vfs_list_lock); 1215 LIST_FOREACH(v, &vfs_list, vfs_list) { 1216 printf(" %s", v->vfs_name); 1217 } 1218 mutex_exit(&vfs_list_lock); 1219 printf("\n"); 1220 } 1221 1222 /* 1223 * Mount the root file system. If the operator didn't specify a 1224 * file system to use, try all possible file systems until one 1225 * succeeds. 1226 */ 1227 int 1228 vfs_mountroot(void) 1229 { 1230 struct vfsops *v; 1231 int error = ENODEV; 1232 1233 if (root_device == NULL) 1234 panic("vfs_mountroot: root device unknown"); 1235 1236 switch (device_class(root_device)) { 1237 case DV_IFNET: 1238 if (rootdev != NODEV) 1239 panic("vfs_mountroot: rootdev set for DV_IFNET " 1240 "(0x%llx -> %llu,%llu)", 1241 (unsigned long long)rootdev, 1242 (unsigned long long)major(rootdev), 1243 (unsigned long long)minor(rootdev)); 1244 break; 1245 1246 case DV_DISK: 1247 if (rootdev == NODEV) 1248 panic("vfs_mountroot: rootdev not set for DV_DISK"); 1249 if (bdevvp(rootdev, &rootvp)) 1250 panic("vfs_mountroot: can't get vnode for rootdev"); 1251 vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY); 1252 error = VOP_OPEN(rootvp, FREAD, FSCRED); 1253 VOP_UNLOCK(rootvp); 1254 if (error) { 1255 printf("vfs_mountroot: can't open root device\n"); 1256 return (error); 1257 } 1258 break; 1259 1260 case DV_VIRTUAL: 1261 break; 1262 1263 default: 1264 printf("%s: inappropriate for root file system\n", 1265 device_xname(root_device)); 1266 return (ENODEV); 1267 } 1268 1269 /* 1270 * If user specified a root fs type, use it. Make sure the 1271 * specified type exists and has a mount_root() 1272 */ 1273 if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) { 1274 v = vfs_getopsbyname(rootfstype); 1275 error = EFTYPE; 1276 if (v != NULL) { 1277 if (v->vfs_mountroot != NULL) { 1278 error = (v->vfs_mountroot)(); 1279 } 1280 v->vfs_refcount--; 1281 } 1282 goto done; 1283 } 1284 1285 /* 1286 * Try each file system currently configured into the kernel. 1287 */ 1288 mutex_enter(&vfs_list_lock); 1289 LIST_FOREACH(v, &vfs_list, vfs_list) { 1290 if (v->vfs_mountroot == NULL) 1291 continue; 1292 #ifdef DEBUG 1293 aprint_normal("mountroot: trying %s...\n", v->vfs_name); 1294 #endif 1295 v->vfs_refcount++; 1296 mutex_exit(&vfs_list_lock); 1297 error = (*v->vfs_mountroot)(); 1298 mutex_enter(&vfs_list_lock); 1299 v->vfs_refcount--; 1300 if (!error) { 1301 aprint_normal("root file system type: %s\n", 1302 v->vfs_name); 1303 break; 1304 } 1305 } 1306 mutex_exit(&vfs_list_lock); 1307 1308 if (v == NULL) { 1309 vfs_print_fstypes(); 1310 printf("no file system for %s", device_xname(root_device)); 1311 if (device_class(root_device) == DV_DISK) 1312 printf(" (dev 0x%llx)", (unsigned long long)rootdev); 1313 printf("\n"); 1314 error = EFTYPE; 1315 } 1316 1317 done: 1318 if (error && device_class(root_device) == DV_DISK) { 1319 vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY); 1320 VOP_CLOSE(rootvp, FREAD, FSCRED); 1321 VOP_UNLOCK(rootvp); 1322 vrele(rootvp); 1323 } 1324 if (error == 0) { 1325 mount_iterator_t *iter; 1326 struct mount *mp; 1327 1328 mountlist_iterator_init(&iter); 1329 mp = mountlist_iterator_next(iter); 1330 KASSERT(mp != NULL); 1331 mountlist_iterator_destroy(iter); 1332 1333 mp->mnt_flag |= MNT_ROOTFS; 1334 mp->mnt_op->vfs_refcount++; 1335 1336 /* 1337 * Get the vnode for '/'. Set cwdi0.cwdi_cdir to 1338 * reference it, and donate it the reference grabbed 1339 * with VFS_ROOT(). 1340 */ 1341 error = VFS_ROOT(mp, LK_NONE, &rootvnode); 1342 if (error) 1343 panic("cannot find root vnode, error=%d", error); 1344 cwdi0.cwdi_cdir = rootvnode; 1345 cwdi0.cwdi_rdir = NULL; 1346 1347 /* 1348 * Now that root is mounted, we can fixup initproc's CWD 1349 * info. All other processes are kthreads, which merely 1350 * share proc0's CWD info. 1351 */ 1352 initproc->p_cwdi->cwdi_cdir = rootvnode; 1353 vref(initproc->p_cwdi->cwdi_cdir); 1354 initproc->p_cwdi->cwdi_rdir = NULL; 1355 /* 1356 * Enable loading of modules from the filesystem 1357 */ 1358 module_load_vfs_init(); 1359 1360 } 1361 return (error); 1362 } 1363 1364 /* 1365 * mount_specific_key_create -- 1366 * Create a key for subsystem mount-specific data. 1367 */ 1368 int 1369 mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor) 1370 { 1371 1372 return specificdata_key_create(mount_specificdata_domain, keyp, dtor); 1373 } 1374 1375 /* 1376 * mount_specific_key_delete -- 1377 * Delete a key for subsystem mount-specific data. 1378 */ 1379 void 1380 mount_specific_key_delete(specificdata_key_t key) 1381 { 1382 1383 specificdata_key_delete(mount_specificdata_domain, key); 1384 } 1385 1386 /* 1387 * mount_initspecific -- 1388 * Initialize a mount's specificdata container. 1389 */ 1390 void 1391 mount_initspecific(struct mount *mp) 1392 { 1393 int error __diagused; 1394 1395 error = specificdata_init(mount_specificdata_domain, 1396 &mp->mnt_specdataref); 1397 KASSERT(error == 0); 1398 } 1399 1400 /* 1401 * mount_finispecific -- 1402 * Finalize a mount's specificdata container. 1403 */ 1404 void 1405 mount_finispecific(struct mount *mp) 1406 { 1407 1408 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 1409 } 1410 1411 /* 1412 * mount_getspecific -- 1413 * Return mount-specific data corresponding to the specified key. 1414 */ 1415 void * 1416 mount_getspecific(struct mount *mp, specificdata_key_t key) 1417 { 1418 1419 return specificdata_getspecific(mount_specificdata_domain, 1420 &mp->mnt_specdataref, key); 1421 } 1422 1423 /* 1424 * mount_setspecific -- 1425 * Set mount-specific data corresponding to the specified key. 1426 */ 1427 void 1428 mount_setspecific(struct mount *mp, specificdata_key_t key, void *data) 1429 { 1430 1431 specificdata_setspecific(mount_specificdata_domain, 1432 &mp->mnt_specdataref, key, data); 1433 } 1434 1435 /* 1436 * Check to see if a filesystem is mounted on a block device. 1437 */ 1438 int 1439 vfs_mountedon(vnode_t *vp) 1440 { 1441 vnode_t *vq; 1442 int error = 0; 1443 1444 if (vp->v_type != VBLK) 1445 return ENOTBLK; 1446 if (spec_node_getmountedfs(vp) != NULL) 1447 return EBUSY; 1448 if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, VDEAD_NOWAIT, &vq) 1449 == 0) { 1450 if (spec_node_getmountedfs(vq) != NULL) 1451 error = EBUSY; 1452 vrele(vq); 1453 } 1454 1455 return error; 1456 } 1457 1458 /* 1459 * Check if a device pointed to by vp is mounted. 1460 * 1461 * Returns: 1462 * EINVAL if it's not a disk 1463 * EBUSY if it's a disk and mounted 1464 * 0 if it's a disk and not mounted 1465 */ 1466 int 1467 rawdev_mounted(vnode_t *vp, vnode_t **bvpp) 1468 { 1469 vnode_t *bvp; 1470 dev_t dev; 1471 int d_type; 1472 1473 bvp = NULL; 1474 d_type = D_OTHER; 1475 1476 if (iskmemvp(vp)) 1477 return EINVAL; 1478 1479 switch (vp->v_type) { 1480 case VCHR: { 1481 const struct cdevsw *cdev; 1482 1483 dev = vp->v_rdev; 1484 cdev = cdevsw_lookup(dev); 1485 if (cdev != NULL) { 1486 dev_t blkdev; 1487 1488 blkdev = devsw_chr2blk(dev); 1489 if (blkdev != NODEV) { 1490 if (vfinddev(blkdev, VBLK, &bvp) != 0) { 1491 d_type = (cdev->d_flag & D_TYPEMASK); 1492 /* XXX: what if bvp disappears? */ 1493 vrele(bvp); 1494 } 1495 } 1496 } 1497 1498 break; 1499 } 1500 1501 case VBLK: { 1502 const struct bdevsw *bdev; 1503 1504 dev = vp->v_rdev; 1505 bdev = bdevsw_lookup(dev); 1506 if (bdev != NULL) 1507 d_type = (bdev->d_flag & D_TYPEMASK); 1508 1509 bvp = vp; 1510 1511 break; 1512 } 1513 1514 default: 1515 break; 1516 } 1517 1518 if (d_type != D_DISK) 1519 return EINVAL; 1520 1521 if (bvpp != NULL) 1522 *bvpp = bvp; 1523 1524 /* 1525 * XXX: This is bogus. We should be failing the request 1526 * XXX: not only if this specific slice is mounted, but 1527 * XXX: if it's on a disk with any other mounted slice. 1528 */ 1529 if (vfs_mountedon(bvp)) 1530 return EBUSY; 1531 1532 return 0; 1533 } 1534 1535 /* 1536 * Make a 'unique' number from a mount type name. 1537 */ 1538 long 1539 makefstype(const char *type) 1540 { 1541 long rv; 1542 1543 for (rv = 0; *type; type++) { 1544 rv <<= 2; 1545 rv ^= *type; 1546 } 1547 return rv; 1548 } 1549 1550 static struct mountlist_entry * 1551 mountlist_alloc(enum mountlist_type type, struct mount *mp) 1552 { 1553 struct mountlist_entry *me; 1554 1555 me = kmem_zalloc(sizeof(*me), KM_SLEEP); 1556 me->me_mount = mp; 1557 me->me_type = type; 1558 1559 return me; 1560 } 1561 1562 static void 1563 mountlist_free(struct mountlist_entry *me) 1564 { 1565 1566 kmem_free(me, sizeof(*me)); 1567 } 1568 1569 void 1570 mountlist_iterator_init(mount_iterator_t **mip) 1571 { 1572 struct mountlist_entry *me; 1573 1574 me = mountlist_alloc(ME_MARKER, NULL); 1575 mutex_enter(&mountlist_lock); 1576 TAILQ_INSERT_HEAD(&mountlist, me, me_list); 1577 mutex_exit(&mountlist_lock); 1578 *mip = (mount_iterator_t *)me; 1579 } 1580 1581 void 1582 mountlist_iterator_destroy(mount_iterator_t *mi) 1583 { 1584 struct mountlist_entry *marker = &mi->mi_entry; 1585 1586 if (marker->me_mount != NULL) 1587 vfs_unbusy(marker->me_mount); 1588 1589 mutex_enter(&mountlist_lock); 1590 TAILQ_REMOVE(&mountlist, marker, me_list); 1591 mutex_exit(&mountlist_lock); 1592 1593 mountlist_free(marker); 1594 1595 } 1596 1597 /* 1598 * Return the next mount or NULL for this iterator. 1599 * Mark it busy on success. 1600 */ 1601 static inline struct mount * 1602 _mountlist_iterator_next(mount_iterator_t *mi, bool wait) 1603 { 1604 struct mountlist_entry *me, *marker = &mi->mi_entry; 1605 struct mount *mp; 1606 int error; 1607 1608 if (marker->me_mount != NULL) { 1609 vfs_unbusy(marker->me_mount); 1610 marker->me_mount = NULL; 1611 } 1612 1613 mutex_enter(&mountlist_lock); 1614 for (;;) { 1615 KASSERT(marker->me_type == ME_MARKER); 1616 1617 me = TAILQ_NEXT(marker, me_list); 1618 if (me == NULL) { 1619 /* End of list: keep marker and return. */ 1620 mutex_exit(&mountlist_lock); 1621 return NULL; 1622 } 1623 TAILQ_REMOVE(&mountlist, marker, me_list); 1624 TAILQ_INSERT_AFTER(&mountlist, me, marker, me_list); 1625 1626 /* Skip other markers. */ 1627 if (me->me_type != ME_MOUNT) 1628 continue; 1629 1630 /* Take an initial reference for vfs_busy() below. */ 1631 mp = me->me_mount; 1632 KASSERT(mp != NULL); 1633 vfs_ref(mp); 1634 mutex_exit(&mountlist_lock); 1635 1636 /* Try to mark this mount busy and return on success. */ 1637 if (wait) 1638 error = vfs_busy(mp); 1639 else 1640 error = vfs_trybusy(mp); 1641 if (error == 0) { 1642 vfs_rele(mp); 1643 marker->me_mount = mp; 1644 return mp; 1645 } 1646 vfs_rele(mp); 1647 mutex_enter(&mountlist_lock); 1648 } 1649 } 1650 1651 struct mount * 1652 mountlist_iterator_next(mount_iterator_t *mi) 1653 { 1654 1655 return _mountlist_iterator_next(mi, true); 1656 } 1657 1658 struct mount * 1659 mountlist_iterator_trynext(mount_iterator_t *mi) 1660 { 1661 1662 return _mountlist_iterator_next(mi, false); 1663 } 1664 1665 /* 1666 * Attach new mount to the end of the mount list. 1667 */ 1668 void 1669 mountlist_append(struct mount *mp) 1670 { 1671 struct mountlist_entry *me; 1672 1673 me = mountlist_alloc(ME_MOUNT, mp); 1674 mutex_enter(&mountlist_lock); 1675 TAILQ_INSERT_TAIL(&mountlist, me, me_list); 1676 mutex_exit(&mountlist_lock); 1677 } 1678 1679 /* 1680 * Remove mount from mount list. 1681 */void 1682 mountlist_remove(struct mount *mp) 1683 { 1684 struct mountlist_entry *me; 1685 1686 mutex_enter(&mountlist_lock); 1687 TAILQ_FOREACH(me, &mountlist, me_list) 1688 if (me->me_type == ME_MOUNT && me->me_mount == mp) 1689 break; 1690 KASSERT(me != NULL); 1691 TAILQ_REMOVE(&mountlist, me, me_list); 1692 mutex_exit(&mountlist_lock); 1693 mountlist_free(me); 1694 } 1695 1696 /* 1697 * Unlocked variant to traverse the mountlist. 1698 * To be used from DDB only. 1699 */ 1700 struct mount * 1701 _mountlist_next(struct mount *mp) 1702 { 1703 struct mountlist_entry *me; 1704 1705 if (mp == NULL) { 1706 me = TAILQ_FIRST(&mountlist); 1707 } else { 1708 TAILQ_FOREACH(me, &mountlist, me_list) 1709 if (me->me_type == ME_MOUNT && me->me_mount == mp) 1710 break; 1711 if (me != NULL) 1712 me = TAILQ_NEXT(me, me_list); 1713 } 1714 1715 while (me != NULL && me->me_type != ME_MOUNT) 1716 me = TAILQ_NEXT(me, me_list); 1717 1718 return (me ? me->me_mount : NULL); 1719 } 1720