/*	$NetBSD: vfs_mount.c,v 1.107 2024/08/11 13:09:58 bad Exp $	*/

/*-
 * Copyright (c) 1997-2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
65 * 66 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 67 */ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.107 2024/08/11 13:09:58 bad Exp $"); 71 72 #include "veriexec.h" 73 74 #include <sys/param.h> 75 #include <sys/kernel.h> 76 77 #include <sys/atomic.h> 78 #include <sys/buf.h> 79 #include <sys/conf.h> 80 #include <sys/fcntl.h> 81 #include <sys/filedesc.h> 82 #include <sys/device.h> 83 #include <sys/kauth.h> 84 #include <sys/kmem.h> 85 #include <sys/module.h> 86 #include <sys/mount.h> 87 #include <sys/fstrans.h> 88 #include <sys/namei.h> 89 #include <sys/extattr.h> 90 #include <sys/verified_exec.h> 91 #include <sys/syscallargs.h> 92 #include <sys/sysctl.h> 93 #include <sys/systm.h> 94 #include <sys/vfs_syscalls.h> 95 #include <sys/vnode_impl.h> 96 97 #include <miscfs/deadfs/deadfs.h> 98 #include <miscfs/genfs/genfs.h> 99 #include <miscfs/specfs/specdev.h> 100 101 #include <uvm/uvm_swap.h> 102 103 enum mountlist_type { 104 ME_MOUNT, 105 ME_MARKER 106 }; 107 struct mountlist_entry { 108 TAILQ_ENTRY(mountlist_entry) me_list; /* Mount list. */ 109 struct mount *me_mount; /* Actual mount if ME_MOUNT, 110 current mount else. */ 111 enum mountlist_type me_type; /* Mount or marker. */ 112 }; 113 struct mount_iterator { 114 struct mountlist_entry mi_entry; 115 }; 116 117 static struct vnode *vfs_vnode_iterator_next1(struct vnode_iterator *, 118 bool (*)(void *, struct vnode *), void *, bool); 119 120 /* Root filesystem. */ 121 vnode_t * rootvnode; 122 123 /* Mounted filesystem list. 
*/ 124 static TAILQ_HEAD(mountlist, mountlist_entry) mountlist; 125 static kmutex_t mountlist_lock __cacheline_aligned; 126 int vnode_offset_next_by_lru /* XXX: ugly hack for pstat.c */ 127 = offsetof(vnode_impl_t, vi_lrulist.tqe_next); 128 129 kmutex_t vfs_list_lock __cacheline_aligned; 130 131 static specificdata_domain_t mount_specificdata_domain; 132 static kmutex_t mntid_lock; 133 134 static kmutex_t mountgen_lock __cacheline_aligned; 135 static uint64_t mountgen; 136 137 void 138 vfs_mount_sysinit(void) 139 { 140 141 TAILQ_INIT(&mountlist); 142 mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE); 143 mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE); 144 145 mount_specificdata_domain = specificdata_domain_create(); 146 mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE); 147 mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE); 148 mountgen = 0; 149 } 150 151 struct mount * 152 vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp) 153 { 154 struct mount *mp; 155 int error __diagused; 156 157 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP); 158 mp->mnt_op = vfsops; 159 mp->mnt_refcnt = 1; 160 TAILQ_INIT(&mp->mnt_vnodelist); 161 mp->mnt_renamelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 162 mp->mnt_vnodelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 163 mp->mnt_updating = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 164 mp->mnt_vnodecovered = vp; 165 mount_initspecific(mp); 166 167 error = fstrans_mount(mp); 168 KASSERT(error == 0); 169 170 mutex_enter(&mountgen_lock); 171 mp->mnt_gen = mountgen++; 172 mutex_exit(&mountgen_lock); 173 174 return mp; 175 } 176 177 /* 178 * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and 179 * initialize a mount structure for it. 180 * 181 * Devname is usually updated by mount(8) after booting. 
182 */ 183 int 184 vfs_rootmountalloc(const char *fstypename, const char *devname, 185 struct mount **mpp) 186 { 187 struct vfsops *vfsp = NULL; 188 struct mount *mp; 189 int error __diagused; 190 191 mutex_enter(&vfs_list_lock); 192 LIST_FOREACH(vfsp, &vfs_list, vfs_list) 193 if (!strncmp(vfsp->vfs_name, fstypename, 194 sizeof(mp->mnt_stat.f_fstypename))) 195 break; 196 if (vfsp == NULL) { 197 mutex_exit(&vfs_list_lock); 198 return (ENODEV); 199 } 200 vfsp->vfs_refcount++; 201 mutex_exit(&vfs_list_lock); 202 203 if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL) 204 return ENOMEM; 205 error = vfs_busy(mp); 206 KASSERT(error == 0); 207 mp->mnt_flag = MNT_RDONLY; 208 (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, 209 sizeof(mp->mnt_stat.f_fstypename)); 210 mp->mnt_stat.f_mntonname[0] = '/'; 211 mp->mnt_stat.f_mntonname[1] = '\0'; 212 mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] = 213 '\0'; 214 (void)copystr(devname, mp->mnt_stat.f_mntfromname, 215 sizeof(mp->mnt_stat.f_mntfromname) - 1, 0); 216 *mpp = mp; 217 return 0; 218 } 219 220 /* 221 * vfs_getnewfsid: get a new unique fsid. 222 */ 223 void 224 vfs_getnewfsid(struct mount *mp) 225 { 226 static u_short xxxfs_mntid; 227 struct mountlist_entry *me; 228 fsid_t tfsid; 229 int mtype; 230 231 mutex_enter(&mntid_lock); 232 if (xxxfs_mntid == 0) 233 ++xxxfs_mntid; 234 mtype = makefstype(mp->mnt_op->vfs_name); 235 tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid); 236 tfsid.__fsid_val[1] = mtype; 237 /* Always increment to not return the same fsid to parallel mounts. */ 238 xxxfs_mntid++; 239 240 /* 241 * Directly walk mountlist to prevent deadlock through 242 * mountlist_iterator_next() -> vfs_busy(). 
243 */ 244 mutex_enter(&mountlist_lock); 245 for (me = TAILQ_FIRST(&mountlist); me != TAILQ_END(&mountlist); ) { 246 if (me->me_type == ME_MOUNT && 247 me->me_mount->mnt_stat.f_fsidx.__fsid_val[0] == 248 tfsid.__fsid_val[0] && 249 me->me_mount->mnt_stat.f_fsidx.__fsid_val[1] == 250 tfsid.__fsid_val[1]) { 251 tfsid.__fsid_val[0]++; 252 xxxfs_mntid++; 253 me = TAILQ_FIRST(&mountlist); 254 } else { 255 me = TAILQ_NEXT(me, me_list); 256 } 257 } 258 mutex_exit(&mountlist_lock); 259 260 mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0]; 261 mp->mnt_stat.f_fsidx.__fsid_val[1] = tfsid.__fsid_val[1]; 262 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 263 mutex_exit(&mntid_lock); 264 } 265 266 /* 267 * Lookup a mount point by filesystem identifier. 268 * 269 * XXX Needs to add a reference to the mount point. 270 */ 271 struct mount * 272 vfs_getvfs(fsid_t *fsid) 273 { 274 mount_iterator_t *iter; 275 struct mount *mp; 276 277 mountlist_iterator_init(&iter); 278 while ((mp = mountlist_iterator_next(iter)) != NULL) { 279 if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] && 280 mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) { 281 mountlist_iterator_destroy(iter); 282 return mp; 283 } 284 } 285 mountlist_iterator_destroy(iter); 286 return NULL; 287 } 288 289 /* 290 * Take a reference to a mount structure. 291 */ 292 void 293 vfs_ref(struct mount *mp) 294 { 295 296 KASSERT(mp->mnt_refcnt > 0 || mutex_owned(&mountlist_lock)); 297 298 atomic_inc_uint(&mp->mnt_refcnt); 299 } 300 301 /* 302 * Drop a reference to a mount structure, freeing if the last reference. 303 */ 304 void 305 vfs_rele(struct mount *mp) 306 { 307 308 membar_release(); 309 if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) { 310 return; 311 } 312 membar_acquire(); 313 314 /* 315 * Nothing else has visibility of the mount: we can now 316 * free the data structures. 
317 */ 318 KASSERT(mp->mnt_refcnt == 0); 319 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 320 mutex_obj_free(mp->mnt_updating); 321 mutex_obj_free(mp->mnt_renamelock); 322 mutex_obj_free(mp->mnt_vnodelock); 323 if (mp->mnt_op != NULL) { 324 vfs_delref(mp->mnt_op); 325 } 326 fstrans_unmount(mp); 327 /* 328 * Final free of mp gets done from fstrans_mount_dtor(). 329 * 330 * Prevents this memory to be reused as a mount before 331 * fstrans releases all references to it. 332 */ 333 } 334 335 /* 336 * Mark a mount point as busy, and gain a new reference to it. Used to 337 * prevent the file system from being unmounted during critical sections. 338 * 339 * vfs_busy can be called multiple times and by multiple threads 340 * and must be accompanied by the same number of vfs_unbusy calls. 341 * 342 * => The caller must hold a pre-existing reference to the mount. 343 * => Will fail if the file system is being unmounted, or is unmounted. 344 */ 345 static inline int 346 _vfs_busy(struct mount *mp, bool wait) 347 { 348 349 KASSERT(mp->mnt_refcnt > 0); 350 351 if (wait) { 352 fstrans_start(mp); 353 } else { 354 if (fstrans_start_nowait(mp)) 355 return EBUSY; 356 } 357 if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) { 358 fstrans_done(mp); 359 return ENOENT; 360 } 361 vfs_ref(mp); 362 return 0; 363 } 364 365 int 366 vfs_busy(struct mount *mp) 367 { 368 369 return _vfs_busy(mp, true); 370 } 371 372 int 373 vfs_trybusy(struct mount *mp) 374 { 375 376 return _vfs_busy(mp, false); 377 } 378 379 /* 380 * Unbusy a busy filesystem. 381 * 382 * Every successful vfs_busy() call must be undone by a vfs_unbusy() call. 383 */ 384 void 385 vfs_unbusy(struct mount *mp) 386 { 387 388 KASSERT(mp->mnt_refcnt > 0); 389 390 fstrans_done(mp); 391 vfs_rele(mp); 392 } 393 394 /* 395 * Change a file systems lower mount. 396 * Both the current and the new lower mount may be NULL. 
The caller 397 * guarantees exclusive access to the mount and holds a pre-existing 398 * reference to the new lower mount. 399 */ 400 int 401 vfs_set_lowermount(struct mount *mp, struct mount *lowermp) 402 { 403 struct mount *oldlowermp; 404 int error; 405 406 #ifdef DEBUG 407 /* 408 * Limit the depth of file system stack so kernel sanitizers 409 * may stress mount/unmount without exhausting the kernel stack. 410 */ 411 int depth; 412 struct mount *mp2; 413 414 for (depth = 0, mp2 = lowermp; mp2; depth++, mp2 = mp2->mnt_lower) { 415 if (depth == 23) 416 return EINVAL; 417 } 418 #endif 419 420 if (lowermp) { 421 if (lowermp == dead_rootmount) 422 return ENOENT; 423 error = vfs_busy(lowermp); 424 if (error) 425 return error; 426 vfs_ref(lowermp); 427 } 428 429 oldlowermp = mp->mnt_lower; 430 mp->mnt_lower = lowermp; 431 432 if (lowermp) 433 vfs_unbusy(lowermp); 434 435 if (oldlowermp) 436 vfs_rele(oldlowermp); 437 438 return 0; 439 } 440 441 struct vnode_iterator { 442 vnode_impl_t vi_vnode; 443 }; 444 445 void 446 vfs_vnode_iterator_init(struct mount *mp, struct vnode_iterator **vnip) 447 { 448 vnode_t *vp; 449 vnode_impl_t *vip; 450 451 vp = vnalloc_marker(mp); 452 vip = VNODE_TO_VIMPL(vp); 453 454 mutex_enter(mp->mnt_vnodelock); 455 TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vip, vi_mntvnodes); 456 vp->v_usecount = 1; 457 mutex_exit(mp->mnt_vnodelock); 458 459 *vnip = (struct vnode_iterator *)vip; 460 } 461 462 void 463 vfs_vnode_iterator_destroy(struct vnode_iterator *vni) 464 { 465 vnode_impl_t *mvip = &vni->vi_vnode; 466 vnode_t *mvp = VIMPL_TO_VNODE(mvip); 467 kmutex_t *lock; 468 469 KASSERT(vnis_marker(mvp)); 470 if (vrefcnt(mvp) != 0) { 471 lock = mvp->v_mount->mnt_vnodelock; 472 mutex_enter(lock); 473 TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvip, vi_mntvnodes); 474 mvp->v_usecount = 0; 475 mutex_exit(lock); 476 } 477 vnfree_marker(mvp); 478 } 479 480 static struct vnode * 481 vfs_vnode_iterator_next1(struct vnode_iterator *vni, 482 bool (*f)(void *, struct 
vnode *), void *cl, bool do_wait) 483 { 484 vnode_impl_t *mvip = &vni->vi_vnode; 485 struct mount *mp = VIMPL_TO_VNODE(mvip)->v_mount; 486 vnode_t *vp; 487 vnode_impl_t *vip; 488 kmutex_t *lock; 489 int error; 490 491 KASSERT(vnis_marker(VIMPL_TO_VNODE(mvip))); 492 493 lock = mp->mnt_vnodelock; 494 do { 495 mutex_enter(lock); 496 vip = TAILQ_NEXT(mvip, vi_mntvnodes); 497 TAILQ_REMOVE(&mp->mnt_vnodelist, mvip, vi_mntvnodes); 498 VIMPL_TO_VNODE(mvip)->v_usecount = 0; 499 again: 500 if (vip == NULL) { 501 mutex_exit(lock); 502 return NULL; 503 } 504 vp = VIMPL_TO_VNODE(vip); 505 KASSERT(vp != NULL); 506 mutex_enter(vp->v_interlock); 507 if (vnis_marker(vp) || 508 vdead_check(vp, (do_wait ? 0 : VDEAD_NOWAIT)) || 509 (f && !(*f)(cl, vp))) { 510 mutex_exit(vp->v_interlock); 511 vip = TAILQ_NEXT(vip, vi_mntvnodes); 512 goto again; 513 } 514 515 TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vip, mvip, vi_mntvnodes); 516 VIMPL_TO_VNODE(mvip)->v_usecount = 1; 517 mutex_exit(lock); 518 error = vcache_vget(vp); 519 KASSERT(error == 0 || error == ENOENT); 520 } while (error != 0); 521 522 return vp; 523 } 524 525 struct vnode * 526 vfs_vnode_iterator_next(struct vnode_iterator *vni, 527 bool (*f)(void *, struct vnode *), void *cl) 528 { 529 530 return vfs_vnode_iterator_next1(vni, f, cl, false); 531 } 532 533 /* 534 * Move a vnode from one mount queue to another. 535 */ 536 void 537 vfs_insmntque(vnode_t *vp, struct mount *mp) 538 { 539 vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 540 struct mount *omp; 541 kmutex_t *lock; 542 543 KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 || 544 vp->v_tag == VT_VFS); 545 546 /* 547 * Delete from old mount point vnode list, if on one. 548 */ 549 if ((omp = vp->v_mount) != NULL) { 550 lock = omp->mnt_vnodelock; 551 mutex_enter(lock); 552 TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vip, vi_mntvnodes); 553 mutex_exit(lock); 554 } 555 556 /* 557 * Insert into list of vnodes for the new mount point, if 558 * available. 
The caller must take a reference on the mount 559 * structure and donate to the vnode. 560 */ 561 if ((vp->v_mount = mp) != NULL) { 562 lock = mp->mnt_vnodelock; 563 mutex_enter(lock); 564 TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vip, vi_mntvnodes); 565 mutex_exit(lock); 566 } 567 568 if (omp != NULL) { 569 /* Release reference to old mount. */ 570 vfs_rele(omp); 571 } 572 } 573 574 /* 575 * Remove any vnodes in the vnode table belonging to mount point mp. 576 * 577 * If FORCECLOSE is not specified, there should not be any active ones, 578 * return error if any are found (nb: this is a user error, not a 579 * system error). If FORCECLOSE is specified, detach any active vnodes 580 * that are found. 581 * 582 * If WRITECLOSE is set, only flush out regular file vnodes open for 583 * writing. 584 * 585 * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped. 586 */ 587 #ifdef DEBUG 588 int busyprt = 0; /* print out busy vnodes */ 589 struct ctldebug debug1 = { "busyprt", &busyprt }; 590 #endif 591 592 static vnode_t * 593 vflushnext(struct vnode_iterator *marker, int *when) 594 { 595 if (getticks() > *when) { 596 yield(); 597 *when = getticks() + hz / 10; 598 } 599 preempt_point(); 600 return vfs_vnode_iterator_next1(marker, NULL, NULL, true); 601 } 602 603 /* 604 * Flush one vnode. Referenced on entry, unreferenced on return. 605 */ 606 static int 607 vflush_one(vnode_t *vp, vnode_t *skipvp, int flags) 608 { 609 int error; 610 struct vattr vattr; 611 612 if (vp == skipvp || 613 ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM))) { 614 vrele(vp); 615 return 0; 616 } 617 /* 618 * If WRITECLOSE is set, only flush out regular file 619 * vnodes open for writing or open and unlinked. 
620 */ 621 if ((flags & WRITECLOSE)) { 622 if (vp->v_type != VREG) { 623 vrele(vp); 624 return 0; 625 } 626 error = vn_lock(vp, LK_EXCLUSIVE); 627 if (error) { 628 KASSERT(error == ENOENT); 629 vrele(vp); 630 return 0; 631 } 632 error = VOP_FSYNC(vp, curlwp->l_cred, FSYNC_WAIT, 0, 0); 633 if (error == 0) 634 error = VOP_GETATTR(vp, &vattr, curlwp->l_cred); 635 VOP_UNLOCK(vp); 636 if (error) { 637 vrele(vp); 638 return error; 639 } 640 if (vp->v_writecount == 0 && vattr.va_nlink > 0) { 641 vrele(vp); 642 return 0; 643 } 644 } 645 /* 646 * First try to recycle the vnode. 647 */ 648 if (vrecycle(vp)) 649 return 0; 650 /* 651 * If FORCECLOSE is set, forcibly close the vnode. 652 * For block or character devices, revert to an 653 * anonymous device. For all other files, just 654 * kill them. 655 */ 656 if (flags & FORCECLOSE) { 657 if (vrefcnt(vp) > 1 && 658 (vp->v_type == VBLK || vp->v_type == VCHR)) 659 vcache_make_anon(vp); 660 else 661 vgone(vp); 662 return 0; 663 } 664 vrele(vp); 665 return EBUSY; 666 } 667 668 int 669 vflush(struct mount *mp, vnode_t *skipvp, int flags) 670 { 671 vnode_t *vp; 672 struct vnode_iterator *marker; 673 int busy, error, when, retries = 2; 674 675 do { 676 busy = error = when = 0; 677 678 /* 679 * First, flush out any vnode references from the 680 * deferred vrele list. 681 */ 682 vrele_flush(mp); 683 684 vfs_vnode_iterator_init(mp, &marker); 685 686 while ((vp = vflushnext(marker, &when)) != NULL) { 687 error = vflush_one(vp, skipvp, flags); 688 if (error == EBUSY) { 689 error = 0; 690 busy++; 691 #ifdef DEBUG 692 if (busyprt && retries == 0) 693 vprint("vflush: busy vnode", vp); 694 #endif 695 } else if (error != 0) { 696 break; 697 } 698 } 699 700 vfs_vnode_iterator_destroy(marker); 701 } while (error == 0 && busy > 0 && retries-- > 0); 702 703 if (error) 704 return error; 705 if (busy) 706 return EBUSY; 707 return 0; 708 } 709 710 /* 711 * Mount a file system. 
712 */ 713 714 /* 715 * Scan all active processes to see if any of them have a current or root 716 * directory onto which the new filesystem has just been mounted. If so, 717 * replace them with the new mount point. 718 */ 719 static void 720 mount_checkdirs(vnode_t *olddp) 721 { 722 vnode_t *newdp, *rele1, *rele2; 723 struct cwdinfo *cwdi; 724 struct proc *p; 725 bool retry; 726 727 if (vrefcnt(olddp) == 1) { 728 return; 729 } 730 if (VFS_ROOT(olddp->v_mountedhere, LK_EXCLUSIVE, &newdp)) 731 panic("mount: lost mount"); 732 733 do { 734 retry = false; 735 mutex_enter(&proc_lock); 736 PROCLIST_FOREACH(p, &allproc) { 737 if ((cwdi = p->p_cwdi) == NULL) 738 continue; 739 /* 740 * Cannot change to the old directory any more, 741 * so even if we see a stale value it is not a 742 * problem. 743 */ 744 if (cwdi->cwdi_cdir != olddp && 745 cwdi->cwdi_rdir != olddp) 746 continue; 747 retry = true; 748 rele1 = NULL; 749 rele2 = NULL; 750 atomic_inc_uint(&cwdi->cwdi_refcnt); 751 mutex_exit(&proc_lock); 752 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 753 if (cwdi->cwdi_cdir == olddp) { 754 rele1 = cwdi->cwdi_cdir; 755 vref(newdp); 756 cwdi->cwdi_cdir = newdp; 757 } 758 if (cwdi->cwdi_rdir == olddp) { 759 rele2 = cwdi->cwdi_rdir; 760 vref(newdp); 761 cwdi->cwdi_rdir = newdp; 762 } 763 rw_exit(&cwdi->cwdi_lock); 764 cwdfree(cwdi); 765 if (rele1 != NULL) 766 vrele(rele1); 767 if (rele2 != NULL) 768 vrele(rele2); 769 mutex_enter(&proc_lock); 770 break; 771 } 772 mutex_exit(&proc_lock); 773 } while (retry); 774 775 if (rootvnode == olddp) { 776 vrele(rootvnode); 777 vref(newdp); 778 rootvnode = newdp; 779 } 780 vput(newdp); 781 } 782 783 /* 784 * Start extended attributes 785 */ 786 static int 787 start_extattr(struct mount *mp) 788 { 789 int error; 790 791 error = VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, NULL, 0, NULL); 792 if (error) 793 printf("%s: failed to start extattr: error = %d\n", 794 mp->mnt_stat.f_mntonname, error); 795 796 return error; 797 } 798 799 int 800 
mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops, 801 const char *path, int flags, void *data, size_t *data_len) 802 { 803 vnode_t *vp = *vpp; 804 struct mount *mp; 805 struct pathbuf *pb; 806 struct nameidata nd; 807 int error, error2; 808 809 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 810 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 811 if (error) { 812 vfs_delref(vfsops); 813 return error; 814 } 815 816 /* Cannot make a non-dir a mount-point (from here anyway). */ 817 if (vp->v_type != VDIR) { 818 vfs_delref(vfsops); 819 return ENOTDIR; 820 } 821 822 if (flags & MNT_EXPORTED) { 823 vfs_delref(vfsops); 824 return EINVAL; 825 } 826 827 if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) { 828 vfs_delref(vfsops); 829 return ENOMEM; 830 } 831 832 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 833 834 /* 835 * The underlying file system may refuse the mount for 836 * various reasons. Allow the user to force it to happen. 837 * 838 * Set the mount level flags. 839 */ 840 mp->mnt_flag = flags & (MNT_BASIC_FLAGS | MNT_FORCE | MNT_IGNORE); 841 842 error = VFS_MOUNT(mp, path, data, data_len); 843 mp->mnt_flag &= ~MNT_OP_FLAGS; 844 845 if (error != 0) { 846 vfs_rele(mp); 847 return error; 848 } 849 850 /* Suspend new file system before taking mnt_updating. */ 851 do { 852 error2 = vfs_suspend(mp, 0); 853 } while (error2 == EINTR || error2 == ERESTART); 854 KASSERT(error2 == 0 || error2 == EOPNOTSUPP); 855 mutex_enter(mp->mnt_updating); 856 857 /* 858 * Validate and prepare the mount point. 
859 */ 860 error = pathbuf_copyin(path, &pb); 861 if (error != 0) { 862 goto err_mounted; 863 } 864 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 865 error = namei(&nd); 866 pathbuf_destroy(pb); 867 if (error != 0) { 868 goto err_mounted; 869 } 870 if (nd.ni_vp != vp) { 871 vput(nd.ni_vp); 872 error = EINVAL; 873 goto err_mounted; 874 } 875 if (vp->v_mountedhere != NULL) { 876 vput(nd.ni_vp); 877 error = EBUSY; 878 goto err_mounted; 879 } 880 error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0); 881 if (error != 0) { 882 vput(nd.ni_vp); 883 goto err_mounted; 884 } 885 886 /* 887 * Put the new filesystem on the mount list after root. 888 */ 889 cache_purge(vp); 890 mp->mnt_iflag &= ~IMNT_WANTRDWR; 891 892 mountlist_append(mp); 893 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 894 vfs_syncer_add_to_worklist(mp); 895 vp->v_mountedhere = mp; 896 vput(nd.ni_vp); 897 898 mount_checkdirs(vp); 899 mutex_exit(mp->mnt_updating); 900 if (error2 == 0) 901 vfs_resume(mp); 902 903 /* Hold an additional reference to the mount across VFS_START(). */ 904 vfs_ref(mp); 905 (void) VFS_STATVFS(mp, &mp->mnt_stat); 906 error = VFS_START(mp, 0); 907 if (error) { 908 vrele(vp); 909 } else if (flags & MNT_EXTATTR) { 910 if (start_extattr(mp) != 0) 911 mp->mnt_flag &= ~MNT_EXTATTR; 912 } 913 /* Drop reference held for VFS_START(). */ 914 vfs_rele(mp); 915 *vpp = NULL; 916 return error; 917 918 err_mounted: 919 if (VFS_UNMOUNT(mp, MNT_FORCE) != 0) 920 panic("Unmounting fresh file system failed"); 921 mutex_exit(mp->mnt_updating); 922 if (error2 == 0) 923 vfs_resume(mp); 924 vfs_set_lowermount(mp, NULL); 925 vfs_rele(mp); 926 927 return error; 928 } 929 930 /* 931 * Do the actual file system unmount. File system is assumed to have 932 * been locked by the caller. 933 * 934 * => Caller hold reference to the mount, explicitly for dounmount(). 
935 */ 936 int 937 dounmount(struct mount *mp, int flags, struct lwp *l) 938 { 939 struct vnode *coveredvp, *vp; 940 struct vnode_impl *vip; 941 int error, async, used_syncer, used_extattr; 942 const bool was_suspended = fstrans_is_owner(mp); 943 944 #if NVERIEXEC > 0 945 error = veriexec_unmountchk(mp); 946 if (error) 947 return (error); 948 #endif /* NVERIEXEC > 0 */ 949 950 if (!was_suspended) { 951 error = vfs_suspend(mp, 0); 952 if (error) { 953 return error; 954 } 955 } 956 957 KASSERT((mp->mnt_iflag & IMNT_GONE) == 0); 958 959 used_syncer = (mp->mnt_iflag & IMNT_ONWORKLIST) != 0; 960 used_extattr = mp->mnt_flag & MNT_EXTATTR; 961 962 mp->mnt_iflag |= IMNT_UNMOUNT; 963 mutex_enter(mp->mnt_updating); 964 /* 965 * Temporarily clear the MNT_ASYNC flags so that bwrite() doesn't 966 * convert the sync writes to delayed writes. 967 */ 968 async = mp->mnt_flag & MNT_ASYNC; 969 mp->mnt_flag &= ~MNT_ASYNC; 970 cache_purgevfs(mp); /* remove cache entries for this file sys */ 971 if (used_syncer) 972 vfs_syncer_remove_from_worklist(mp); 973 error = 0; 974 if (((mp->mnt_flag & MNT_RDONLY) == 0) && ((flags & MNT_FORCE) == 0)) { 975 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 976 } 977 if (error == 0 || (flags & MNT_FORCE)) { 978 error = VFS_UNMOUNT(mp, flags); 979 } 980 if (error) { 981 mp->mnt_iflag &= ~IMNT_UNMOUNT; 982 mp->mnt_flag |= async; 983 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 984 vfs_syncer_add_to_worklist(mp); 985 mutex_exit(mp->mnt_updating); 986 if (!was_suspended) 987 vfs_resume(mp); 988 if (used_extattr) { 989 if (start_extattr(mp) != 0) 990 mp->mnt_flag &= ~MNT_EXTATTR; 991 else 992 mp->mnt_flag |= MNT_EXTATTR; 993 } 994 return (error); 995 } 996 mutex_exit(mp->mnt_updating); 997 998 /* 999 * mark filesystem as gone to prevent further umounts 1000 * after mnt_umounting lock is gone, this also prevents 1001 * vfs_busy() from succeeding. 
1002 */ 1003 mp->mnt_iflag |= IMNT_GONE; 1004 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { 1005 coveredvp->v_mountedhere = NULL; 1006 } 1007 if (!was_suspended) 1008 vfs_resume(mp); 1009 1010 mountlist_remove(mp); 1011 1012 if ((vip = TAILQ_FIRST(&mp->mnt_vnodelist)) != NULL) { 1013 vp = VIMPL_TO_VNODE(vip); 1014 vprint("dangling", vp); 1015 panic("unmount: dangling vnode"); 1016 } 1017 vfs_hooks_unmount(mp); 1018 1019 vfs_set_lowermount(mp, NULL); 1020 vfs_rele(mp); /* reference from mount() */ 1021 if (coveredvp != NULLVP) { 1022 vrele(coveredvp); 1023 } 1024 return (0); 1025 } 1026 1027 /* 1028 * Unmount all file systems. 1029 * We traverse the list in reverse order under the assumption that doing so 1030 * will avoid needing to worry about dependencies. 1031 */ 1032 bool 1033 vfs_unmountall(struct lwp *l) 1034 { 1035 1036 printf("unmounting file systems...\n"); 1037 return vfs_unmountall1(l, true, true); 1038 } 1039 1040 static void 1041 vfs_unmount_print(struct mount *mp, const char *pfx) 1042 { 1043 1044 aprint_verbose("%sunmounted %s on %s type %s\n", pfx, 1045 mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, 1046 mp->mnt_stat.f_fstypename); 1047 } 1048 1049 /* 1050 * Return the mount with the highest generation less than "gen". 
1051 */ 1052 static struct mount * 1053 vfs_unmount_next(uint64_t gen) 1054 { 1055 mount_iterator_t *iter; 1056 struct mount *mp, *nmp; 1057 1058 nmp = NULL; 1059 1060 mountlist_iterator_init(&iter); 1061 while ((mp = mountlist_iterator_next(iter)) != NULL) { 1062 if ((nmp == NULL || mp->mnt_gen > nmp->mnt_gen) && 1063 mp->mnt_gen < gen) { 1064 if (nmp != NULL) 1065 vfs_rele(nmp); 1066 nmp = mp; 1067 vfs_ref(nmp); 1068 } 1069 } 1070 mountlist_iterator_destroy(iter); 1071 1072 return nmp; 1073 } 1074 1075 bool 1076 vfs_unmount_forceone(struct lwp *l) 1077 { 1078 struct mount *mp; 1079 int error; 1080 1081 mp = vfs_unmount_next(mountgen); 1082 if (mp == NULL) { 1083 return false; 1084 } 1085 1086 #ifdef DEBUG 1087 printf("forcefully unmounting %s (%s)...\n", 1088 mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname); 1089 #endif 1090 if ((error = dounmount(mp, MNT_FORCE, l)) == 0) { 1091 vfs_unmount_print(mp, "forcefully "); 1092 return true; 1093 } else { 1094 vfs_rele(mp); 1095 } 1096 1097 #ifdef DEBUG 1098 printf("forceful unmount of %s failed with error %d\n", 1099 mp->mnt_stat.f_mntonname, error); 1100 #endif 1101 1102 return false; 1103 } 1104 1105 bool 1106 vfs_unmountall1(struct lwp *l, bool force, bool verbose) 1107 { 1108 struct mount *mp; 1109 mount_iterator_t *iter; 1110 bool any_error = false, progress = false; 1111 uint64_t gen; 1112 int error; 1113 1114 gen = mountgen; 1115 for (;;) { 1116 mp = vfs_unmount_next(gen); 1117 if (mp == NULL) 1118 break; 1119 gen = mp->mnt_gen; 1120 1121 #ifdef DEBUG 1122 printf("unmounting %p %s (%s)...\n", 1123 (void *)mp, mp->mnt_stat.f_mntonname, 1124 mp->mnt_stat.f_mntfromname); 1125 #endif 1126 if ((error = dounmount(mp, force ? 
MNT_FORCE : 0, l)) == 0) { 1127 vfs_unmount_print(mp, ""); 1128 progress = true; 1129 } else { 1130 vfs_rele(mp); 1131 if (verbose) { 1132 printf("unmount of %s failed with error %d\n", 1133 mp->mnt_stat.f_mntonname, error); 1134 } 1135 any_error = true; 1136 } 1137 } 1138 if (verbose) { 1139 printf("unmounting done\n"); 1140 } 1141 if (any_error && verbose) { 1142 printf("WARNING: some file systems would not unmount\n"); 1143 } 1144 /* If the mountlist is empty it is time to remove swap. */ 1145 mountlist_iterator_init(&iter); 1146 if (mountlist_iterator_next(iter) == NULL) { 1147 uvm_swap_shutdown(l); 1148 } 1149 mountlist_iterator_destroy(iter); 1150 1151 return progress; 1152 } 1153 1154 void 1155 vfs_sync_all(struct lwp *l) 1156 { 1157 printf("syncing disks... "); 1158 1159 /* remove user processes from run queue */ 1160 suspendsched(); 1161 (void)spl0(); 1162 1163 /* avoid coming back this way again if we panic. */ 1164 doing_shutdown = 1; 1165 1166 do_sys_sync(l); 1167 1168 /* Wait for sync to finish. */ 1169 if (vfs_syncwait() != 0) { 1170 #if defined(DDB) && defined(DEBUG_HALT_BUSY) 1171 Debugger(); 1172 #endif 1173 printf("giving up\n"); 1174 return; 1175 } else 1176 printf("done\n"); 1177 } 1178 1179 /* 1180 * Sync and unmount file systems before shutting down. 1181 */ 1182 void 1183 vfs_shutdown(void) 1184 { 1185 lwp_t *l = curlwp; 1186 1187 vfs_sync_all(l); 1188 1189 /* 1190 * If we have panicked - do not make the situation potentially 1191 * worse by unmounting the file systems. 1192 */ 1193 if (panicstr != NULL) { 1194 return; 1195 } 1196 1197 /* Unmount file systems. 
*/ 1198 vfs_unmountall(l); 1199 } 1200 1201 /* 1202 * Print a list of supported file system types (used by vfs_mountroot) 1203 */ 1204 static void 1205 vfs_print_fstypes(void) 1206 { 1207 struct vfsops *v; 1208 int cnt = 0; 1209 1210 mutex_enter(&vfs_list_lock); 1211 LIST_FOREACH(v, &vfs_list, vfs_list) 1212 ++cnt; 1213 mutex_exit(&vfs_list_lock); 1214 1215 if (cnt == 0) { 1216 printf("WARNING: No file system modules have been loaded.\n"); 1217 return; 1218 } 1219 1220 printf("Supported file systems:"); 1221 mutex_enter(&vfs_list_lock); 1222 LIST_FOREACH(v, &vfs_list, vfs_list) { 1223 printf(" %s", v->vfs_name); 1224 } 1225 mutex_exit(&vfs_list_lock); 1226 printf("\n"); 1227 } 1228 1229 /* 1230 * Mount the root file system. If the operator didn't specify a 1231 * file system to use, try all possible file systems until one 1232 * succeeds. 1233 */ 1234 int 1235 vfs_mountroot(void) 1236 { 1237 struct vfsops *v; 1238 int error = ENODEV; 1239 1240 if (root_device == NULL) 1241 panic("vfs_mountroot: root device unknown"); 1242 1243 switch (device_class(root_device)) { 1244 case DV_IFNET: 1245 if (rootdev != NODEV) 1246 panic("vfs_mountroot: rootdev set for DV_IFNET " 1247 "(0x%llx -> %llu,%llu)", 1248 (unsigned long long)rootdev, 1249 (unsigned long long)major(rootdev), 1250 (unsigned long long)minor(rootdev)); 1251 break; 1252 1253 case DV_DISK: 1254 if (rootdev == NODEV) 1255 panic("vfs_mountroot: rootdev not set for DV_DISK"); 1256 if (bdevvp(rootdev, &rootvp)) 1257 panic("vfs_mountroot: can't get vnode for rootdev"); 1258 vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY); 1259 error = VOP_OPEN(rootvp, FREAD, FSCRED); 1260 VOP_UNLOCK(rootvp); 1261 if (error) { 1262 printf("vfs_mountroot: can't open root device\n"); 1263 return (error); 1264 } 1265 break; 1266 1267 case DV_VIRTUAL: 1268 break; 1269 1270 default: 1271 printf("%s: inappropriate for root file system\n", 1272 device_xname(root_device)); 1273 return (ENODEV); 1274 } 1275 1276 /* 1277 * If user specified a root fs 
type, use it. Make sure the 1278 * specified type exists and has a mount_root() 1279 */ 1280 if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) { 1281 v = vfs_getopsbyname(rootfstype); 1282 error = EFTYPE; 1283 if (v != NULL) { 1284 if (v->vfs_mountroot != NULL) { 1285 error = (v->vfs_mountroot)(); 1286 } 1287 v->vfs_refcount--; 1288 } 1289 goto done; 1290 } 1291 1292 /* 1293 * Try each file system currently configured into the kernel. 1294 */ 1295 mutex_enter(&vfs_list_lock); 1296 LIST_FOREACH(v, &vfs_list, vfs_list) { 1297 if (v->vfs_mountroot == NULL) 1298 continue; 1299 #ifdef DEBUG 1300 aprint_normal("mountroot: trying %s...\n", v->vfs_name); 1301 #endif 1302 v->vfs_refcount++; 1303 mutex_exit(&vfs_list_lock); 1304 error = (*v->vfs_mountroot)(); 1305 mutex_enter(&vfs_list_lock); 1306 v->vfs_refcount--; 1307 if (!error) { 1308 aprint_normal("root file system type: %s\n", 1309 v->vfs_name); 1310 break; 1311 } 1312 } 1313 mutex_exit(&vfs_list_lock); 1314 1315 if (v == NULL) { 1316 vfs_print_fstypes(); 1317 printf("no file system for %s", device_xname(root_device)); 1318 if (device_class(root_device) == DV_DISK) 1319 printf(" (dev 0x%llx)", (unsigned long long)rootdev); 1320 printf("\n"); 1321 error = EFTYPE; 1322 } 1323 1324 done: 1325 if (error && device_class(root_device) == DV_DISK) { 1326 vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY); 1327 VOP_CLOSE(rootvp, FREAD, FSCRED); 1328 VOP_UNLOCK(rootvp); 1329 vrele(rootvp); 1330 } 1331 if (error == 0) { 1332 mount_iterator_t *iter; 1333 struct mount *mp; 1334 1335 mountlist_iterator_init(&iter); 1336 mp = mountlist_iterator_next(iter); 1337 KASSERT(mp != NULL); 1338 mountlist_iterator_destroy(iter); 1339 1340 mp->mnt_flag |= MNT_ROOTFS; 1341 mp->mnt_op->vfs_refcount++; 1342 1343 /* 1344 * Get the vnode for '/'. Set cwdi0.cwdi_cdir to 1345 * reference it, and donate it the reference grabbed 1346 * with VFS_ROOT(). 
1347 */ 1348 error = VFS_ROOT(mp, LK_NONE, &rootvnode); 1349 if (error) 1350 panic("cannot find root vnode, error=%d", error); 1351 cwdi0.cwdi_cdir = rootvnode; 1352 cwdi0.cwdi_rdir = NULL; 1353 1354 /* 1355 * Now that root is mounted, we can fixup initproc's CWD 1356 * info. All other processes are kthreads, which merely 1357 * share proc0's CWD info. 1358 */ 1359 initproc->p_cwdi->cwdi_cdir = rootvnode; 1360 vref(initproc->p_cwdi->cwdi_cdir); 1361 initproc->p_cwdi->cwdi_rdir = NULL; 1362 /* 1363 * Enable loading of modules from the filesystem 1364 */ 1365 module_load_vfs_init(); 1366 1367 } 1368 return (error); 1369 } 1370 1371 /* 1372 * mount_specific_key_create -- 1373 * Create a key for subsystem mount-specific data. 1374 */ 1375 int 1376 mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor) 1377 { 1378 1379 return specificdata_key_create(mount_specificdata_domain, keyp, dtor); 1380 } 1381 1382 /* 1383 * mount_specific_key_delete -- 1384 * Delete a key for subsystem mount-specific data. 1385 */ 1386 void 1387 mount_specific_key_delete(specificdata_key_t key) 1388 { 1389 1390 specificdata_key_delete(mount_specificdata_domain, key); 1391 } 1392 1393 /* 1394 * mount_initspecific -- 1395 * Initialize a mount's specificdata container. 1396 */ 1397 void 1398 mount_initspecific(struct mount *mp) 1399 { 1400 int error __diagused; 1401 1402 error = specificdata_init(mount_specificdata_domain, 1403 &mp->mnt_specdataref); 1404 KASSERT(error == 0); 1405 } 1406 1407 /* 1408 * mount_finispecific -- 1409 * Finalize a mount's specificdata container. 1410 */ 1411 void 1412 mount_finispecific(struct mount *mp) 1413 { 1414 1415 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 1416 } 1417 1418 /* 1419 * mount_getspecific -- 1420 * Return mount-specific data corresponding to the specified key. 
1421 */ 1422 void * 1423 mount_getspecific(struct mount *mp, specificdata_key_t key) 1424 { 1425 1426 return specificdata_getspecific(mount_specificdata_domain, 1427 &mp->mnt_specdataref, key); 1428 } 1429 1430 /* 1431 * mount_setspecific -- 1432 * Set mount-specific data corresponding to the specified key. 1433 */ 1434 void 1435 mount_setspecific(struct mount *mp, specificdata_key_t key, void *data) 1436 { 1437 1438 specificdata_setspecific(mount_specificdata_domain, 1439 &mp->mnt_specdataref, key, data); 1440 } 1441 1442 /* 1443 * Check to see if a filesystem is mounted on a block device. 1444 */ 1445 int 1446 vfs_mountedon(vnode_t *vp) 1447 { 1448 vnode_t *vq; 1449 int error = 0; 1450 1451 if (vp->v_type != VBLK) 1452 return ENOTBLK; 1453 if (spec_node_getmountedfs(vp) != NULL) 1454 return EBUSY; 1455 if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, VDEAD_NOWAIT, &vq) 1456 == 0) { 1457 if (spec_node_getmountedfs(vq) != NULL) 1458 error = EBUSY; 1459 vrele(vq); 1460 } 1461 1462 return error; 1463 } 1464 1465 /* 1466 * Check if a device pointed to by vp is mounted. 1467 * 1468 * Returns: 1469 * EINVAL if it's not a disk 1470 * EBUSY if it's a disk and mounted 1471 * 0 if it's a disk and not mounted 1472 */ 1473 int 1474 rawdev_mounted(vnode_t *vp, vnode_t **bvpp) 1475 { 1476 vnode_t *bvp; 1477 dev_t dev; 1478 int d_type; 1479 1480 bvp = NULL; 1481 d_type = D_OTHER; 1482 1483 if (iskmemvp(vp)) 1484 return EINVAL; 1485 1486 switch (vp->v_type) { 1487 case VCHR: { 1488 const struct cdevsw *cdev; 1489 1490 dev = vp->v_rdev; 1491 cdev = cdevsw_lookup(dev); 1492 if (cdev != NULL) { 1493 dev_t blkdev; 1494 1495 blkdev = devsw_chr2blk(dev); 1496 if (blkdev != NODEV) { 1497 if (vfinddev(blkdev, VBLK, &bvp) != 0) { 1498 d_type = (cdev->d_flag & D_TYPEMASK); 1499 /* XXX: what if bvp disappears? 
*/ 1500 vrele(bvp); 1501 } 1502 } 1503 } 1504 1505 break; 1506 } 1507 1508 case VBLK: { 1509 const struct bdevsw *bdev; 1510 1511 dev = vp->v_rdev; 1512 bdev = bdevsw_lookup(dev); 1513 if (bdev != NULL) 1514 d_type = (bdev->d_flag & D_TYPEMASK); 1515 1516 bvp = vp; 1517 1518 break; 1519 } 1520 1521 default: 1522 break; 1523 } 1524 1525 if (d_type != D_DISK) 1526 return EINVAL; 1527 1528 if (bvpp != NULL) 1529 *bvpp = bvp; 1530 1531 /* 1532 * XXX: This is bogus. We should be failing the request 1533 * XXX: not only if this specific slice is mounted, but 1534 * XXX: if it's on a disk with any other mounted slice. 1535 */ 1536 if (vfs_mountedon(bvp)) 1537 return EBUSY; 1538 1539 return 0; 1540 } 1541 1542 /* 1543 * Make a 'unique' number from a mount type name. 1544 */ 1545 long 1546 makefstype(const char *type) 1547 { 1548 long rv; 1549 1550 for (rv = 0; *type; type++) { 1551 rv <<= 2; 1552 rv ^= *type; 1553 } 1554 return rv; 1555 } 1556 1557 static struct mountlist_entry * 1558 mountlist_alloc(enum mountlist_type type, struct mount *mp) 1559 { 1560 struct mountlist_entry *me; 1561 1562 me = kmem_zalloc(sizeof(*me), KM_SLEEP); 1563 me->me_mount = mp; 1564 me->me_type = type; 1565 1566 return me; 1567 } 1568 1569 static void 1570 mountlist_free(struct mountlist_entry *me) 1571 { 1572 1573 kmem_free(me, sizeof(*me)); 1574 } 1575 1576 void 1577 mountlist_iterator_init(mount_iterator_t **mip) 1578 { 1579 struct mountlist_entry *me; 1580 1581 me = mountlist_alloc(ME_MARKER, NULL); 1582 mutex_enter(&mountlist_lock); 1583 TAILQ_INSERT_HEAD(&mountlist, me, me_list); 1584 mutex_exit(&mountlist_lock); 1585 *mip = (mount_iterator_t *)me; 1586 } 1587 1588 void 1589 mountlist_iterator_destroy(mount_iterator_t *mi) 1590 { 1591 struct mountlist_entry *marker = &mi->mi_entry; 1592 1593 if (marker->me_mount != NULL) 1594 vfs_unbusy(marker->me_mount); 1595 1596 mutex_enter(&mountlist_lock); 1597 TAILQ_REMOVE(&mountlist, marker, me_list); 1598 mutex_exit(&mountlist_lock); 1599 1600 
mountlist_free(marker); 1601 1602 } 1603 1604 /* 1605 * Return the next mount or NULL for this iterator. 1606 * Mark it busy on success. 1607 */ 1608 static inline struct mount * 1609 _mountlist_iterator_next(mount_iterator_t *mi, bool wait) 1610 { 1611 struct mountlist_entry *me, *marker = &mi->mi_entry; 1612 struct mount *mp; 1613 int error; 1614 1615 if (marker->me_mount != NULL) { 1616 vfs_unbusy(marker->me_mount); 1617 marker->me_mount = NULL; 1618 } 1619 1620 mutex_enter(&mountlist_lock); 1621 for (;;) { 1622 KASSERT(marker->me_type == ME_MARKER); 1623 1624 me = TAILQ_NEXT(marker, me_list); 1625 if (me == NULL) { 1626 /* End of list: keep marker and return. */ 1627 mutex_exit(&mountlist_lock); 1628 return NULL; 1629 } 1630 TAILQ_REMOVE(&mountlist, marker, me_list); 1631 TAILQ_INSERT_AFTER(&mountlist, me, marker, me_list); 1632 1633 /* Skip other markers. */ 1634 if (me->me_type != ME_MOUNT) 1635 continue; 1636 1637 /* Take an initial reference for vfs_busy() below. */ 1638 mp = me->me_mount; 1639 KASSERT(mp != NULL); 1640 vfs_ref(mp); 1641 mutex_exit(&mountlist_lock); 1642 1643 /* Try to mark this mount busy and return on success. */ 1644 if (wait) 1645 error = vfs_busy(mp); 1646 else 1647 error = vfs_trybusy(mp); 1648 if (error == 0) { 1649 vfs_rele(mp); 1650 marker->me_mount = mp; 1651 return mp; 1652 } 1653 vfs_rele(mp); 1654 mutex_enter(&mountlist_lock); 1655 } 1656 } 1657 1658 struct mount * 1659 mountlist_iterator_next(mount_iterator_t *mi) 1660 { 1661 1662 return _mountlist_iterator_next(mi, true); 1663 } 1664 1665 struct mount * 1666 mountlist_iterator_trynext(mount_iterator_t *mi) 1667 { 1668 1669 return _mountlist_iterator_next(mi, false); 1670 } 1671 1672 /* 1673 * Attach new mount to the end of the mount list. 
1674 */ 1675 void 1676 mountlist_append(struct mount *mp) 1677 { 1678 struct mountlist_entry *me; 1679 1680 me = mountlist_alloc(ME_MOUNT, mp); 1681 mutex_enter(&mountlist_lock); 1682 TAILQ_INSERT_TAIL(&mountlist, me, me_list); 1683 mutex_exit(&mountlist_lock); 1684 } 1685 1686 /* 1687 * Remove mount from mount list. 1688 */void 1689 mountlist_remove(struct mount *mp) 1690 { 1691 struct mountlist_entry *me; 1692 1693 mutex_enter(&mountlist_lock); 1694 TAILQ_FOREACH(me, &mountlist, me_list) 1695 if (me->me_type == ME_MOUNT && me->me_mount == mp) 1696 break; 1697 KASSERT(me != NULL); 1698 TAILQ_REMOVE(&mountlist, me, me_list); 1699 mutex_exit(&mountlist_lock); 1700 mountlist_free(me); 1701 } 1702 1703 /* 1704 * Unlocked variant to traverse the mountlist. 1705 * To be used from DDB only. 1706 */ 1707 struct mount * 1708 _mountlist_next(struct mount *mp) 1709 { 1710 struct mountlist_entry *me; 1711 1712 if (mp == NULL) { 1713 me = TAILQ_FIRST(&mountlist); 1714 } else { 1715 TAILQ_FOREACH(me, &mountlist, me_list) 1716 if (me->me_type == ME_MOUNT && me->me_mount == mp) 1717 break; 1718 if (me != NULL) 1719 me = TAILQ_NEXT(me, me_list); 1720 } 1721 1722 while (me != NULL && me->me_type != ME_MOUNT) 1723 me = TAILQ_NEXT(me, me_list); 1724 1725 return (me ? me->me_mount : NULL); 1726 } 1727