1 /* $NetBSD: vfs_mount.c,v 1.103 2023/12/28 12:48:08 hannken Exp $ */ 2 3 /*- 4 * Copyright (c) 1997-2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1989, 1993 35 * The Regents of the University of California. All rights reserved. 36 * (c) UNIX System Laboratories, Inc. 
37 * All or some portions of this file are derived from material licensed 38 * to the University of California by American Telephone and Telegraph 39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 40 * the permission of UNIX System Laboratories, Inc. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 
*
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.103 2023/12/28 12:48:08 hannken Exp $");

#include "veriexec.h"

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/device.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/fstrans.h>
#include <sys/namei.h>
#include <sys/extattr.h>
#include <sys/verified_exec.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vfs_syscalls.h>
#include <sys/vnode_impl.h>

#include <miscfs/deadfs/deadfs.h>
#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <uvm/uvm_swap.h>

/*
 * Kind of element stored on the mount list: either a real mount or a
 * marker entry (markers are embedded in struct mount_iterator below).
 */
enum mountlist_type {
	ME_MOUNT,
	ME_MARKER
};

/*
 * One element of the global mount list.
 */
struct mountlist_entry {
	TAILQ_ENTRY(mountlist_entry) me_list;	/* Mount list. */
	struct mount *me_mount;			/* Actual mount if ME_MOUNT,
						   current mount else. */
	enum mountlist_type me_type;		/* Mount or marker. */
};

/*
 * Iterator over the mount list; it consists solely of a list entry.
 * NOTE(review): presumably always of type ME_MARKER -- the code that
 * initializes iterators is outside this part of the file; confirm there.
 */
struct mount_iterator {
	struct mountlist_entry mi_entry;
};

/*
 * Forward declaration: common worker for vnode iteration, used before
 * its definition by vflushnext() and vfs_vnode_iterator_next().
 */
static struct vnode *vfs_vnode_iterator_next1(struct vnode_iterator *,
    bool (*)(void *, struct vnode *), void *, bool);

/* Root filesystem. */
vnode_t *			rootvnode;

/* Mounted filesystem list.
*/
static TAILQ_HEAD(mountlist, mountlist_entry) mountlist;
/* Protects mountlist. */
static kmutex_t mountlist_lock __cacheline_aligned;
int vnode_offset_next_by_lru	/* XXX: ugly hack for pstat.c */
    = offsetof(vnode_impl_t, vi_lrulist.tqe_next);

/* Held in this file while walking vfs_list or changing vfs_refcount. */
kmutex_t vfs_list_lock __cacheline_aligned;

/* Domain for per-mount specificdata, see mount_specific_key_create(). */
static specificdata_domain_t mount_specificdata_domain;
/* Serializes fsid generation in vfs_getnewfsid(). */
static kmutex_t mntid_lock;

/* mountgen is the next mount generation number; see vfs_mountalloc(). */
static kmutex_t mountgen_lock __cacheline_aligned;
static uint64_t mountgen;

/*
 * vfs_mount_sysinit: one-time initialization of the mount list, the
 * locks defined above, the mount specificdata domain and the mount
 * generation counter.
 */
void
vfs_mount_sysinit(void)
{

	TAILQ_INIT(&mountlist);
	mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);

	mount_specificdata_domain = specificdata_domain_create();
	mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE);
	mountgen = 0;
}

/*
 * vfs_mountalloc: allocate and initialize a mount structure.
 *
 * => vfsops is stored in mnt_op; vfs_rele() drops that vfsops reference
 *    with vfs_delref() when the mount is finally released.
 * => vp is recorded as the covered vnode and may be NULL (see
 *    vfs_rootmountalloc()).
 * => The mount is returned with a reference count of one and a fresh
 *    generation number.
 */
struct mount *
vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp)
{
	struct mount *mp;
	int error __diagused;

	/* Zeroed allocation; all fields not set below start out as 0/NULL. */
	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
	mp->mnt_op = vfsops;
	mp->mnt_refcnt = 1;
	TAILQ_INIT(&mp->mnt_vnodelist);
	mp->mnt_renamelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_vnodelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_updating = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_vnodecovered = vp;
	mount_initspecific(mp);

	error = fstrans_mount(mp);
	KASSERT(error == 0);

	/* Generation numbers are handed out in allocation order. */
	mutex_enter(&mountgen_lock);
	mp->mnt_gen = mountgen++;
	mutex_exit(&mountgen_lock);

	return mp;
}

/*
 * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and
 * initialize a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
182 */ 183 int 184 vfs_rootmountalloc(const char *fstypename, const char *devname, 185 struct mount **mpp) 186 { 187 struct vfsops *vfsp = NULL; 188 struct mount *mp; 189 int error __diagused; 190 191 mutex_enter(&vfs_list_lock); 192 LIST_FOREACH(vfsp, &vfs_list, vfs_list) 193 if (!strncmp(vfsp->vfs_name, fstypename, 194 sizeof(mp->mnt_stat.f_fstypename))) 195 break; 196 if (vfsp == NULL) { 197 mutex_exit(&vfs_list_lock); 198 return (ENODEV); 199 } 200 vfsp->vfs_refcount++; 201 mutex_exit(&vfs_list_lock); 202 203 if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL) 204 return ENOMEM; 205 error = vfs_busy(mp); 206 KASSERT(error == 0); 207 mp->mnt_flag = MNT_RDONLY; 208 (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, 209 sizeof(mp->mnt_stat.f_fstypename)); 210 mp->mnt_stat.f_mntonname[0] = '/'; 211 mp->mnt_stat.f_mntonname[1] = '\0'; 212 mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] = 213 '\0'; 214 (void)copystr(devname, mp->mnt_stat.f_mntfromname, 215 sizeof(mp->mnt_stat.f_mntfromname) - 1, 0); 216 *mpp = mp; 217 return 0; 218 } 219 220 /* 221 * vfs_getnewfsid: get a new unique fsid. 222 */ 223 void 224 vfs_getnewfsid(struct mount *mp) 225 { 226 static u_short xxxfs_mntid; 227 struct mountlist_entry *me; 228 fsid_t tfsid; 229 int mtype; 230 231 mutex_enter(&mntid_lock); 232 if (xxxfs_mntid == 0) 233 ++xxxfs_mntid; 234 mtype = makefstype(mp->mnt_op->vfs_name); 235 tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid); 236 tfsid.__fsid_val[1] = mtype; 237 /* Always increment to not return the same fsid to parallel mounts. */ 238 xxxfs_mntid++; 239 240 /* 241 * Directly walk mountlist to prevent deadlock through 242 * mountlist_iterator_next() -> vfs_busy(). 
243 */ 244 mutex_enter(&mountlist_lock); 245 for (me = TAILQ_FIRST(&mountlist); me != TAILQ_END(&mountlist); ) { 246 if (me->me_type == ME_MOUNT && 247 me->me_mount->mnt_stat.f_fsidx.__fsid_val[0] == 248 tfsid.__fsid_val[0] && 249 me->me_mount->mnt_stat.f_fsidx.__fsid_val[1] == 250 tfsid.__fsid_val[1]) { 251 tfsid.__fsid_val[0]++; 252 xxxfs_mntid++; 253 me = TAILQ_FIRST(&mountlist); 254 } else { 255 me = TAILQ_NEXT(me, me_list); 256 } 257 } 258 mutex_exit(&mountlist_lock); 259 260 mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0]; 261 mp->mnt_stat.f_fsidx.__fsid_val[1] = tfsid.__fsid_val[1]; 262 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 263 mutex_exit(&mntid_lock); 264 } 265 266 /* 267 * Lookup a mount point by filesystem identifier. 268 * 269 * XXX Needs to add a reference to the mount point. 270 */ 271 struct mount * 272 vfs_getvfs(fsid_t *fsid) 273 { 274 mount_iterator_t *iter; 275 struct mount *mp; 276 277 mountlist_iterator_init(&iter); 278 while ((mp = mountlist_iterator_next(iter)) != NULL) { 279 if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] && 280 mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) { 281 mountlist_iterator_destroy(iter); 282 return mp; 283 } 284 } 285 mountlist_iterator_destroy(iter); 286 return NULL; 287 } 288 289 /* 290 * Take a reference to a mount structure. 291 */ 292 void 293 vfs_ref(struct mount *mp) 294 { 295 296 KASSERT(mp->mnt_refcnt > 0 || mutex_owned(&mountlist_lock)); 297 298 atomic_inc_uint(&mp->mnt_refcnt); 299 } 300 301 /* 302 * Drop a reference to a mount structure, freeing if the last reference. 303 */ 304 void 305 vfs_rele(struct mount *mp) 306 { 307 308 membar_release(); 309 if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) { 310 return; 311 } 312 membar_acquire(); 313 314 /* 315 * Nothing else has visibility of the mount: we can now 316 * free the data structures. 
317 */ 318 KASSERT(mp->mnt_refcnt == 0); 319 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 320 mutex_obj_free(mp->mnt_updating); 321 mutex_obj_free(mp->mnt_renamelock); 322 mutex_obj_free(mp->mnt_vnodelock); 323 if (mp->mnt_op != NULL) { 324 vfs_delref(mp->mnt_op); 325 } 326 fstrans_unmount(mp); 327 /* 328 * Final free of mp gets done from fstrans_mount_dtor(). 329 * 330 * Prevents this memory to be reused as a mount before 331 * fstrans releases all references to it. 332 */ 333 } 334 335 /* 336 * Mark a mount point as busy, and gain a new reference to it. Used to 337 * prevent the file system from being unmounted during critical sections. 338 * 339 * vfs_busy can be called multiple times and by multiple threads 340 * and must be accompanied by the same number of vfs_unbusy calls. 341 * 342 * => The caller must hold a pre-existing reference to the mount. 343 * => Will fail if the file system is being unmounted, or is unmounted. 344 */ 345 static inline int 346 _vfs_busy(struct mount *mp, bool wait) 347 { 348 349 KASSERT(mp->mnt_refcnt > 0); 350 351 if (wait) { 352 fstrans_start(mp); 353 } else { 354 if (fstrans_start_nowait(mp)) 355 return EBUSY; 356 } 357 if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) { 358 fstrans_done(mp); 359 return ENOENT; 360 } 361 vfs_ref(mp); 362 return 0; 363 } 364 365 int 366 vfs_busy(struct mount *mp) 367 { 368 369 return _vfs_busy(mp, true); 370 } 371 372 int 373 vfs_trybusy(struct mount *mp) 374 { 375 376 return _vfs_busy(mp, false); 377 } 378 379 /* 380 * Unbusy a busy filesystem. 381 * 382 * Every successful vfs_busy() call must be undone by a vfs_unbusy() call. 383 */ 384 void 385 vfs_unbusy(struct mount *mp) 386 { 387 388 KASSERT(mp->mnt_refcnt > 0); 389 390 fstrans_done(mp); 391 vfs_rele(mp); 392 } 393 394 /* 395 * Change a file systems lower mount. 396 * Both the current and the new lower mount may be NULL. 
The caller
 * guarantees exclusive access to the mount and holds a pre-existing
 * reference to the new lower mount.
 *
 * Returns 0 on success, ENOENT if the new lower mount is dead_rootmount,
 * or the error from vfs_busy().  On DEBUG kernels EINVAL is returned if
 * the stack of lower mounts would get too deep.
 */
int
vfs_set_lowermount(struct mount *mp, struct mount *lowermp)
{
	struct mount *oldlowermp;
	int error;

#ifdef DEBUG
	/*
	 * Limit the depth of file system stack so kernel sanitizers
	 * may stress mount/unmount without exhausting the kernel stack.
	 */
	int depth;
	struct mount *mp2;

	for (depth = 0, mp2 = lowermp; mp2; depth++, mp2 = mp2->mnt_lower) {
		if (depth == 23)
			return EINVAL;
	}
#endif

	if (lowermp) {
		if (lowermp == dead_rootmount)
			return ENOENT;
		/* Busy it to make sure it is not being unmounted ... */
		error = vfs_busy(lowermp);
		if (error)
			return error;
		/* ... and take the reference stored in mp->mnt_lower. */
		vfs_ref(lowermp);
	}

	oldlowermp = mp->mnt_lower;
	mp->mnt_lower = lowermp;

	if (lowermp)
		vfs_unbusy(lowermp);

	/* Drop the reference that was held for the previous lower mount. */
	if (oldlowermp)
		vfs_rele(oldlowermp);

	return 0;
}

/*
 * A vnode iterator is a marker vnode that gets linked into the
 * mount's vnode list to remember the current position.
 */
struct vnode_iterator {
	vnode_impl_t vi_vnode;
};

/*
 * vfs_vnode_iterator_init: allocate a marker vnode, insert it at the
 * head of the vnode list of mp and return it through *vnip.
 *
 * A marker use count of one means "marker is on the list".
 */
void
vfs_vnode_iterator_init(struct mount *mp, struct vnode_iterator **vnip)
{
	vnode_t *vp;
	vnode_impl_t *vip;

	vp = vnalloc_marker(mp);
	vip = VNODE_TO_VIMPL(vp);

	mutex_enter(mp->mnt_vnodelock);
	TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vip, vi_mntvnodes);
	vp->v_usecount = 1;
	mutex_exit(mp->mnt_vnodelock);

	*vnip = (struct vnode_iterator *)vip;
}

/*
 * vfs_vnode_iterator_destroy: unlink the marker from the vnode list if
 * it is still on it, then free the marker.
 */
void
vfs_vnode_iterator_destroy(struct vnode_iterator *vni)
{
	vnode_impl_t *mvip = &vni->vi_vnode;
	vnode_t *mvp = VIMPL_TO_VNODE(mvip);
	kmutex_t *lock;

	KASSERT(vnis_marker(mvp));
	/* Non-zero use count: the marker is still linked into the list. */
	if (vrefcnt(mvp) != 0) {
		lock = mvp->v_mount->mnt_vnodelock;
		mutex_enter(lock);
		TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvip, vi_mntvnodes);
		mvp->v_usecount = 0;
		mutex_exit(lock);
	}
	vnfree_marker(mvp);
}

/*
 * vfs_vnode_iterator_next1: advance the iterator to the next vnode that
 * is not a marker, passes vdead_check() and, if a selector f is given,
 * is accepted by (*f)(cl, vp).
 *
 * => do_wait selects whether vdead_check() is called with VDEAD_NOWAIT.
 * => Returns the vnode after a successful vcache_vget(), or NULL when
 *    the end of the list is reached.  If vcache_vget() fails with
 *    ENOENT the scan continues from the marker position.
 */
static struct vnode *
vfs_vnode_iterator_next1(struct vnode_iterator *vni,
    bool (*f)(void *, struct vnode *), void *cl, bool do_wait)
{
	vnode_impl_t *mvip = &vni->vi_vnode;
	struct mount *mp = VIMPL_TO_VNODE(mvip)->v_mount;
	vnode_t *vp;
	vnode_impl_t *vip;
	kmutex_t *lock;
	int error;

	KASSERT(vnis_marker(VIMPL_TO_VNODE(mvip)));

	lock = mp->mnt_vnodelock;
	do {
		mutex_enter(lock);
		/* Take the marker off the list and scan on from there. */
		vip = TAILQ_NEXT(mvip, vi_mntvnodes);
		TAILQ_REMOVE(&mp->mnt_vnodelist, mvip, vi_mntvnodes);
		VIMPL_TO_VNODE(mvip)->v_usecount = 0;
again:
		if (vip == NULL) {
			/* End of list, the marker stays unlinked. */
			mutex_exit(lock);
			return NULL;
		}
		vp = VIMPL_TO_VNODE(vip);
		KASSERT(vp != NULL);
		mutex_enter(vp->v_interlock);
		if (vnis_marker(vp) ||
		    vdead_check(vp, (do_wait ? 0 : VDEAD_NOWAIT)) ||
		    (f && !(*f)(cl, vp))) {
			mutex_exit(vp->v_interlock);
			vip = TAILQ_NEXT(vip, vi_mntvnodes);
			goto again;
		}

		/* Found a candidate: park the marker right behind it. */
		TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vip, mvip, vi_mntvnodes);
		VIMPL_TO_VNODE(mvip)->v_usecount = 1;
		mutex_exit(lock);
		/*
		 * NOTE(review): v_interlock is still held here and is not
		 * released in this function; presumably vcache_vget()
		 * consumes it -- confirm against vcache_vget().
		 */
		error = vcache_vget(vp);
		KASSERT(error == 0 || error == ENOENT);
	} while (error != 0);

	return vp;
}

/*
 * vfs_vnode_iterator_next: public, non-waiting variant of
 * vfs_vnode_iterator_next1().
 */
struct vnode *
vfs_vnode_iterator_next(struct vnode_iterator *vni,
    bool (*f)(void *, struct vnode *), void *cl)
{

	return vfs_vnode_iterator_next1(vni, f, cl, false);
}

/*
 * Move a vnode from one mount queue to another.
 *
 * => mp may be NULL to just remove the vnode from its current mount.
 */
void
vfs_insmntque(vnode_t *vp, struct mount *mp)
{
	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
	struct mount *omp;
	kmutex_t *lock;

	KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 ||
	    vp->v_tag == VT_VFS);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if ((omp = vp->v_mount) != NULL) {
		lock = omp->mnt_vnodelock;
		mutex_enter(lock);
		TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vip, vi_mntvnodes);
		mutex_exit(lock);
	}

	/*
	 * Insert into list of vnodes for the new mount point, if
	 * available.  The caller must take a reference on the mount
	 * structure and donate to the vnode.
	 */
	if ((vp->v_mount = mp) != NULL) {
		lock = mp->mnt_vnodelock;
		mutex_enter(lock);
		TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vip, vi_mntvnodes);
		mutex_exit(lock);
	}

	if (omp != NULL) {
		/* Release reference to old mount. */
		vfs_rele(omp);
	}
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If FORCECLOSE is not specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error).  If FORCECLOSE is specified, detach any active vnodes
 * that are found.
 *
 * If WRITECLOSE is set, only flush out regular file vnodes open for
 * writing.
 *
 * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

/*
 * vflushnext: return the next vnode to flush, using the waiting variant
 * of the vnode iterator.  Calls yield() once the tick count passes
 * *when and then moves *when hz / 10 ticks ahead.
 */
static vnode_t *
vflushnext(struct vnode_iterator *marker, int *when)
{
	if (getticks() > *when) {
		yield();
		*when = getticks() + hz / 10;
	}
	preempt_point();
	return vfs_vnode_iterator_next1(marker, NULL, NULL, true);
}

/*
 * Flush one vnode.  Referenced on entry, unreferenced on return.
 *
 * Returns 0 if the vnode was skipped, recycled or forcibly closed,
 * EBUSY if it is still in use and FORCECLOSE is not set, or the error
 * from VOP_FSYNC() / VOP_GETATTR() in the WRITECLOSE case.
 */
static int
vflush_one(vnode_t *vp, vnode_t *skipvp, int flags)
{
	int error;
	struct vattr vattr;

	if (vp == skipvp ||
	    ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM))) {
		vrele(vp);
		return 0;
	}
	/*
	 * If WRITECLOSE is set, only flush out regular file
	 * vnodes open for writing or open and unlinked.
	 */
	if ((flags & WRITECLOSE)) {
		if (vp->v_type != VREG) {
			vrele(vp);
			return 0;
		}
		error = vn_lock(vp, LK_EXCLUSIVE);
		if (error) {
			KASSERT(error == ENOENT);
			vrele(vp);
			return 0;
		}
		error = VOP_FSYNC(vp, curlwp->l_cred, FSYNC_WAIT, 0, 0);
		if (error == 0)
			error = VOP_GETATTR(vp, &vattr, curlwp->l_cred);
		VOP_UNLOCK(vp);
		if (error) {
			vrele(vp);
			return error;
		}
		/* Not open for writing and still linked: leave it alone. */
		if (vp->v_writecount == 0 && vattr.va_nlink > 0) {
			vrele(vp);
			return 0;
		}
	}
	/*
	 * First try to recycle the vnode.
	 */
	if (vrecycle(vp))
		return 0;
	/*
	 * If FORCECLOSE is set, forcibly close the vnode.
	 * For block or character devices, revert to an
	 * anonymous device.  For all other files, just
	 * kill them.
	 */
	if (flags & FORCECLOSE) {
		if (vrefcnt(vp) > 1 &&
		    (vp->v_type == VBLK || vp->v_type == VCHR))
			vcache_make_anon(vp);
		else
			vgone(vp);
		return 0;
	}
	vrele(vp);
	return EBUSY;
}

/*
 * vflush: flush all vnodes of mount mp as described above, skipping
 * skipvp.
 *
 * As long as busy vnodes remain and no other error occurred the whole
 * scan is repeated, at most two more times.  Returns the first error
 * other than EBUSY, else EBUSY if busy vnodes remain, else 0.
 */
int
vflush(struct mount *mp, vnode_t *skipvp, int flags)
{
	vnode_t *vp;
	struct vnode_iterator *marker;
	int busy, error, when, retries = 2;

	do {
		busy = error = when = 0;

		/*
		 * First, flush out any vnode references from the
		 * deferred vrele list.
		 */
		vrele_flush(mp);

		vfs_vnode_iterator_init(mp, &marker);

		while ((vp = vflushnext(marker, &when)) != NULL) {
			error = vflush_one(vp, skipvp, flags);
			if (error == EBUSY) {
				/* Count it and go on with the next vnode. */
				error = 0;
				busy++;
#ifdef DEBUG
				/*
				 * NOTE(review): vflush_one() has already
				 * dropped its reference to vp here.
				 */
				if (busyprt && retries == 0)
					vprint("vflush: busy vnode", vp);
#endif
			} else if (error != 0) {
				break;
			}
		}

		vfs_vnode_iterator_destroy(marker);
	} while (error == 0 && busy > 0 && retries-- > 0);

	if (error)
		return error;
	if (busy)
		return EBUSY;
	return 0;
}

/*
 * Mount a file system.
*/

/*
 * Scan all active processes to see if any of them have a current or root
 * directory onto which the new filesystem has just been mounted.  If so,
 * replace them with the new mount point.
 */
static void
mount_checkdirs(vnode_t *olddp)
{
	vnode_t *newdp, *rele1, *rele2;
	struct cwdinfo *cwdi;
	struct proc *p;
	bool retry;

	/* A use count of one means there is nothing to replace. */
	if (vrefcnt(olddp) == 1) {
		return;
	}
	if (VFS_ROOT(olddp->v_mountedhere, LK_EXCLUSIVE, &newdp))
		panic("mount: lost mount");

	do {
		retry = false;
		mutex_enter(&proc_lock);
		PROCLIST_FOREACH(p, &allproc) {
			if ((cwdi = p->p_cwdi) == NULL)
				continue;
			/*
			 * Cannot change to the old directory any more,
			 * so even if we see a stale value it is not a
			 * problem.
			 */
			if (cwdi->cwdi_cdir != olddp &&
			    cwdi->cwdi_rdir != olddp)
				continue;
			retry = true;
			rele1 = NULL;
			rele2 = NULL;
			/*
			 * Hold the cwdinfo across dropping proc_lock,
			 * which is released before taking cwdi_lock.
			 */
			atomic_inc_uint(&cwdi->cwdi_refcnt);
			mutex_exit(&proc_lock);
			rw_enter(&cwdi->cwdi_lock, RW_WRITER);
			if (cwdi->cwdi_cdir == olddp) {
				rele1 = cwdi->cwdi_cdir;
				vref(newdp);
				cwdi->cwdi_cdir = newdp;
			}
			if (cwdi->cwdi_rdir == olddp) {
				rele2 = cwdi->cwdi_rdir;
				vref(newdp);
				cwdi->cwdi_rdir = newdp;
			}
			rw_exit(&cwdi->cwdi_lock);
			cwdfree(cwdi);
			if (rele1 != NULL)
				vrele(rele1);
			if (rele2 != NULL)
				vrele(rele2);
			/*
			 * proc_lock was dropped above, so restart the
			 * scan of allproc from the beginning.
			 */
			mutex_enter(&proc_lock);
			break;
		}
		mutex_exit(&proc_lock);
	} while (retry);

	if (rootvnode == olddp) {
		vrele(rootvnode);
		vref(newdp);
		rootvnode = newdp;
	}
	vput(newdp);
}

/*
 * Start extended attributes
 *
 * Returns the result of VFS_EXTATTRCTL(); a failure is also printed.
 */
static int
start_extattr(struct mount *mp)
{
	int error;

	error = VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, NULL, 0, NULL);
	if (error)
		printf("%s: failed to start extattr: error = %d\n",
		    mp->mnt_stat.f_mntonname, error);

	return error;
}

/*
 * mount_domount: mount a new file system of type vfsops on the
 * directory vnode *vpp.
 *
 * => l is the calling lwp; its credentials are used for the kauth
 *    check and to set the owner of the mount.
 * => path, data and data_len are passed on to VFS_MOUNT(); path is
 *    also looked up again and must resolve to *vpp.
 * => The caller's vfsops reference is released with vfs_delref() on
 *    the early error returns and is otherwise owned by the new mount.
 * => *vpp is set to NULL once the file system is on the mount list,
 *    even if VFS_START() fails.
 *
 * Returns 0 on success or an errno value.
 */
int
mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops,
    const char *path, int flags, void *data, size_t *data_len)
{
	vnode_t *vp = *vpp;
	struct mount *mp;
	struct pathbuf *pb;
	struct nameidata nd;
	int error, error2;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
	    KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
	if (error) {
		vfs_delref(vfsops);
		return error;
	}

	/* Cannot make a non-dir a mount-point (from here anyway). */
	if (vp->v_type != VDIR) {
		vfs_delref(vfsops);
		return ENOTDIR;
	}

	if (flags & MNT_EXPORTED) {
		vfs_delref(vfsops);
		return EINVAL;
	}

	if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) {
		vfs_delref(vfsops);
		return ENOMEM;
	}

	mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);

	/*
	 * The underlying file system may refuse the mount for
	 * various reasons.  Allow the user to force it to happen.
	 *
	 * Set the mount level flags.
	 */
	mp->mnt_flag = flags & (MNT_BASIC_FLAGS | MNT_FORCE | MNT_IGNORE);

	error = VFS_MOUNT(mp, path, data, data_len);
	mp->mnt_flag &= ~MNT_OP_FLAGS;

	if (error != 0) {
		/* Dropping the only reference also releases vfsops. */
		vfs_rele(mp);
		return error;
	}

	/* Suspend new file system before taking mnt_updating. */
	do {
		error2 = vfs_suspend(mp, 0);
	} while (error2 == EINTR || error2 == ERESTART);
	/* error2 == 0 is remembered: only then vfs_resume() is needed. */
	KASSERT(error2 == 0 || error2 == EOPNOTSUPP);
	mutex_enter(mp->mnt_updating);

	/*
	 * Validate and prepare the mount point.
	 */
	error = pathbuf_copyin(path, &pb);
	if (error != 0) {
		goto err_mounted;
	}
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
	error = namei(&nd);
	pathbuf_destroy(pb);
	if (error != 0) {
		goto err_mounted;
	}
	/* The path must still name the vnode the caller passed in. */
	if (nd.ni_vp != vp) {
		vput(nd.ni_vp);
		error = EINVAL;
		goto err_mounted;
	}
	/* Something is already mounted here. */
	if (vp->v_mountedhere != NULL) {
		vput(nd.ni_vp);
		error = EBUSY;
		goto err_mounted;
	}
	error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0);
	if (error != 0) {
		vput(nd.ni_vp);
		goto err_mounted;
	}

	/*
	 * Put the new filesystem on the mount list after root.
	 */
	cache_purge(vp);
	mp->mnt_iflag &= ~IMNT_WANTRDWR;

	mountlist_append(mp);
	if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
		vfs_syncer_add_to_worklist(mp);
	vp->v_mountedhere = mp;
	vput(nd.ni_vp);

	mount_checkdirs(vp);
	mutex_exit(mp->mnt_updating);
	if (error2 == 0)
		vfs_resume(mp);

	/* Hold an additional reference to the mount across VFS_START(). */
	vfs_ref(mp);
	(void) VFS_STATVFS(mp, &mp->mnt_stat);
	error = VFS_START(mp, 0);
	if (error) {
		vrele(vp);
	} else if (flags & MNT_EXTATTR) {
		if (start_extattr(mp) != 0)
			mp->mnt_flag &= ~MNT_EXTATTR;
	}
	/* Drop reference held for VFS_START(). */
	vfs_rele(mp);
	*vpp = NULL;
	return error;

err_mounted:
	/* VFS_MOUNT() succeeded, so undo it before releasing the mount. */
	if (VFS_UNMOUNT(mp, MNT_FORCE) != 0)
		panic("Unmounting fresh file system failed");
	mutex_exit(mp->mnt_updating);
	if (error2 == 0)
		vfs_resume(mp);
	vfs_set_lowermount(mp, NULL);
	vfs_rele(mp);

	return error;
}

/*
 * Do the actual file system unmount.  File system is assumed to have
 * been locked by the caller.
 *
 * => Caller hold reference to the mount, explicitly for dounmount().
*/
int
dounmount(struct mount *mp, int flags, struct lwp *l)
{
	vnode_t *coveredvp;
	int error, async, used_syncer, used_extattr;
	/* If the caller already suspended mp we must not resume it. */
	const bool was_suspended = fstrans_is_owner(mp);

#if NVERIEXEC > 0
	error = veriexec_unmountchk(mp);
	if (error)
		return (error);
#endif /* NVERIEXEC > 0 */

	if (!was_suspended) {
		error = vfs_suspend(mp, 0);
		if (error) {
			return error;
		}
	}

	KASSERT((mp->mnt_iflag & IMNT_GONE) == 0);

	/* Remember state that has to be restored if the unmount fails. */
	used_syncer = (mp->mnt_iflag & IMNT_ONWORKLIST) != 0;
	used_extattr = mp->mnt_flag & MNT_EXTATTR;

	mp->mnt_iflag |= IMNT_UNMOUNT;
	mutex_enter(mp->mnt_updating);
	async = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	if (used_syncer)
		vfs_syncer_remove_from_worklist(mp);
	error = 0;
	/* Sync first, unless read-only or the unmount is forced. */
	if (((mp->mnt_flag & MNT_RDONLY) == 0) && ((flags & MNT_FORCE) == 0)) {
		error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
	}
	if (error == 0 || (flags & MNT_FORCE)) {
		error = VFS_UNMOUNT(mp, flags);
	}
	if (error) {
		/* The unmount failed: put the mount back into service. */
		mp->mnt_iflag &= ~IMNT_UNMOUNT;
		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
			vfs_syncer_add_to_worklist(mp);
		mp->mnt_flag |= async;
		mutex_exit(mp->mnt_updating);
		if (!was_suspended)
			vfs_resume(mp);
		if (used_extattr) {
			if (start_extattr(mp) != 0)
				mp->mnt_flag &= ~MNT_EXTATTR;
			else
				mp->mnt_flag |= MNT_EXTATTR;
		}
		return (error);
	}
	mutex_exit(mp->mnt_updating);

	/*
	 * Mark the file system as gone to prevent further unmounts
	 * now that the mnt_updating lock has been released; this also
	 * prevents vfs_busy() from succeeding.
	 */
	mp->mnt_iflag |= IMNT_GONE;
	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
		coveredvp->v_mountedhere = NULL;
	}
	if (!was_suspended)
		vfs_resume(mp);

	mountlist_remove(mp);
	if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
		panic("unmount: dangling vnode");
	vfs_hooks_unmount(mp);

	vfs_set_lowermount(mp, NULL);
	vfs_rele(mp);	/* reference from mount() */
	if (coveredvp != NULLVP) {
		vrele(coveredvp);
	}
	return (0);
}

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 *
 * Forced and verbose variant of vfs_unmountall1(); returns its result.
 */
bool
vfs_unmountall(struct lwp *l)
{

	printf("unmounting file systems...\n");
	return vfs_unmountall1(l, true, true);
}

/*
 * vfs_unmount_print: report an unmounted file system; pfx is printed
 * in front of the word "unmounted".
 */
static void
vfs_unmount_print(struct mount *mp, const char *pfx)
{

	aprint_verbose("%sunmounted %s on %s type %s\n", pfx,
	    mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname,
	    mp->mnt_stat.f_fstypename);
}

/*
 * Return the mount with the highest generation less than "gen".
1042 */ 1043 static struct mount * 1044 vfs_unmount_next(uint64_t gen) 1045 { 1046 mount_iterator_t *iter; 1047 struct mount *mp, *nmp; 1048 1049 nmp = NULL; 1050 1051 mountlist_iterator_init(&iter); 1052 while ((mp = mountlist_iterator_next(iter)) != NULL) { 1053 if ((nmp == NULL || mp->mnt_gen > nmp->mnt_gen) && 1054 mp->mnt_gen < gen) { 1055 if (nmp != NULL) 1056 vfs_rele(nmp); 1057 nmp = mp; 1058 vfs_ref(nmp); 1059 } 1060 } 1061 mountlist_iterator_destroy(iter); 1062 1063 return nmp; 1064 } 1065 1066 bool 1067 vfs_unmount_forceone(struct lwp *l) 1068 { 1069 struct mount *mp; 1070 int error; 1071 1072 mp = vfs_unmount_next(mountgen); 1073 if (mp == NULL) { 1074 return false; 1075 } 1076 1077 #ifdef DEBUG 1078 printf("forcefully unmounting %s (%s)...\n", 1079 mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname); 1080 #endif 1081 if ((error = dounmount(mp, MNT_FORCE, l)) == 0) { 1082 vfs_unmount_print(mp, "forcefully "); 1083 return true; 1084 } else { 1085 vfs_rele(mp); 1086 } 1087 1088 #ifdef DEBUG 1089 printf("forceful unmount of %s failed with error %d\n", 1090 mp->mnt_stat.f_mntonname, error); 1091 #endif 1092 1093 return false; 1094 } 1095 1096 bool 1097 vfs_unmountall1(struct lwp *l, bool force, bool verbose) 1098 { 1099 struct mount *mp; 1100 mount_iterator_t *iter; 1101 bool any_error = false, progress = false; 1102 uint64_t gen; 1103 int error; 1104 1105 gen = mountgen; 1106 for (;;) { 1107 mp = vfs_unmount_next(gen); 1108 if (mp == NULL) 1109 break; 1110 gen = mp->mnt_gen; 1111 1112 #ifdef DEBUG 1113 printf("unmounting %p %s (%s)...\n", 1114 (void *)mp, mp->mnt_stat.f_mntonname, 1115 mp->mnt_stat.f_mntfromname); 1116 #endif 1117 if ((error = dounmount(mp, force ? 
MNT_FORCE : 0, l)) == 0) { 1118 vfs_unmount_print(mp, ""); 1119 progress = true; 1120 } else { 1121 vfs_rele(mp); 1122 if (verbose) { 1123 printf("unmount of %s failed with error %d\n", 1124 mp->mnt_stat.f_mntonname, error); 1125 } 1126 any_error = true; 1127 } 1128 } 1129 if (verbose) { 1130 printf("unmounting done\n"); 1131 } 1132 if (any_error && verbose) { 1133 printf("WARNING: some file systems would not unmount\n"); 1134 } 1135 /* If the mountlist is empty it is time to remove swap. */ 1136 mountlist_iterator_init(&iter); 1137 if (mountlist_iterator_next(iter) == NULL) { 1138 uvm_swap_shutdown(l); 1139 } 1140 mountlist_iterator_destroy(iter); 1141 1142 return progress; 1143 } 1144 1145 void 1146 vfs_sync_all(struct lwp *l) 1147 { 1148 printf("syncing disks... "); 1149 1150 /* remove user processes from run queue */ 1151 suspendsched(); 1152 (void)spl0(); 1153 1154 /* avoid coming back this way again if we panic. */ 1155 doing_shutdown = 1; 1156 1157 do_sys_sync(l); 1158 1159 /* Wait for sync to finish. */ 1160 if (vfs_syncwait() != 0) { 1161 #if defined(DDB) && defined(DEBUG_HALT_BUSY) 1162 Debugger(); 1163 #endif 1164 printf("giving up\n"); 1165 return; 1166 } else 1167 printf("done\n"); 1168 } 1169 1170 /* 1171 * Sync and unmount file systems before shutting down. 1172 */ 1173 void 1174 vfs_shutdown(void) 1175 { 1176 lwp_t *l = curlwp; 1177 1178 vfs_sync_all(l); 1179 1180 /* 1181 * If we have panicked - do not make the situation potentially 1182 * worse by unmounting the file systems. 1183 */ 1184 if (panicstr != NULL) { 1185 return; 1186 } 1187 1188 /* Unmount file systems. 
*/ 1189 vfs_unmountall(l); 1190 } 1191 1192 /* 1193 * Print a list of supported file system types (used by vfs_mountroot) 1194 */ 1195 static void 1196 vfs_print_fstypes(void) 1197 { 1198 struct vfsops *v; 1199 int cnt = 0; 1200 1201 mutex_enter(&vfs_list_lock); 1202 LIST_FOREACH(v, &vfs_list, vfs_list) 1203 ++cnt; 1204 mutex_exit(&vfs_list_lock); 1205 1206 if (cnt == 0) { 1207 printf("WARNING: No file system modules have been loaded.\n"); 1208 return; 1209 } 1210 1211 printf("Supported file systems:"); 1212 mutex_enter(&vfs_list_lock); 1213 LIST_FOREACH(v, &vfs_list, vfs_list) { 1214 printf(" %s", v->vfs_name); 1215 } 1216 mutex_exit(&vfs_list_lock); 1217 printf("\n"); 1218 } 1219 1220 /* 1221 * Mount the root file system. If the operator didn't specify a 1222 * file system to use, try all possible file systems until one 1223 * succeeds. 1224 */ 1225 int 1226 vfs_mountroot(void) 1227 { 1228 struct vfsops *v; 1229 int error = ENODEV; 1230 1231 if (root_device == NULL) 1232 panic("vfs_mountroot: root device unknown"); 1233 1234 switch (device_class(root_device)) { 1235 case DV_IFNET: 1236 if (rootdev != NODEV) 1237 panic("vfs_mountroot: rootdev set for DV_IFNET " 1238 "(0x%llx -> %llu,%llu)", 1239 (unsigned long long)rootdev, 1240 (unsigned long long)major(rootdev), 1241 (unsigned long long)minor(rootdev)); 1242 break; 1243 1244 case DV_DISK: 1245 if (rootdev == NODEV) 1246 panic("vfs_mountroot: rootdev not set for DV_DISK"); 1247 if (bdevvp(rootdev, &rootvp)) 1248 panic("vfs_mountroot: can't get vnode for rootdev"); 1249 vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY); 1250 error = VOP_OPEN(rootvp, FREAD, FSCRED); 1251 VOP_UNLOCK(rootvp); 1252 if (error) { 1253 printf("vfs_mountroot: can't open root device\n"); 1254 return (error); 1255 } 1256 break; 1257 1258 case DV_VIRTUAL: 1259 break; 1260 1261 default: 1262 printf("%s: inappropriate for root file system\n", 1263 device_xname(root_device)); 1264 return (ENODEV); 1265 } 1266 1267 /* 1268 * If user specified a root fs 
type, use it. Make sure the 1269 * specified type exists and has a mount_root() 1270 */ 1271 if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) { 1272 v = vfs_getopsbyname(rootfstype); 1273 error = EFTYPE; 1274 if (v != NULL) { 1275 if (v->vfs_mountroot != NULL) { 1276 error = (v->vfs_mountroot)(); 1277 } 1278 v->vfs_refcount--; 1279 } 1280 goto done; 1281 } 1282 1283 /* 1284 * Try each file system currently configured into the kernel. 1285 */ 1286 mutex_enter(&vfs_list_lock); 1287 LIST_FOREACH(v, &vfs_list, vfs_list) { 1288 if (v->vfs_mountroot == NULL) 1289 continue; 1290 #ifdef DEBUG 1291 aprint_normal("mountroot: trying %s...\n", v->vfs_name); 1292 #endif 1293 v->vfs_refcount++; 1294 mutex_exit(&vfs_list_lock); 1295 error = (*v->vfs_mountroot)(); 1296 mutex_enter(&vfs_list_lock); 1297 v->vfs_refcount--; 1298 if (!error) { 1299 aprint_normal("root file system type: %s\n", 1300 v->vfs_name); 1301 break; 1302 } 1303 } 1304 mutex_exit(&vfs_list_lock); 1305 1306 if (v == NULL) { 1307 vfs_print_fstypes(); 1308 printf("no file system for %s", device_xname(root_device)); 1309 if (device_class(root_device) == DV_DISK) 1310 printf(" (dev 0x%llx)", (unsigned long long)rootdev); 1311 printf("\n"); 1312 error = EFTYPE; 1313 } 1314 1315 done: 1316 if (error && device_class(root_device) == DV_DISK) { 1317 vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY); 1318 VOP_CLOSE(rootvp, FREAD, FSCRED); 1319 VOP_UNLOCK(rootvp); 1320 vrele(rootvp); 1321 } 1322 if (error == 0) { 1323 mount_iterator_t *iter; 1324 struct mount *mp; 1325 1326 mountlist_iterator_init(&iter); 1327 mp = mountlist_iterator_next(iter); 1328 KASSERT(mp != NULL); 1329 mountlist_iterator_destroy(iter); 1330 1331 mp->mnt_flag |= MNT_ROOTFS; 1332 mp->mnt_op->vfs_refcount++; 1333 1334 /* 1335 * Get the vnode for '/'. Set cwdi0.cwdi_cdir to 1336 * reference it, and donate it the reference grabbed 1337 * with VFS_ROOT(). 
1338 */ 1339 error = VFS_ROOT(mp, LK_NONE, &rootvnode); 1340 if (error) 1341 panic("cannot find root vnode, error=%d", error); 1342 cwdi0.cwdi_cdir = rootvnode; 1343 cwdi0.cwdi_rdir = NULL; 1344 1345 /* 1346 * Now that root is mounted, we can fixup initproc's CWD 1347 * info. All other processes are kthreads, which merely 1348 * share proc0's CWD info. 1349 */ 1350 initproc->p_cwdi->cwdi_cdir = rootvnode; 1351 vref(initproc->p_cwdi->cwdi_cdir); 1352 initproc->p_cwdi->cwdi_rdir = NULL; 1353 /* 1354 * Enable loading of modules from the filesystem 1355 */ 1356 module_load_vfs_init(); 1357 1358 } 1359 return (error); 1360 } 1361 1362 /* 1363 * mount_specific_key_create -- 1364 * Create a key for subsystem mount-specific data. 1365 */ 1366 int 1367 mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor) 1368 { 1369 1370 return specificdata_key_create(mount_specificdata_domain, keyp, dtor); 1371 } 1372 1373 /* 1374 * mount_specific_key_delete -- 1375 * Delete a key for subsystem mount-specific data. 1376 */ 1377 void 1378 mount_specific_key_delete(specificdata_key_t key) 1379 { 1380 1381 specificdata_key_delete(mount_specificdata_domain, key); 1382 } 1383 1384 /* 1385 * mount_initspecific -- 1386 * Initialize a mount's specificdata container. 1387 */ 1388 void 1389 mount_initspecific(struct mount *mp) 1390 { 1391 int error __diagused; 1392 1393 error = specificdata_init(mount_specificdata_domain, 1394 &mp->mnt_specdataref); 1395 KASSERT(error == 0); 1396 } 1397 1398 /* 1399 * mount_finispecific -- 1400 * Finalize a mount's specificdata container. 1401 */ 1402 void 1403 mount_finispecific(struct mount *mp) 1404 { 1405 1406 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 1407 } 1408 1409 /* 1410 * mount_getspecific -- 1411 * Return mount-specific data corresponding to the specified key. 
1412 */ 1413 void * 1414 mount_getspecific(struct mount *mp, specificdata_key_t key) 1415 { 1416 1417 return specificdata_getspecific(mount_specificdata_domain, 1418 &mp->mnt_specdataref, key); 1419 } 1420 1421 /* 1422 * mount_setspecific -- 1423 * Set mount-specific data corresponding to the specified key. 1424 */ 1425 void 1426 mount_setspecific(struct mount *mp, specificdata_key_t key, void *data) 1427 { 1428 1429 specificdata_setspecific(mount_specificdata_domain, 1430 &mp->mnt_specdataref, key, data); 1431 } 1432 1433 /* 1434 * Check to see if a filesystem is mounted on a block device. 1435 */ 1436 int 1437 vfs_mountedon(vnode_t *vp) 1438 { 1439 vnode_t *vq; 1440 int error = 0; 1441 1442 if (vp->v_type != VBLK) 1443 return ENOTBLK; 1444 if (spec_node_getmountedfs(vp) != NULL) 1445 return EBUSY; 1446 if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, VDEAD_NOWAIT, &vq) 1447 == 0) { 1448 if (spec_node_getmountedfs(vq) != NULL) 1449 error = EBUSY; 1450 vrele(vq); 1451 } 1452 1453 return error; 1454 } 1455 1456 /* 1457 * Check if a device pointed to by vp is mounted. 1458 * 1459 * Returns: 1460 * EINVAL if it's not a disk 1461 * EBUSY if it's a disk and mounted 1462 * 0 if it's a disk and not mounted 1463 */ 1464 int 1465 rawdev_mounted(vnode_t *vp, vnode_t **bvpp) 1466 { 1467 vnode_t *bvp; 1468 dev_t dev; 1469 int d_type; 1470 1471 bvp = NULL; 1472 d_type = D_OTHER; 1473 1474 if (iskmemvp(vp)) 1475 return EINVAL; 1476 1477 switch (vp->v_type) { 1478 case VCHR: { 1479 const struct cdevsw *cdev; 1480 1481 dev = vp->v_rdev; 1482 cdev = cdevsw_lookup(dev); 1483 if (cdev != NULL) { 1484 dev_t blkdev; 1485 1486 blkdev = devsw_chr2blk(dev); 1487 if (blkdev != NODEV) { 1488 if (vfinddev(blkdev, VBLK, &bvp) != 0) { 1489 d_type = (cdev->d_flag & D_TYPEMASK); 1490 /* XXX: what if bvp disappears? 
*/ 1491 vrele(bvp); 1492 } 1493 } 1494 } 1495 1496 break; 1497 } 1498 1499 case VBLK: { 1500 const struct bdevsw *bdev; 1501 1502 dev = vp->v_rdev; 1503 bdev = bdevsw_lookup(dev); 1504 if (bdev != NULL) 1505 d_type = (bdev->d_flag & D_TYPEMASK); 1506 1507 bvp = vp; 1508 1509 break; 1510 } 1511 1512 default: 1513 break; 1514 } 1515 1516 if (d_type != D_DISK) 1517 return EINVAL; 1518 1519 if (bvpp != NULL) 1520 *bvpp = bvp; 1521 1522 /* 1523 * XXX: This is bogus. We should be failing the request 1524 * XXX: not only if this specific slice is mounted, but 1525 * XXX: if it's on a disk with any other mounted slice. 1526 */ 1527 if (vfs_mountedon(bvp)) 1528 return EBUSY; 1529 1530 return 0; 1531 } 1532 1533 /* 1534 * Make a 'unique' number from a mount type name. 1535 */ 1536 long 1537 makefstype(const char *type) 1538 { 1539 long rv; 1540 1541 for (rv = 0; *type; type++) { 1542 rv <<= 2; 1543 rv ^= *type; 1544 } 1545 return rv; 1546 } 1547 1548 static struct mountlist_entry * 1549 mountlist_alloc(enum mountlist_type type, struct mount *mp) 1550 { 1551 struct mountlist_entry *me; 1552 1553 me = kmem_zalloc(sizeof(*me), KM_SLEEP); 1554 me->me_mount = mp; 1555 me->me_type = type; 1556 1557 return me; 1558 } 1559 1560 static void 1561 mountlist_free(struct mountlist_entry *me) 1562 { 1563 1564 kmem_free(me, sizeof(*me)); 1565 } 1566 1567 void 1568 mountlist_iterator_init(mount_iterator_t **mip) 1569 { 1570 struct mountlist_entry *me; 1571 1572 me = mountlist_alloc(ME_MARKER, NULL); 1573 mutex_enter(&mountlist_lock); 1574 TAILQ_INSERT_HEAD(&mountlist, me, me_list); 1575 mutex_exit(&mountlist_lock); 1576 *mip = (mount_iterator_t *)me; 1577 } 1578 1579 void 1580 mountlist_iterator_destroy(mount_iterator_t *mi) 1581 { 1582 struct mountlist_entry *marker = &mi->mi_entry; 1583 1584 if (marker->me_mount != NULL) 1585 vfs_unbusy(marker->me_mount); 1586 1587 mutex_enter(&mountlist_lock); 1588 TAILQ_REMOVE(&mountlist, marker, me_list); 1589 mutex_exit(&mountlist_lock); 1590 1591 
mountlist_free(marker); 1592 1593 } 1594 1595 /* 1596 * Return the next mount or NULL for this iterator. 1597 * Mark it busy on success. 1598 */ 1599 static inline struct mount * 1600 _mountlist_iterator_next(mount_iterator_t *mi, bool wait) 1601 { 1602 struct mountlist_entry *me, *marker = &mi->mi_entry; 1603 struct mount *mp; 1604 int error; 1605 1606 if (marker->me_mount != NULL) { 1607 vfs_unbusy(marker->me_mount); 1608 marker->me_mount = NULL; 1609 } 1610 1611 mutex_enter(&mountlist_lock); 1612 for (;;) { 1613 KASSERT(marker->me_type == ME_MARKER); 1614 1615 me = TAILQ_NEXT(marker, me_list); 1616 if (me == NULL) { 1617 /* End of list: keep marker and return. */ 1618 mutex_exit(&mountlist_lock); 1619 return NULL; 1620 } 1621 TAILQ_REMOVE(&mountlist, marker, me_list); 1622 TAILQ_INSERT_AFTER(&mountlist, me, marker, me_list); 1623 1624 /* Skip other markers. */ 1625 if (me->me_type != ME_MOUNT) 1626 continue; 1627 1628 /* Take an initial reference for vfs_busy() below. */ 1629 mp = me->me_mount; 1630 KASSERT(mp != NULL); 1631 vfs_ref(mp); 1632 mutex_exit(&mountlist_lock); 1633 1634 /* Try to mark this mount busy and return on success. */ 1635 if (wait) 1636 error = vfs_busy(mp); 1637 else 1638 error = vfs_trybusy(mp); 1639 if (error == 0) { 1640 vfs_rele(mp); 1641 marker->me_mount = mp; 1642 return mp; 1643 } 1644 vfs_rele(mp); 1645 mutex_enter(&mountlist_lock); 1646 } 1647 } 1648 1649 struct mount * 1650 mountlist_iterator_next(mount_iterator_t *mi) 1651 { 1652 1653 return _mountlist_iterator_next(mi, true); 1654 } 1655 1656 struct mount * 1657 mountlist_iterator_trynext(mount_iterator_t *mi) 1658 { 1659 1660 return _mountlist_iterator_next(mi, false); 1661 } 1662 1663 /* 1664 * Attach new mount to the end of the mount list. 
1665 */ 1666 void 1667 mountlist_append(struct mount *mp) 1668 { 1669 struct mountlist_entry *me; 1670 1671 me = mountlist_alloc(ME_MOUNT, mp); 1672 mutex_enter(&mountlist_lock); 1673 TAILQ_INSERT_TAIL(&mountlist, me, me_list); 1674 mutex_exit(&mountlist_lock); 1675 } 1676 1677 /* 1678 * Remove mount from mount list. 1679 */void 1680 mountlist_remove(struct mount *mp) 1681 { 1682 struct mountlist_entry *me; 1683 1684 mutex_enter(&mountlist_lock); 1685 TAILQ_FOREACH(me, &mountlist, me_list) 1686 if (me->me_type == ME_MOUNT && me->me_mount == mp) 1687 break; 1688 KASSERT(me != NULL); 1689 TAILQ_REMOVE(&mountlist, me, me_list); 1690 mutex_exit(&mountlist_lock); 1691 mountlist_free(me); 1692 } 1693 1694 /* 1695 * Unlocked variant to traverse the mountlist. 1696 * To be used from DDB only. 1697 */ 1698 struct mount * 1699 _mountlist_next(struct mount *mp) 1700 { 1701 struct mountlist_entry *me; 1702 1703 if (mp == NULL) { 1704 me = TAILQ_FIRST(&mountlist); 1705 } else { 1706 TAILQ_FOREACH(me, &mountlist, me_list) 1707 if (me->me_type == ME_MOUNT && me->me_mount == mp) 1708 break; 1709 if (me != NULL) 1710 me = TAILQ_NEXT(me, me_list); 1711 } 1712 1713 while (me != NULL && me->me_type != ME_MOUNT) 1714 me = TAILQ_NEXT(me, me_list); 1715 1716 return (me ? me->me_mount : NULL); 1717 } 1718