1 /* $NetBSD: vfs_mount.c,v 1.110 2024/12/07 02:27:38 riastradh Exp $ */ 2 3 /*- 4 * Copyright (c) 1997-2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1989, 1993 35 * The Regents of the University of California. All rights reserved. 36 * (c) UNIX System Laboratories, Inc. 37 * All or some portions of this file are derived from material licensed 38 * to the University of California by American Telephone and Telegraph 39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 40 * the permission of UNIX System Laboratories, Inc. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 67 */ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.110 2024/12/07 02:27:38 riastradh Exp $"); 71 72 #include "veriexec.h" 73 74 #include <sys/param.h> 75 #include <sys/kernel.h> 76 77 #include <sys/atomic.h> 78 #include <sys/buf.h> 79 #include <sys/conf.h> 80 #include <sys/device.h> 81 #include <sys/extattr.h> 82 #include <sys/fcntl.h> 83 #include <sys/filedesc.h> 84 #include <sys/fstrans.h> 85 #include <sys/kauth.h> 86 #include <sys/kmem.h> 87 #include <sys/module.h> 88 #include <sys/mount.h> 89 #include <sys/namei.h> 90 #include <sys/sdt.h> 91 #include <sys/syscallargs.h> 92 #include <sys/sysctl.h> 93 #include <sys/systm.h> 94 #include <sys/verified_exec.h> 95 #include <sys/vfs_syscalls.h> 96 #include <sys/vnode_impl.h> 97 98 #include <miscfs/deadfs/deadfs.h> 99 #include <miscfs/genfs/genfs.h> 100 #include <miscfs/specfs/specdev.h> 101 102 #include <uvm/uvm_swap.h> 103 104 enum mountlist_type { 105 ME_MOUNT, 106 ME_MARKER 107 }; 108 struct mountlist_entry { 109 TAILQ_ENTRY(mountlist_entry) me_list; /* Mount list. */ 110 struct mount *me_mount; /* Actual mount if ME_MOUNT, 111 current mount else. */ 112 enum mountlist_type me_type; /* Mount or marker. */ 113 }; 114 struct mount_iterator { 115 struct mountlist_entry mi_entry; 116 }; 117 118 static struct vnode *vfs_vnode_iterator_next1(struct vnode_iterator *, 119 bool (*)(void *, struct vnode *), void *, bool); 120 121 /* Root filesystem. */ 122 vnode_t * rootvnode; 123 124 /* Mounted filesystem list. */ 125 static TAILQ_HEAD(mountlist, mountlist_entry) mountlist; 126 static kmutex_t mountlist_lock __cacheline_aligned; 127 int vnode_offset_next_by_lru /* XXX: ugly hack for pstat.c */ 128 = offsetof(vnode_impl_t, vi_lrulist.tqe_next); 129 130 kmutex_t vfs_list_lock __cacheline_aligned; 131 132 static specificdata_domain_t mount_specificdata_domain; 133 static kmutex_t mntid_lock; 134 135 static kmutex_t mountgen_lock __cacheline_aligned; 136 static uint64_t mountgen; 137 138 void 139 vfs_mount_sysinit(void) 140 { 141 142 TAILQ_INIT(&mountlist); 143 mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE); 144 mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE); 145 146 mount_specificdata_domain = specificdata_domain_create(); 147 mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE); 148 mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE); 149 mountgen = 0; 150 } 151 152 struct mount * 153 vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp) 154 { 155 struct mount *mp; 156 int error __diagused; 157 158 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP); 159 mp->mnt_op = vfsops; 160 mp->mnt_refcnt = 1; 161 TAILQ_INIT(&mp->mnt_vnodelist); 162 mp->mnt_renamelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 163 mp->mnt_vnodelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 164 mp->mnt_updating = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 165 mp->mnt_vnodecovered = vp; 166 mount_initspecific(mp); 167 168 error = fstrans_mount(mp); 169 KASSERT(error == 0); 170 171 mutex_enter(&mountgen_lock); 172 mp->mnt_gen = mountgen++; 173 mutex_exit(&mountgen_lock); 174 175 return mp; 176 } 177 178 /* 179 * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and 180 * initialize a mount structure for it. 181 * 182 * Devname is usually updated by mount(8) after booting. 183 */ 184 int 185 vfs_rootmountalloc(const char *fstypename, const char *devname, 186 struct mount **mpp) 187 { 188 struct vfsops *vfsp = NULL; 189 struct mount *mp; 190 int error __diagused; 191 192 mutex_enter(&vfs_list_lock); 193 LIST_FOREACH(vfsp, &vfs_list, vfs_list) 194 if (!strncmp(vfsp->vfs_name, fstypename, 195 sizeof(mp->mnt_stat.f_fstypename))) 196 break; 197 if (vfsp == NULL) { 198 mutex_exit(&vfs_list_lock); 199 return SET_ERROR(ENODEV); 200 } 201 vfsp->vfs_refcount++; 202 mutex_exit(&vfs_list_lock); 203 204 if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL) 205 return SET_ERROR(ENOMEM); 206 error = vfs_busy(mp); 207 KASSERT(error == 0); 208 mp->mnt_flag = MNT_RDONLY; 209 (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, 210 sizeof(mp->mnt_stat.f_fstypename)); 211 mp->mnt_stat.f_mntonname[0] = '/'; 212 mp->mnt_stat.f_mntonname[1] = '\0'; 213 mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] = 214 '\0'; 215 (void)copystr(devname, mp->mnt_stat.f_mntfromname, 216 sizeof(mp->mnt_stat.f_mntfromname) - 1, 0); 217 *mpp = mp; 218 return 0; 219 } 220 221 /* 222 * vfs_getnewfsid: get a new unique fsid. 223 */ 224 void 225 vfs_getnewfsid(struct mount *mp) 226 { 227 static u_short xxxfs_mntid; 228 struct mountlist_entry *me; 229 fsid_t tfsid; 230 int mtype; 231 232 mutex_enter(&mntid_lock); 233 if (xxxfs_mntid == 0) 234 ++xxxfs_mntid; 235 mtype = makefstype(mp->mnt_op->vfs_name); 236 tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid); 237 tfsid.__fsid_val[1] = mtype; 238 /* Always increment to not return the same fsid to parallel mounts. */ 239 xxxfs_mntid++; 240 241 /* 242 * Directly walk mountlist to prevent deadlock through 243 * mountlist_iterator_next() -> vfs_busy(). 244 */ 245 mutex_enter(&mountlist_lock); 246 for (me = TAILQ_FIRST(&mountlist); me != TAILQ_END(&mountlist); ) { 247 if (me->me_type == ME_MOUNT && 248 me->me_mount->mnt_stat.f_fsidx.__fsid_val[0] == 249 tfsid.__fsid_val[0] && 250 me->me_mount->mnt_stat.f_fsidx.__fsid_val[1] == 251 tfsid.__fsid_val[1]) { 252 tfsid.__fsid_val[0]++; 253 xxxfs_mntid++; 254 me = TAILQ_FIRST(&mountlist); 255 } else { 256 me = TAILQ_NEXT(me, me_list); 257 } 258 } 259 mutex_exit(&mountlist_lock); 260 261 mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0]; 262 mp->mnt_stat.f_fsidx.__fsid_val[1] = tfsid.__fsid_val[1]; 263 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 264 mutex_exit(&mntid_lock); 265 } 266 267 /* 268 * Lookup a mount point by filesystem identifier. 269 * 270 * XXX Needs to add a reference to the mount point. 271 */ 272 struct mount * 273 vfs_getvfs(fsid_t *fsid) 274 { 275 mount_iterator_t *iter; 276 struct mount *mp; 277 278 mountlist_iterator_init(&iter); 279 while ((mp = mountlist_iterator_next(iter)) != NULL) { 280 if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] && 281 mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) { 282 mountlist_iterator_destroy(iter); 283 return mp; 284 } 285 } 286 mountlist_iterator_destroy(iter); 287 return NULL; 288 } 289 290 /* 291 * Take a reference to a mount structure. 292 */ 293 void 294 vfs_ref(struct mount *mp) 295 { 296 297 KASSERT(mp->mnt_refcnt > 0 || mutex_owned(&mountlist_lock)); 298 299 atomic_inc_uint(&mp->mnt_refcnt); 300 } 301 302 /* 303 * Drop a reference to a mount structure, freeing if the last reference. 304 */ 305 void 306 vfs_rele(struct mount *mp) 307 { 308 309 membar_release(); 310 if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) { 311 return; 312 } 313 membar_acquire(); 314 315 /* 316 * Nothing else has visibility of the mount: we can now 317 * free the data structures. 318 */ 319 KASSERT(mp->mnt_refcnt == 0); 320 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 321 mutex_obj_free(mp->mnt_updating); 322 mutex_obj_free(mp->mnt_renamelock); 323 mutex_obj_free(mp->mnt_vnodelock); 324 if (mp->mnt_op != NULL) { 325 vfs_delref(mp->mnt_op); 326 } 327 fstrans_unmount(mp); 328 /* 329 * Final free of mp gets done from fstrans_mount_dtor(). 330 * 331 * Prevents this memory to be reused as a mount before 332 * fstrans releases all references to it. 333 */ 334 } 335 336 /* 337 * Mark a mount point as busy, and gain a new reference to it. Used to 338 * prevent the file system from being unmounted during critical sections. 339 * 340 * vfs_busy can be called multiple times and by multiple threads 341 * and must be accompanied by the same number of vfs_unbusy calls. 342 * 343 * => The caller must hold a pre-existing reference to the mount. 344 * => Will fail if the file system is being unmounted, or is unmounted. 345 */ 346 static inline int 347 _vfs_busy(struct mount *mp, bool wait) 348 { 349 350 KASSERT(mp->mnt_refcnt > 0); 351 352 if (wait) { 353 fstrans_start(mp); 354 } else { 355 if (fstrans_start_nowait(mp)) 356 return SET_ERROR(EBUSY); 357 } 358 if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) { 359 fstrans_done(mp); 360 return SET_ERROR(ENOENT); 361 } 362 vfs_ref(mp); 363 return 0; 364 } 365 366 int 367 vfs_busy(struct mount *mp) 368 { 369 370 return _vfs_busy(mp, true); 371 } 372 373 int 374 vfs_trybusy(struct mount *mp) 375 { 376 377 return _vfs_busy(mp, false); 378 } 379 380 /* 381 * Unbusy a busy filesystem. 382 * 383 * Every successful vfs_busy() call must be undone by a vfs_unbusy() call. 384 */ 385 void 386 vfs_unbusy(struct mount *mp) 387 { 388 389 KASSERT(mp->mnt_refcnt > 0); 390 391 fstrans_done(mp); 392 vfs_rele(mp); 393 } 394 395 /* 396 * Change a file systems lower mount. 397 * Both the current and the new lower mount may be NULL. The caller 398 * guarantees exclusive access to the mount and holds a pre-existing 399 * reference to the new lower mount. 400 */ 401 int 402 vfs_set_lowermount(struct mount *mp, struct mount *lowermp) 403 { 404 struct mount *oldlowermp; 405 int error; 406 407 #ifdef DEBUG 408 /* 409 * Limit the depth of file system stack so kernel sanitizers 410 * may stress mount/unmount without exhausting the kernel stack. 411 */ 412 int depth; 413 struct mount *mp2; 414 415 for (depth = 0, mp2 = lowermp; mp2; depth++, mp2 = mp2->mnt_lower) { 416 if (depth == 23) 417 return SET_ERROR(EINVAL); 418 } 419 #endif 420 421 if (lowermp) { 422 if (lowermp == dead_rootmount) 423 return SET_ERROR(ENOENT); 424 error = vfs_busy(lowermp); 425 if (error) 426 return error; 427 vfs_ref(lowermp); 428 } 429 430 oldlowermp = mp->mnt_lower; 431 mp->mnt_lower = lowermp; 432 433 if (lowermp) 434 vfs_unbusy(lowermp); 435 436 if (oldlowermp) 437 vfs_rele(oldlowermp); 438 439 return 0; 440 } 441 442 struct vnode_iterator { 443 vnode_impl_t vi_vnode; 444 }; 445 446 void 447 vfs_vnode_iterator_init(struct mount *mp, struct vnode_iterator **vnip) 448 { 449 vnode_t *vp; 450 vnode_impl_t *vip; 451 452 vp = vnalloc_marker(mp); 453 vip = VNODE_TO_VIMPL(vp); 454 455 mutex_enter(mp->mnt_vnodelock); 456 TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vip, vi_mntvnodes); 457 vp->v_usecount = 1; 458 mutex_exit(mp->mnt_vnodelock); 459 460 *vnip = (struct vnode_iterator *)vip; 461 } 462 463 void 464 vfs_vnode_iterator_destroy(struct vnode_iterator *vni) 465 { 466 vnode_impl_t *mvip = &vni->vi_vnode; 467 vnode_t *mvp = VIMPL_TO_VNODE(mvip); 468 kmutex_t *lock; 469 470 KASSERT(vnis_marker(mvp)); 471 if (vrefcnt(mvp) != 0) { 472 lock = mvp->v_mount->mnt_vnodelock; 473 mutex_enter(lock); 474 TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvip, vi_mntvnodes); 475 mvp->v_usecount = 0; 476 mutex_exit(lock); 477 } 478 vnfree_marker(mvp); 479 } 480 481 static struct vnode * 482 vfs_vnode_iterator_next1(struct vnode_iterator *vni, 483 bool (*f)(void *, struct vnode *), void *cl, bool do_wait) 484 { 485 vnode_impl_t *mvip = &vni->vi_vnode; 486 struct mount *mp = VIMPL_TO_VNODE(mvip)->v_mount; 487 vnode_t *vp; 488 vnode_impl_t *vip; 489 kmutex_t *lock; 490 int error; 491 492 KASSERT(vnis_marker(VIMPL_TO_VNODE(mvip))); 493 494 lock = mp->mnt_vnodelock; 495 do { 496 mutex_enter(lock); 497 vip = TAILQ_NEXT(mvip, vi_mntvnodes); 498 TAILQ_REMOVE(&mp->mnt_vnodelist, mvip, vi_mntvnodes); 499 VIMPL_TO_VNODE(mvip)->v_usecount = 0; 500 again: 501 if (vip == NULL) { 502 mutex_exit(lock); 503 return NULL; 504 } 505 vp = VIMPL_TO_VNODE(vip); 506 KASSERT(vp != NULL); 507 mutex_enter(vp->v_interlock); 508 if (vnis_marker(vp) || 509 vdead_check(vp, (do_wait ? 0 : VDEAD_NOWAIT)) || 510 (f && !(*f)(cl, vp))) { 511 mutex_exit(vp->v_interlock); 512 vip = TAILQ_NEXT(vip, vi_mntvnodes); 513 goto again; 514 } 515 516 TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vip, mvip, 517 vi_mntvnodes); 518 VIMPL_TO_VNODE(mvip)->v_usecount = 1; 519 mutex_exit(lock); 520 error = vcache_vget(vp); 521 KASSERT(error == 0 || error == ENOENT); 522 } while (error != 0); 523 524 return vp; 525 } 526 527 struct vnode * 528 vfs_vnode_iterator_next(struct vnode_iterator *vni, 529 bool (*f)(void *, struct vnode *), void *cl) 530 { 531 532 return vfs_vnode_iterator_next1(vni, f, cl, false); 533 } 534 535 /* 536 * Move a vnode from one mount queue to another. 537 */ 538 void 539 vfs_insmntque(vnode_t *vp, struct mount *mp) 540 { 541 vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 542 struct mount *omp; 543 kmutex_t *lock; 544 545 KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 || 546 vp->v_tag == VT_VFS); 547 548 /* 549 * Delete from old mount point vnode list, if on one. 550 */ 551 if ((omp = vp->v_mount) != NULL) { 552 lock = omp->mnt_vnodelock; 553 mutex_enter(lock); 554 TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vip, vi_mntvnodes); 555 mutex_exit(lock); 556 } 557 558 /* 559 * Insert into list of vnodes for the new mount point, if 560 * available. The caller must take a reference on the mount 561 * structure and donate to the vnode. 562 */ 563 if ((vp->v_mount = mp) != NULL) { 564 lock = mp->mnt_vnodelock; 565 mutex_enter(lock); 566 TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vip, vi_mntvnodes); 567 mutex_exit(lock); 568 } 569 570 if (omp != NULL) { 571 /* Release reference to old mount. */ 572 vfs_rele(omp); 573 } 574 } 575 576 /* 577 * Remove any vnodes in the vnode table belonging to mount point mp. 578 * 579 * If FORCECLOSE is not specified, there should not be any active ones, 580 * return error if any are found (nb: this is a user error, not a 581 * system error). If FORCECLOSE is specified, detach any active vnodes 582 * that are found. 583 * 584 * If WRITECLOSE is set, only flush out regular file vnodes open for 585 * writing. 586 * 587 * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped. 588 */ 589 #ifdef DEBUG 590 int busyprt = 0; /* print out busy vnodes */ 591 struct ctldebug debug1 = { "busyprt", &busyprt }; 592 #endif 593 594 static vnode_t * 595 vflushnext(struct vnode_iterator *marker, int *when) 596 { 597 if (getticks() > *when) { 598 yield(); 599 *when = getticks() + hz / 10; 600 } 601 preempt_point(); 602 return vfs_vnode_iterator_next1(marker, NULL, NULL, true); 603 } 604 605 /* 606 * Flush one vnode. Referenced on entry, unreferenced on return. 607 */ 608 static int 609 vflush_one(vnode_t *vp, vnode_t *skipvp, int flags) 610 { 611 int error; 612 struct vattr vattr; 613 614 if (vp == skipvp || 615 ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM))) { 616 vrele(vp); 617 return 0; 618 } 619 /* 620 * If WRITECLOSE is set, only flush out regular file 621 * vnodes open for writing or open and unlinked. 622 */ 623 if ((flags & WRITECLOSE)) { 624 if (vp->v_type != VREG) { 625 vrele(vp); 626 return 0; 627 } 628 error = vn_lock(vp, LK_EXCLUSIVE); 629 if (error) { 630 KASSERT(error == ENOENT); 631 vrele(vp); 632 return 0; 633 } 634 error = VOP_FSYNC(vp, curlwp->l_cred, FSYNC_WAIT, 0, 0); 635 if (error == 0) 636 error = VOP_GETATTR(vp, &vattr, curlwp->l_cred); 637 VOP_UNLOCK(vp); 638 if (error) { 639 vrele(vp); 640 return error; 641 } 642 if (vp->v_writecount == 0 && vattr.va_nlink > 0) { 643 vrele(vp); 644 return 0; 645 } 646 } 647 /* 648 * First try to recycle the vnode. 649 */ 650 if (vrecycle(vp)) 651 return 0; 652 /* 653 * If FORCECLOSE is set, forcibly close the vnode. 654 * For block or character devices, revert to an 655 * anonymous device. For all other files, just 656 * kill them. 657 */ 658 if (flags & FORCECLOSE) { 659 if (vrefcnt(vp) > 1 && 660 (vp->v_type == VBLK || vp->v_type == VCHR)) 661 vcache_make_anon(vp); 662 else 663 vgone(vp); 664 return 0; 665 } 666 vrele(vp); 667 return SET_ERROR(EBUSY); 668 } 669 670 int 671 vflush(struct mount *mp, vnode_t *skipvp, int flags) 672 { 673 vnode_t *vp; 674 struct vnode_iterator *marker; 675 int busy, error, when, retries = 2; 676 677 do { 678 busy = error = when = 0; 679 680 /* 681 * First, flush out any vnode references from the 682 * deferred vrele list. 683 */ 684 vrele_flush(mp); 685 686 vfs_vnode_iterator_init(mp, &marker); 687 688 while ((vp = vflushnext(marker, &when)) != NULL) { 689 error = vflush_one(vp, skipvp, flags); 690 if (error == EBUSY) { 691 error = 0; 692 busy++; 693 #ifdef DEBUG 694 if (busyprt && retries == 0) 695 vprint("vflush: busy vnode", vp); 696 #endif 697 } else if (error != 0) { 698 break; 699 } 700 } 701 702 vfs_vnode_iterator_destroy(marker); 703 } while (error == 0 && busy > 0 && retries-- > 0); 704 705 if (error) 706 return error; 707 if (busy) 708 return SET_ERROR(EBUSY); 709 return 0; 710 } 711 712 /* 713 * Mount a file system. 714 */ 715 716 /* 717 * Scan all active processes to see if any of them have a current or root 718 * directory onto which the new filesystem has just been mounted. If so, 719 * replace them with the new mount point. 720 */ 721 static void 722 mount_checkdirs(vnode_t *olddp) 723 { 724 vnode_t *newdp, *rele1, *rele2; 725 struct cwdinfo *cwdi; 726 struct proc *p; 727 bool retry; 728 729 if (vrefcnt(olddp) == 1) { 730 return; 731 } 732 if (VFS_ROOT(olddp->v_mountedhere, LK_EXCLUSIVE, &newdp)) 733 panic("mount: lost mount"); 734 735 do { 736 retry = false; 737 mutex_enter(&proc_lock); 738 PROCLIST_FOREACH(p, &allproc) { 739 if ((cwdi = p->p_cwdi) == NULL) 740 continue; 741 /* 742 * Cannot change to the old directory any more, 743 * so even if we see a stale value it is not a 744 * problem. 745 */ 746 if (cwdi->cwdi_cdir != olddp && 747 cwdi->cwdi_rdir != olddp) 748 continue; 749 retry = true; 750 rele1 = NULL; 751 rele2 = NULL; 752 atomic_inc_uint(&cwdi->cwdi_refcnt); 753 mutex_exit(&proc_lock); 754 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 755 if (cwdi->cwdi_cdir == olddp) { 756 rele1 = cwdi->cwdi_cdir; 757 vref(newdp); 758 cwdi->cwdi_cdir = newdp; 759 } 760 if (cwdi->cwdi_rdir == olddp) { 761 rele2 = cwdi->cwdi_rdir; 762 vref(newdp); 763 cwdi->cwdi_rdir = newdp; 764 } 765 rw_exit(&cwdi->cwdi_lock); 766 cwdfree(cwdi); 767 if (rele1 != NULL) 768 vrele(rele1); 769 if (rele2 != NULL) 770 vrele(rele2); 771 mutex_enter(&proc_lock); 772 break; 773 } 774 mutex_exit(&proc_lock); 775 } while (retry); 776 777 if (rootvnode == olddp) { 778 vrele(rootvnode); 779 vref(newdp); 780 rootvnode = newdp; 781 } 782 vput(newdp); 783 } 784 785 /* 786 * Start extended attributes 787 */ 788 static int 789 start_extattr(struct mount *mp) 790 { 791 int error; 792 793 error = VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, NULL, 0, NULL); 794 if (error) 795 printf("%s: failed to start extattr: error = %d\n", 796 mp->mnt_stat.f_mntonname, error); 797 798 return error; 799 } 800 801 int 802 mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops, 803 const char *path, int flags, void *data, size_t *data_len) 804 { 805 vnode_t *vp = *vpp; 806 struct mount *mp; 807 struct pathbuf *pb; 808 struct nameidata nd; 809 int error, error2; 810 811 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 812 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 813 if (error) { 814 vfs_delref(vfsops); 815 return error; 816 } 817 818 /* Cannot make a non-dir a mount-point (from here anyway). */ 819 if (vp->v_type != VDIR) { 820 vfs_delref(vfsops); 821 return SET_ERROR(ENOTDIR); 822 } 823 824 if (flags & MNT_EXPORTED) { 825 vfs_delref(vfsops); 826 return SET_ERROR(EINVAL); 827 } 828 829 if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) { 830 vfs_delref(vfsops); 831 return SET_ERROR(ENOMEM); 832 } 833 834 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 835 836 /* 837 * The underlying file system may refuse the mount for 838 * various reasons. Allow the user to force it to happen. 839 * 840 * Set the mount level flags. 841 */ 842 mp->mnt_flag = flags & (MNT_BASIC_FLAGS | MNT_FORCE | MNT_IGNORE); 843 844 error = VFS_MOUNT(mp, path, data, data_len); 845 mp->mnt_flag &= ~MNT_OP_FLAGS; 846 847 if (error != 0) { 848 vfs_rele(mp); 849 return error; 850 } 851 852 /* Suspend new file system before taking mnt_updating. */ 853 do { 854 error2 = vfs_suspend(mp, 0); 855 } while (error2 == EINTR || error2 == ERESTART); 856 KASSERT(error2 == 0 || error2 == EOPNOTSUPP); 857 mutex_enter(mp->mnt_updating); 858 859 /* 860 * Validate and prepare the mount point. 861 */ 862 error = pathbuf_copyin(path, &pb); 863 if (error != 0) { 864 goto err_mounted; 865 } 866 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 867 error = namei(&nd); 868 pathbuf_destroy(pb); 869 if (error != 0) { 870 goto err_mounted; 871 } 872 if (nd.ni_vp != vp) { 873 vput(nd.ni_vp); 874 error = SET_ERROR(EINVAL); 875 goto err_mounted; 876 } 877 if (vp->v_mountedhere != NULL) { 878 vput(nd.ni_vp); 879 error = SET_ERROR(EBUSY); 880 goto err_mounted; 881 } 882 error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0); 883 if (error != 0) { 884 vput(nd.ni_vp); 885 goto err_mounted; 886 } 887 888 /* 889 * Put the new filesystem on the mount list after root. 890 */ 891 cache_purge(vp); 892 mp->mnt_iflag &= ~IMNT_WANTRDWR; 893 894 mountlist_append(mp); 895 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 896 vfs_syncer_add_to_worklist(mp); 897 vp->v_mountedhere = mp; 898 vput(nd.ni_vp); 899 900 mount_checkdirs(vp); 901 mutex_exit(mp->mnt_updating); 902 if (error2 == 0) 903 vfs_resume(mp); 904 905 /* Hold an additional reference to the mount across VFS_START(). */ 906 vfs_ref(mp); 907 (void) VFS_STATVFS(mp, &mp->mnt_stat); 908 error = VFS_START(mp, 0); 909 if (error) { 910 vrele(vp); 911 } else if (flags & MNT_EXTATTR) { 912 if (start_extattr(mp) != 0) 913 mp->mnt_flag &= ~MNT_EXTATTR; 914 } 915 /* Drop reference held for VFS_START(). */ 916 vfs_rele(mp); 917 *vpp = NULL; 918 return error; 919 920 err_mounted: 921 if (VFS_UNMOUNT(mp, MNT_FORCE) != 0) 922 panic("Unmounting fresh file system failed"); 923 mutex_exit(mp->mnt_updating); 924 if (error2 == 0) 925 vfs_resume(mp); 926 vfs_set_lowermount(mp, NULL); 927 vfs_rele(mp); 928 929 return error; 930 } 931 932 /* 933 * Do the actual file system unmount. File system is assumed to have 934 * been locked by the caller. 935 * 936 * => Caller hold reference to the mount, explicitly for dounmount(). 937 */ 938 int 939 dounmount(struct mount *mp, int flags, struct lwp *l) 940 { 941 struct vnode *coveredvp, *vp; 942 struct vnode_impl *vip; 943 int error, async, used_syncer, used_extattr; 944 const bool was_suspended = fstrans_is_owner(mp); 945 946 #if NVERIEXEC > 0 947 error = veriexec_unmountchk(mp); 948 if (error) 949 return (error); 950 #endif /* NVERIEXEC > 0 */ 951 952 if (!was_suspended) { 953 error = vfs_suspend(mp, 0); 954 if (error) { 955 return error; 956 } 957 } 958 959 KASSERT((mp->mnt_iflag & IMNT_GONE) == 0); 960 961 used_syncer = (mp->mnt_iflag & IMNT_ONWORKLIST) != 0; 962 used_extattr = mp->mnt_flag & MNT_EXTATTR; 963 964 mp->mnt_iflag |= IMNT_UNMOUNT; 965 mutex_enter(mp->mnt_updating); 966 /* 967 * Temporarily clear the MNT_ASYNC flags so that bwrite() doesn't 968 * convert the sync writes to delayed writes. 969 */ 970 async = mp->mnt_flag & MNT_ASYNC; 971 mp->mnt_flag &= ~MNT_ASYNC; 972 cache_purgevfs(mp); /* remove cache entries for this file sys */ 973 if (used_syncer) 974 vfs_syncer_remove_from_worklist(mp); 975 error = 0; 976 if (((mp->mnt_flag & MNT_RDONLY) == 0) && ((flags & MNT_FORCE) == 0)) { 977 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 978 } 979 if (error == 0 || (flags & MNT_FORCE)) { 980 error = VFS_UNMOUNT(mp, flags); 981 } 982 if (error) { 983 mp->mnt_iflag &= ~IMNT_UNMOUNT; 984 mp->mnt_flag |= async; 985 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 986 vfs_syncer_add_to_worklist(mp); 987 mutex_exit(mp->mnt_updating); 988 if (!was_suspended) 989 vfs_resume(mp); 990 if (used_extattr) { 991 if (start_extattr(mp) != 0) 992 mp->mnt_flag &= ~MNT_EXTATTR; 993 else 994 mp->mnt_flag |= MNT_EXTATTR; 995 } 996 return (error); 997 } 998 mutex_exit(mp->mnt_updating); 999 1000 /* 1001 * mark filesystem as gone to prevent further umounts 1002 * after mnt_umounting lock is gone, this also prevents 1003 * vfs_busy() from succeeding. 1004 */ 1005 mp->mnt_iflag |= IMNT_GONE; 1006 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { 1007 coveredvp->v_mountedhere = NULL; 1008 } 1009 if (!was_suspended) 1010 vfs_resume(mp); 1011 1012 mountlist_remove(mp); 1013 1014 if ((vip = TAILQ_FIRST(&mp->mnt_vnodelist)) != NULL) { 1015 vp = VIMPL_TO_VNODE(vip); 1016 vprint("dangling", vp); 1017 panic("unmount: dangling vnode"); 1018 } 1019 vfs_hooks_unmount(mp); 1020 1021 vfs_set_lowermount(mp, NULL); 1022 vfs_rele(mp); /* reference from mount() */ 1023 if (coveredvp != NULLVP) { 1024 vrele(coveredvp); 1025 } 1026 return (0); 1027 } 1028 1029 /* 1030 * Unmount all file systems. 1031 * We traverse the list in reverse order under the assumption that doing so 1032 * will avoid needing to worry about dependencies. 1033 */ 1034 bool 1035 vfs_unmountall(struct lwp *l) 1036 { 1037 1038 printf("unmounting file systems...\n"); 1039 return vfs_unmountall1(l, true, true); 1040 } 1041 1042 static void 1043 vfs_unmount_print(struct mount *mp, const char *pfx) 1044 { 1045 1046 aprint_verbose("%sunmounted %s on %s type %s\n", pfx, 1047 mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, 1048 mp->mnt_stat.f_fstypename); 1049 } 1050 1051 /* 1052 * Return the mount with the highest generation less than "gen". 1053 */ 1054 static struct mount * 1055 vfs_unmount_next(uint64_t gen) 1056 { 1057 mount_iterator_t *iter; 1058 struct mount *mp, *nmp; 1059 1060 nmp = NULL; 1061 1062 mountlist_iterator_init(&iter); 1063 while ((mp = mountlist_iterator_next(iter)) != NULL) { 1064 if ((nmp == NULL || mp->mnt_gen > nmp->mnt_gen) && 1065 mp->mnt_gen < gen) { 1066 if (nmp != NULL) 1067 vfs_rele(nmp); 1068 nmp = mp; 1069 vfs_ref(nmp); 1070 } 1071 } 1072 mountlist_iterator_destroy(iter); 1073 1074 return nmp; 1075 } 1076 1077 bool 1078 vfs_unmount_forceone(struct lwp *l) 1079 { 1080 struct mount *mp; 1081 int error; 1082 1083 mp = vfs_unmount_next(mountgen); 1084 if (mp == NULL) { 1085 return false; 1086 } 1087 1088 #ifdef DEBUG 1089 printf("forcefully unmounting %s (%s)...\n", 1090 mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname); 1091 #endif 1092 if ((error = dounmount(mp, MNT_FORCE, l)) == 0) { 1093 vfs_unmount_print(mp, "forcefully "); 1094 return true; 1095 } else { 1096 vfs_rele(mp); 1097 } 1098 1099 #ifdef DEBUG 1100 printf("forceful unmount of %s failed with error %d\n", 1101 mp->mnt_stat.f_mntonname, error); 1102 #endif 1103 1104 return false; 1105 } 1106 1107 bool 1108 vfs_unmountall1(struct lwp *l, bool force, bool verbose) 1109 { 1110 struct mount *mp; 1111 mount_iterator_t *iter; 1112 bool any_error = false, progress = false; 1113 uint64_t gen; 1114 int error; 1115 1116 gen = mountgen; 1117 for (;;) { 1118 mp = vfs_unmount_next(gen); 1119 if (mp == NULL) 1120 break; 1121 gen = mp->mnt_gen; 1122 1123 #ifdef DEBUG 1124 printf("unmounting %p %s (%s)...\n", 1125 (void *)mp, mp->mnt_stat.f_mntonname, 1126 mp->mnt_stat.f_mntfromname); 1127 #endif 1128 if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0) { 1129 vfs_unmount_print(mp, ""); 1130 progress = true; 1131 } else { 1132 vfs_rele(mp); 1133 if (verbose) { 1134 printf("unmount of %s failed with error %d\n", 1135 mp->mnt_stat.f_mntonname, error); 1136 } 1137 any_error = true; 1138 } 1139 } 1140 if (verbose) { 1141 printf("unmounting done\n"); 1142 } 1143 if (any_error && verbose) { 1144 printf("WARNING: some file systems would not unmount\n"); 1145 } 1146 /* If the mountlist is empty it is time to remove swap. */ 1147 mountlist_iterator_init(&iter); 1148 if (mountlist_iterator_next(iter) == NULL) { 1149 uvm_swap_shutdown(l); 1150 } 1151 mountlist_iterator_destroy(iter); 1152 1153 return progress; 1154 } 1155 1156 void 1157 vfs_sync_all(struct lwp *l) 1158 { 1159 printf("syncing disks... "); 1160 1161 /* remove user processes from run queue */ 1162 suspendsched(); 1163 (void)spl0(); 1164 1165 /* avoid coming back this way again if we panic. */ 1166 doing_shutdown = 1; 1167 1168 do_sys_sync(l); 1169 1170 /* Wait for sync to finish. */ 1171 if (vfs_syncwait() != 0) { 1172 #if defined(DDB) && defined(DEBUG_HALT_BUSY) 1173 Debugger(); 1174 #endif 1175 printf("giving up\n"); 1176 return; 1177 } else 1178 printf("done\n"); 1179 } 1180 1181 /* 1182 * Sync and unmount file systems before shutting down. 1183 */ 1184 void 1185 vfs_shutdown(void) 1186 { 1187 lwp_t *l = curlwp; 1188 1189 vfs_sync_all(l); 1190 1191 /* 1192 * If we have panicked - do not make the situation potentially 1193 * worse by unmounting the file systems. 1194 */ 1195 if (panicstr != NULL) { 1196 return; 1197 } 1198 1199 /* Unmount file systems. */ 1200 vfs_unmountall(l); 1201 } 1202 1203 /* 1204 * Print a list of supported file system types (used by vfs_mountroot) 1205 */ 1206 static void 1207 vfs_print_fstypes(void) 1208 { 1209 struct vfsops *v; 1210 int cnt = 0; 1211 1212 mutex_enter(&vfs_list_lock); 1213 LIST_FOREACH(v, &vfs_list, vfs_list) 1214 ++cnt; 1215 mutex_exit(&vfs_list_lock); 1216 1217 if (cnt == 0) { 1218 printf("WARNING: No file system modules have been loaded.\n"); 1219 return; 1220 } 1221 1222 printf("Supported file systems:"); 1223 mutex_enter(&vfs_list_lock); 1224 LIST_FOREACH(v, &vfs_list, vfs_list) { 1225 printf(" %s", v->vfs_name); 1226 } 1227 mutex_exit(&vfs_list_lock); 1228 printf("\n"); 1229 } 1230 1231 /* 1232 * Mount the root file system. If the operator didn't specify a 1233 * file system to use, try all possible file systems until one 1234 * succeeds. 1235 */ 1236 int 1237 vfs_mountroot(void) 1238 { 1239 struct vfsops *v; 1240 int error = ENODEV; 1241 1242 if (root_device == NULL) 1243 panic("vfs_mountroot: root device unknown"); 1244 1245 switch (device_class(root_device)) { 1246 case DV_IFNET: 1247 if (rootdev != NODEV) 1248 panic("vfs_mountroot: rootdev set for DV_IFNET " 1249 "(0x%llx -> %llu,%llu)", 1250 (unsigned long long)rootdev, 1251 (unsigned long long)major(rootdev), 1252 (unsigned long long)minor(rootdev)); 1253 break; 1254 1255 case DV_DISK: 1256 if (rootdev == NODEV) 1257 panic("vfs_mountroot: rootdev not set for DV_DISK"); 1258 if (bdevvp(rootdev, &rootvp)) 1259 panic("vfs_mountroot: can't get vnode for rootdev"); 1260 vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY); 1261 error = VOP_OPEN(rootvp, FREAD, FSCRED); 1262 VOP_UNLOCK(rootvp); 1263 if (error) { 1264 printf("vfs_mountroot: can't open root device\n"); 1265 return (error); 1266 } 1267 break; 1268 1269 case DV_VIRTUAL: 1270 break; 1271 1272 default: 1273 printf("%s: inappropriate for root file system\n", 1274 device_xname(root_device)); 1275 return SET_ERROR(ENODEV); 1276 } 1277 1278 /* 1279 * If user specified a root fs type, use it. Make sure the 1280 * specified type exists and has a mount_root() 1281 */ 1282 if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) { 1283 v = vfs_getopsbyname(rootfstype); 1284 error = SET_ERROR(EFTYPE); 1285 if (v != NULL) { 1286 if (v->vfs_mountroot != NULL) { 1287 error = (v->vfs_mountroot)(); 1288 } 1289 v->vfs_refcount--; 1290 } 1291 goto done; 1292 } 1293 1294 /* 1295 * Try each file system currently configured into the kernel. 1296 */ 1297 mutex_enter(&vfs_list_lock); 1298 LIST_FOREACH(v, &vfs_list, vfs_list) { 1299 if (v->vfs_mountroot == NULL) 1300 continue; 1301 #ifdef DEBUG 1302 aprint_normal("mountroot: trying %s...\n", v->vfs_name); 1303 #endif 1304 v->vfs_refcount++; 1305 mutex_exit(&vfs_list_lock); 1306 error = (*v->vfs_mountroot)(); 1307 mutex_enter(&vfs_list_lock); 1308 v->vfs_refcount--; 1309 if (!error) { 1310 aprint_normal("root file system type: %s\n", 1311 v->vfs_name); 1312 break; 1313 } 1314 } 1315 mutex_exit(&vfs_list_lock); 1316 1317 if (v == NULL) { 1318 vfs_print_fstypes(); 1319 printf("no file system for %s", device_xname(root_device)); 1320 if (device_class(root_device) == DV_DISK) 1321 printf(" (dev 0x%llx)", (unsigned long long)rootdev); 1322 printf("\n"); 1323 error = SET_ERROR(EFTYPE); 1324 } 1325 1326 done: 1327 if (error && device_class(root_device) == DV_DISK) { 1328 vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY); 1329 VOP_CLOSE(rootvp, FREAD, FSCRED); 1330 VOP_UNLOCK(rootvp); 1331 vrele(rootvp); 1332 } 1333 if (error == 0) { 1334 mount_iterator_t *iter; 1335 struct mount *mp; 1336 1337 mountlist_iterator_init(&iter); 1338 mp = mountlist_iterator_next(iter); 1339 KASSERT(mp != NULL); 1340 mountlist_iterator_destroy(iter); 1341 1342 mp->mnt_flag |= MNT_ROOTFS; 1343 mp->mnt_op->vfs_refcount++; 1344 1345 /* 1346 * Get the vnode for '/'. Set cwdi0.cwdi_cdir to 1347 * reference it, and donate it the reference grabbed 1348 * with VFS_ROOT(). 1349 */ 1350 error = VFS_ROOT(mp, LK_NONE, &rootvnode); 1351 if (error) 1352 panic("cannot find root vnode, error=%d", error); 1353 cwdi0.cwdi_cdir = rootvnode; 1354 cwdi0.cwdi_rdir = NULL; 1355 1356 /* 1357 * Now that root is mounted, we can fixup initproc's CWD 1358 * info. All other processes are kthreads, which merely 1359 * share proc0's CWD info. 1360 */ 1361 initproc->p_cwdi->cwdi_cdir = rootvnode; 1362 vref(initproc->p_cwdi->cwdi_cdir); 1363 initproc->p_cwdi->cwdi_rdir = NULL; 1364 /* 1365 * Enable loading of modules from the filesystem 1366 */ 1367 module_load_vfs_init(); 1368 1369 } 1370 return (error); 1371 } 1372 1373 /* 1374 * mount_specific_key_create -- 1375 * Create a key for subsystem mount-specific data. 1376 */ 1377 int 1378 mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor) 1379 { 1380 1381 return specificdata_key_create(mount_specificdata_domain, keyp, dtor); 1382 } 1383 1384 /* 1385 * mount_specific_key_delete -- 1386 * Delete a key for subsystem mount-specific data. 1387 */ 1388 void 1389 mount_specific_key_delete(specificdata_key_t key) 1390 { 1391 1392 specificdata_key_delete(mount_specificdata_domain, key); 1393 } 1394 1395 /* 1396 * mount_initspecific -- 1397 * Initialize a mount's specificdata container. 1398 */ 1399 void 1400 mount_initspecific(struct mount *mp) 1401 { 1402 int error __diagused; 1403 1404 error = specificdata_init(mount_specificdata_domain, 1405 &mp->mnt_specdataref); 1406 KASSERT(error == 0); 1407 } 1408 1409 /* 1410 * mount_finispecific -- 1411 * Finalize a mount's specificdata container. 1412 */ 1413 void 1414 mount_finispecific(struct mount *mp) 1415 { 1416 1417 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 1418 } 1419 1420 /* 1421 * mount_getspecific -- 1422 * Return mount-specific data corresponding to the specified key. 1423 */ 1424 void * 1425 mount_getspecific(struct mount *mp, specificdata_key_t key) 1426 { 1427 1428 return specificdata_getspecific(mount_specificdata_domain, 1429 &mp->mnt_specdataref, key); 1430 } 1431 1432 /* 1433 * mount_setspecific -- 1434 * Set mount-specific data corresponding to the specified key. 1435 */ 1436 void 1437 mount_setspecific(struct mount *mp, specificdata_key_t key, void *data) 1438 { 1439 1440 specificdata_setspecific(mount_specificdata_domain, 1441 &mp->mnt_specdataref, key, data); 1442 } 1443 1444 /* 1445 * Check to see if a filesystem is mounted on a block device. 1446 */ 1447 int 1448 vfs_mountedon(vnode_t *vp) 1449 { 1450 vnode_t *vq; 1451 int error = 0; 1452 1453 if (vp->v_type != VBLK) 1454 return SET_ERROR(ENOTBLK); 1455 if (spec_node_getmountedfs(vp) != NULL) 1456 return SET_ERROR(EBUSY); 1457 if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, VDEAD_NOWAIT, &vq) 1458 == 0) { 1459 if (spec_node_getmountedfs(vq) != NULL) 1460 error = SET_ERROR(EBUSY); 1461 vrele(vq); 1462 } 1463 1464 return error; 1465 } 1466 1467 /* 1468 * Check if a device pointed to by vp is mounted. 1469 * 1470 * Returns: 1471 * EINVAL if it's not a disk 1472 * EBUSY if it's a disk and mounted 1473 * 0 if it's a disk and not mounted 1474 */ 1475 int 1476 rawdev_mounted(vnode_t *vp, vnode_t **bvpp) 1477 { 1478 vnode_t *bvp; 1479 dev_t dev; 1480 int d_type; 1481 1482 bvp = NULL; 1483 d_type = D_OTHER; 1484 1485 if (iskmemvp(vp)) 1486 return SET_ERROR(EINVAL); 1487 1488 switch (vp->v_type) { 1489 case VCHR: { 1490 const struct cdevsw *cdev; 1491 1492 dev = vp->v_rdev; 1493 cdev = cdevsw_lookup(dev); 1494 if (cdev != NULL) { 1495 dev_t blkdev; 1496 1497 blkdev = devsw_chr2blk(dev); 1498 if (blkdev != NODEV) { 1499 if (vfinddev(blkdev, VBLK, &bvp) != 0) { 1500 d_type = (cdev->d_flag & D_TYPEMASK); 1501 /* XXX: what if bvp disappears? */ 1502 vrele(bvp); 1503 } 1504 } 1505 } 1506 1507 break; 1508 } 1509 1510 case VBLK: { 1511 const struct bdevsw *bdev; 1512 1513 dev = vp->v_rdev; 1514 bdev = bdevsw_lookup(dev); 1515 if (bdev != NULL) 1516 d_type = (bdev->d_flag & D_TYPEMASK); 1517 1518 bvp = vp; 1519 1520 break; 1521 } 1522 1523 default: 1524 break; 1525 } 1526 1527 if (d_type != D_DISK) 1528 return SET_ERROR(EINVAL); 1529 1530 if (bvpp != NULL) 1531 *bvpp = bvp; 1532 1533 /* 1534 * XXX: This is bogus. We should be failing the request 1535 * XXX: not only if this specific slice is mounted, but 1536 * XXX: if it's on a disk with any other mounted slice. 1537 */ 1538 if (vfs_mountedon(bvp)) 1539 return SET_ERROR(EBUSY); 1540 1541 return 0; 1542 } 1543 1544 /* 1545 * Make a 'unique' number from a mount type name. 1546 */ 1547 long 1548 makefstype(const char *type) 1549 { 1550 long rv; 1551 1552 for (rv = 0; *type; type++) { 1553 rv <<= 2; 1554 rv ^= *type; 1555 } 1556 return rv; 1557 } 1558 1559 static struct mountlist_entry * 1560 mountlist_alloc(enum mountlist_type type, struct mount *mp) 1561 { 1562 struct mountlist_entry *me; 1563 1564 me = kmem_zalloc(sizeof(*me), KM_SLEEP); 1565 me->me_mount = mp; 1566 me->me_type = type; 1567 1568 return me; 1569 } 1570 1571 static void 1572 mountlist_free(struct mountlist_entry *me) 1573 { 1574 1575 kmem_free(me, sizeof(*me)); 1576 } 1577 1578 void 1579 mountlist_iterator_init(mount_iterator_t **mip) 1580 { 1581 struct mountlist_entry *me; 1582 1583 me = mountlist_alloc(ME_MARKER, NULL); 1584 mutex_enter(&mountlist_lock); 1585 TAILQ_INSERT_HEAD(&mountlist, me, me_list); 1586 mutex_exit(&mountlist_lock); 1587 *mip = (mount_iterator_t *)me; 1588 } 1589 1590 void 1591 mountlist_iterator_destroy(mount_iterator_t *mi) 1592 { 1593 struct mountlist_entry *marker = &mi->mi_entry; 1594 1595 if (marker->me_mount != NULL) 1596 vfs_unbusy(marker->me_mount); 1597 1598 mutex_enter(&mountlist_lock); 1599 TAILQ_REMOVE(&mountlist, marker, me_list); 1600 mutex_exit(&mountlist_lock); 1601 1602 mountlist_free(marker); 1603 1604 } 1605 1606 /* 1607 * Return the next mount or NULL for this iterator. 1608 * Mark it busy on success. 1609 */ 1610 static inline struct mount * 1611 _mountlist_iterator_next(mount_iterator_t *mi, bool wait) 1612 { 1613 struct mountlist_entry *me, *marker = &mi->mi_entry; 1614 struct mount *mp; 1615 int error; 1616 1617 if (marker->me_mount != NULL) { 1618 vfs_unbusy(marker->me_mount); 1619 marker->me_mount = NULL; 1620 } 1621 1622 mutex_enter(&mountlist_lock); 1623 for (;;) { 1624 KASSERT(marker->me_type == ME_MARKER); 1625 1626 me = TAILQ_NEXT(marker, me_list); 1627 if (me == NULL) { 1628 /* End of list: keep marker and return. */ 1629 mutex_exit(&mountlist_lock); 1630 return NULL; 1631 } 1632 TAILQ_REMOVE(&mountlist, marker, me_list); 1633 TAILQ_INSERT_AFTER(&mountlist, me, marker, me_list); 1634 1635 /* Skip other markers. */ 1636 if (me->me_type != ME_MOUNT) 1637 continue; 1638 1639 /* Take an initial reference for vfs_busy() below. */ 1640 mp = me->me_mount; 1641 KASSERT(mp != NULL); 1642 vfs_ref(mp); 1643 mutex_exit(&mountlist_lock); 1644 1645 /* Try to mark this mount busy and return on success. */ 1646 if (wait) 1647 error = vfs_busy(mp); 1648 else 1649 error = vfs_trybusy(mp); 1650 if (error == 0) { 1651 vfs_rele(mp); 1652 marker->me_mount = mp; 1653 return mp; 1654 } 1655 vfs_rele(mp); 1656 mutex_enter(&mountlist_lock); 1657 } 1658 } 1659 1660 struct mount * 1661 mountlist_iterator_next(mount_iterator_t *mi) 1662 { 1663 1664 return _mountlist_iterator_next(mi, true); 1665 } 1666 1667 struct mount * 1668 mountlist_iterator_trynext(mount_iterator_t *mi) 1669 { 1670 1671 return _mountlist_iterator_next(mi, false); 1672 } 1673 1674 /* 1675 * Attach new mount to the end of the mount list. 1676 */ 1677 void 1678 mountlist_append(struct mount *mp) 1679 { 1680 struct mountlist_entry *me; 1681 1682 me = mountlist_alloc(ME_MOUNT, mp); 1683 mutex_enter(&mountlist_lock); 1684 TAILQ_INSERT_TAIL(&mountlist, me, me_list); 1685 mutex_exit(&mountlist_lock); 1686 } 1687 1688 /* 1689 * Remove mount from mount list. 1690 */ 1691 void 1692 mountlist_remove(struct mount *mp) 1693 { 1694 struct mountlist_entry *me; 1695 1696 mutex_enter(&mountlist_lock); 1697 TAILQ_FOREACH(me, &mountlist, me_list) 1698 if (me->me_type == ME_MOUNT && me->me_mount == mp) 1699 break; 1700 KASSERT(me != NULL); 1701 TAILQ_REMOVE(&mountlist, me, me_list); 1702 mutex_exit(&mountlist_lock); 1703 mountlist_free(me); 1704 } 1705 1706 /* 1707 * Unlocked variant to traverse the mountlist. 1708 * To be used from DDB only. 1709 */ 1710 struct mount * 1711 _mountlist_next(struct mount *mp) 1712 { 1713 struct mountlist_entry *me; 1714 1715 if (mp == NULL) { 1716 me = TAILQ_FIRST(&mountlist); 1717 } else { 1718 TAILQ_FOREACH(me, &mountlist, me_list) 1719 if (me->me_type == ME_MOUNT && me->me_mount == mp) 1720 break; 1721 if (me != NULL) 1722 me = TAILQ_NEXT(me, me_list); 1723 } 1724 1725 while (me != NULL && me->me_type != ME_MOUNT) 1726 me = TAILQ_NEXT(me, me_list); 1727 1728 return (me ? me->me_mount : NULL); 1729 } 1730