/*	$NetBSD: vfs_mount.c,v 1.93 2022/04/09 23:38:33 riastradh Exp $	*/

/*-
 * Copyright (c) 1997-2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.93 2022/04/09 23:38:33 riastradh Exp $");

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/device.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/fstrans.h>
#include <sys/namei.h>
#include <sys/extattr.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vfs_syscalls.h>
#include <sys/vnode_impl.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <uvm/uvm_swap.h>

enum mountlist_type {
	ME_MOUNT,
	ME_MARKER
};
struct mountlist_entry {
	TAILQ_ENTRY(mountlist_entry) me_list;	/* Mount list. */
	struct mount *me_mount;			/* Actual mount if ME_MOUNT,
						   otherwise the current mount. */
	enum mountlist_type me_type;		/* Mount or marker. */
};
struct mount_iterator {
	struct mountlist_entry mi_entry;
};

static struct vnode *vfs_vnode_iterator_next1(struct vnode_iterator *,
    bool (*)(void *, struct vnode *), void *, bool);

/* Root filesystem. */
vnode_t *			rootvnode;

/* Mounted filesystem list. */
static TAILQ_HEAD(mountlist, mountlist_entry) mountlist;
static kmutex_t mountlist_lock __cacheline_aligned;
int vnode_offset_next_by_lru	/* XXX: ugly hack for pstat.c */
    = offsetof(vnode_impl_t, vi_lrulist.tqe_next);

kmutex_t vfs_list_lock __cacheline_aligned;

static specificdata_domain_t mount_specificdata_domain;
static kmutex_t mntid_lock;

static kmutex_t mountgen_lock __cacheline_aligned;
static uint64_t mountgen;

void
vfs_mount_sysinit(void)
{

	TAILQ_INIT(&mountlist);
	mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);

	mount_specificdata_domain = specificdata_domain_create();
	mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
	mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE);
	mountgen = 0;
}

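/*
 * vfs_mountalloc: allocate and minimally initialize a new mount
 * structure for the given operations vector and covered vnode
 * (NULL when there is no covered vnode, e.g. for the root mount).
 * Returns the mount with one reference held by the caller.
 */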
struct mount *
vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp)
{
	struct mount *mp;
	int error __diagused;

	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
	mp->mnt_op = vfsops;
	mp->mnt_refcnt = 1;
	TAILQ_INIT(&mp->mnt_vnodelist);
	mp->mnt_renamelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_vnodelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_updating = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_vnodecovered = vp;
	mount_initspecific(mp);

	error = fstrans_mount(mp);
	KASSERT(error == 0);

	mutex_enter(&mountgen_lock);
	mp->mnt_gen = mountgen++;
	mutex_exit(&mountgen_lock);

	return mp;
}

/*
 * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and
 * initialize a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(const char *fstypename, const char *devname,
    struct mount **mpp)
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;
	int error __diagused;

	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename,
		    sizeof(mp->mnt_stat.f_fstypename)))
			break;
	if (vfsp == NULL) {
		mutex_exit(&vfs_list_lock);
		return (ENODEV);
	}
	vfsp->vfs_refcount++;
	mutex_exit(&vfs_list_lock);

	if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL)
		return ENOMEM;
	error = vfs_busy(mp);
	KASSERT(error == 0);
	mp->mnt_flag = MNT_RDONLY;
	(void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
	    sizeof(mp->mnt_stat.f_fstypename));
	mp->mnt_stat.f_mntonname[0] = '/';
	mp->mnt_stat.f_mntonname[1] = '\0';
	mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
	    '\0';
	(void)copystr(devname, mp->mnt_stat.f_mntfromname,
	    sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
	*mpp = mp;
	return 0;
}

/*
 * vfs_getnewfsid: get a new unique fsid.
 */
void
vfs_getnewfsid(struct mount *mp)
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	mutex_enter(&mntid_lock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
	mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
	tfsid.__fsid_val[1] = mtype;
	while (vfs_getvfs(&tfsid)) {
		tfsid.__fsid_val[0]++;
		xxxfs_mntid++;
	}
	mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
	mutex_exit(&mntid_lock);
}

/*
 * Lookup a mount point by filesystem identifier.
 *
 * XXX Needs to add a reference to the mount point.
 */
struct mount *
vfs_getvfs(fsid_t *fsid)
{
	mount_iterator_t *iter;
	struct mount *mp;

	mountlist_iterator_init(&iter);
	while ((mp = mountlist_iterator_next(iter)) != NULL) {
		if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
		    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
			mountlist_iterator_destroy(iter);
			return mp;
		}
	}
	mountlist_iterator_destroy(iter);
	return NULL;
}

/*
 * Take a reference to a mount structure.
 */
void
vfs_ref(struct mount *mp)
{

	KASSERT(mp->mnt_refcnt > 0 || mutex_owned(&mountlist_lock));

	atomic_inc_uint(&mp->mnt_refcnt);
}

/*
 * Drop a reference to a mount structure, freeing if the last reference.
 */
void
vfs_rele(struct mount *mp)
{

#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_release();
#endif
	if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
		return;
	}
#ifndef __HAVE_ATOMIC_AS_MEMBAR
	membar_acquire();
#endif

	/*
	 * Nothing else has visibility of the mount: we can now
	 * free the data structures.
	 */
	KASSERT(mp->mnt_refcnt == 0);
	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
	mutex_obj_free(mp->mnt_updating);
	mutex_obj_free(mp->mnt_renamelock);
	mutex_obj_free(mp->mnt_vnodelock);
	if (mp->mnt_op != NULL) {
		vfs_delref(mp->mnt_op);
	}
	fstrans_unmount(mp);
	/*
	 * Final free of mp gets done from fstrans_mount_dtor().
	 *
	 * Prevents this memory from being reused as a mount before
	 * fstrans releases all references to it.
	 */
}

/*
 * Mark a mount point as busy, and gain a new reference to it.  Used to
 * prevent the file system from being unmounted during critical sections.
 *
 * vfs_busy can be called multiple times and by multiple threads
 * and must be accompanied by the same number of vfs_unbusy calls.
 *
 * => The caller must hold a pre-existing reference to the mount.
 * => Will fail if the file system is being unmounted, or is unmounted.
 */
static inline int
_vfs_busy(struct mount *mp, bool wait)
{

	KASSERT(mp->mnt_refcnt > 0);

	if (wait) {
		fstrans_start(mp);
	} else {
		if (fstrans_start_nowait(mp))
			return EBUSY;
	}
	if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
		fstrans_done(mp);
		return ENOENT;
	}
	vfs_ref(mp);
	return 0;
}

int
vfs_busy(struct mount *mp)
{

	return _vfs_busy(mp, true);
}

int
vfs_trybusy(struct mount *mp)
{

	return _vfs_busy(mp, false);
}

/*
 * Unbusy a busy filesystem.
 *
 * Every successful vfs_busy() call must be undone by a vfs_unbusy() call.
 */
void
vfs_unbusy(struct mount *mp)
{

	KASSERT(mp->mnt_refcnt > 0);

	fstrans_done(mp);
	vfs_rele(mp);
}

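/*
 * Per-mount vnode iterator.  The iterator is implemented as a marker
 * vnode kept on the mount's vnode list; each call to the "next"
 * functions below advances the marker past the vnode it returns, so
 * the list may change between calls.
 */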
struct vnode_iterator {
	vnode_impl_t vi_vnode;
};

void
vfs_vnode_iterator_init(struct mount *mp, struct vnode_iterator **vnip)
{
	vnode_t *vp;
	vnode_impl_t *vip;

	vp = vnalloc_marker(mp);
	vip = VNODE_TO_VIMPL(vp);

	mutex_enter(mp->mnt_vnodelock);
	TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vip, vi_mntvnodes);
	vp->v_usecount = 1;
	mutex_exit(mp->mnt_vnodelock);

	*vnip = (struct vnode_iterator *)vip;
}

void
vfs_vnode_iterator_destroy(struct vnode_iterator *vni)
{
	vnode_impl_t *mvip = &vni->vi_vnode;
	vnode_t *mvp = VIMPL_TO_VNODE(mvip);
	kmutex_t *lock;

	KASSERT(vnis_marker(mvp));
	if (vrefcnt(mvp) != 0) {
		lock = mvp->v_mount->mnt_vnodelock;
		mutex_enter(lock);
		TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvip, vi_mntvnodes);
		mvp->v_usecount = 0;
		mutex_exit(lock);
	}
	vnfree_marker(mvp);
}

static struct vnode *
vfs_vnode_iterator_next1(struct vnode_iterator *vni,
    bool (*f)(void *, struct vnode *), void *cl, bool do_wait)
{
	vnode_impl_t *mvip = &vni->vi_vnode;
	struct mount *mp = VIMPL_TO_VNODE(mvip)->v_mount;
	vnode_t *vp;
	vnode_impl_t *vip;
	kmutex_t *lock;
	int error;

	KASSERT(vnis_marker(VIMPL_TO_VNODE(mvip)));

	lock = mp->mnt_vnodelock;
	do {
		mutex_enter(lock);
		vip = TAILQ_NEXT(mvip, vi_mntvnodes);
		TAILQ_REMOVE(&mp->mnt_vnodelist, mvip, vi_mntvnodes);
		VIMPL_TO_VNODE(mvip)->v_usecount = 0;
again:
		if (vip == NULL) {
			mutex_exit(lock);
			return NULL;
		}
		vp = VIMPL_TO_VNODE(vip);
		KASSERT(vp != NULL);
		mutex_enter(vp->v_interlock);
		if (vnis_marker(vp) ||
		    vdead_check(vp, (do_wait ? 0 : VDEAD_NOWAIT)) ||
		    (f && !(*f)(cl, vp))) {
			mutex_exit(vp->v_interlock);
			vip = TAILQ_NEXT(vip, vi_mntvnodes);
			goto again;
		}

		TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vip, mvip, vi_mntvnodes);
		VIMPL_TO_VNODE(mvip)->v_usecount = 1;
		mutex_exit(lock);
		error = vcache_vget(vp);
		KASSERT(error == 0 || error == ENOENT);
	} while (error != 0);

	return vp;
}

struct vnode *
vfs_vnode_iterator_next(struct vnode_iterator *vni,
    bool (*f)(void *, struct vnode *), void *cl)
{

	return vfs_vnode_iterator_next1(vni, f, cl, false);
}

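/*
 * A minimal usage sketch for the iterator above (the local variable
 * names are illustrative only): visit every vnode on a mount,
 * receiving each one referenced and releasing it when done.
 *
 *	struct vnode_iterator *marker;
 *	vnode_t *vp;
 *
 *	vfs_vnode_iterator_init(mp, &marker);
 *	while ((vp = vfs_vnode_iterator_next(marker, NULL, NULL)) != NULL) {
 *		... examine vp ...
 *		vrele(vp);
 *	}
 *	vfs_vnode_iterator_destroy(marker);
 */
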
/*
 * Move a vnode from one mount queue to another.
 */
void
vfs_insmntque(vnode_t *vp, struct mount *mp)
{
	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
	struct mount *omp;
	kmutex_t *lock;

	KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 ||
	    vp->v_tag == VT_VFS);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if ((omp = vp->v_mount) != NULL) {
		lock = omp->mnt_vnodelock;
		mutex_enter(lock);
		TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vip, vi_mntvnodes);
		mutex_exit(lock);
	}

	/*
	 * Insert into list of vnodes for the new mount point, if
	 * available.  The caller must take a reference on the mount
	 * structure and donate to the vnode.
	 */
	if ((vp->v_mount = mp) != NULL) {
		lock = mp->mnt_vnodelock;
		mutex_enter(lock);
		TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vip, vi_mntvnodes);
		mutex_exit(lock);
	}

	if (omp != NULL) {
		/* Release reference to old mount. */
		vfs_rele(omp);
	}
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If FORCECLOSE is not specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error).  If FORCECLOSE is specified, detach any active vnodes
 * that are found.
 *
 * If WRITECLOSE is set, only flush out regular file vnodes open for
 * writing.
 *
 * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

static vnode_t *
vflushnext(struct vnode_iterator *marker, int *when)
{
	if (getticks() > *when) {
		yield();
		*when = getticks() + hz / 10;
	}
	return vfs_vnode_iterator_next1(marker, NULL, NULL, true);
}

/*
 * Flush one vnode.  Referenced on entry, unreferenced on return.
 */
static int
vflush_one(vnode_t *vp, vnode_t *skipvp, int flags)
{
	int error;
	struct vattr vattr;

	if (vp == skipvp ||
	    ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM))) {
		vrele(vp);
		return 0;
	}
	/*
	 * If WRITECLOSE is set, only flush out regular file
	 * vnodes open for writing or open and unlinked.
	 */
	if ((flags & WRITECLOSE)) {
		if (vp->v_type != VREG) {
			vrele(vp);
			return 0;
		}
		error = vn_lock(vp, LK_EXCLUSIVE);
		if (error) {
			KASSERT(error == ENOENT);
			vrele(vp);
			return 0;
		}
		error = VOP_FSYNC(vp, curlwp->l_cred, FSYNC_WAIT, 0, 0);
		if (error == 0)
			error = VOP_GETATTR(vp, &vattr, curlwp->l_cred);
		VOP_UNLOCK(vp);
		if (error) {
			vrele(vp);
			return error;
		}
		if (vp->v_writecount == 0 && vattr.va_nlink > 0) {
			vrele(vp);
			return 0;
		}
	}
	/*
	 * First try to recycle the vnode.
	 */
	if (vrecycle(vp))
		return 0;
	/*
	 * If FORCECLOSE is set, forcibly close the vnode.
	 * For block or character devices, revert to an
	 * anonymous device.  For all other files, just
	 * kill them.
	 */
	if (flags & FORCECLOSE) {
		if (vrefcnt(vp) > 1 &&
		    (vp->v_type == VBLK || vp->v_type == VCHR))
			vcache_make_anon(vp);
		else
			vgone(vp);
		return 0;
	}
	vrele(vp);
	return EBUSY;
}

int
vflush(struct mount *mp, vnode_t *skipvp, int flags)
{
	vnode_t *vp;
	struct vnode_iterator *marker;
	int busy, error, when, retries = 2;

	do {
		busy = error = when = 0;

		/*
		 * First, flush out any vnode references from the
		 * deferred vrele list.
		 */
		vrele_flush(mp);

		vfs_vnode_iterator_init(mp, &marker);

		while ((vp = vflushnext(marker, &when)) != NULL) {
			error = vflush_one(vp, skipvp, flags);
			if (error == EBUSY) {
				error = 0;
				busy++;
#ifdef DEBUG
				if (busyprt && retries == 0)
					vprint("vflush: busy vnode", vp);
#endif
			} else if (error != 0) {
				break;
			}
		}

		vfs_vnode_iterator_destroy(marker);
	} while (error == 0 && busy > 0 && retries-- > 0);

	if (error)
		return error;
	if (busy)
		return EBUSY;
	return 0;
}

/*
 * Mount a file system.
 */

/*
 * Scan all active processes to see if any of them have a current or root
 * directory onto which the new filesystem has just been mounted.  If so,
 * replace them with the new mount point.
 */
static void
mount_checkdirs(vnode_t *olddp)
{
	vnode_t *newdp, *rele1, *rele2;
	struct cwdinfo *cwdi;
	struct proc *p;
	bool retry;

	if (vrefcnt(olddp) == 1) {
		return;
	}
	if (VFS_ROOT(olddp->v_mountedhere, LK_EXCLUSIVE, &newdp))
		panic("mount: lost mount");

	do {
		retry = false;
		mutex_enter(&proc_lock);
		PROCLIST_FOREACH(p, &allproc) {
			if ((cwdi = p->p_cwdi) == NULL)
				continue;
			/*
			 * Cannot change to the old directory any more,
			 * so even if we see a stale value it is not a
			 * problem.
			 */
			if (cwdi->cwdi_cdir != olddp &&
			    cwdi->cwdi_rdir != olddp)
				continue;
			retry = true;
			rele1 = NULL;
			rele2 = NULL;
			atomic_inc_uint(&cwdi->cwdi_refcnt);
			mutex_exit(&proc_lock);
			rw_enter(&cwdi->cwdi_lock, RW_WRITER);
			if (cwdi->cwdi_cdir == olddp) {
				rele1 = cwdi->cwdi_cdir;
				vref(newdp);
				cwdi->cwdi_cdir = newdp;
			}
			if (cwdi->cwdi_rdir == olddp) {
				rele2 = cwdi->cwdi_rdir;
				vref(newdp);
				cwdi->cwdi_rdir = newdp;
			}
			rw_exit(&cwdi->cwdi_lock);
			cwdfree(cwdi);
			if (rele1 != NULL)
				vrele(rele1);
			if (rele2 != NULL)
				vrele(rele2);
			mutex_enter(&proc_lock);
			break;
		}
		mutex_exit(&proc_lock);
	} while (retry);

	if (rootvnode == olddp) {
		vrele(rootvnode);
		vref(newdp);
		rootvnode = newdp;
	}
	vput(newdp);
}

/*
 * Start extended attributes
 */
static int
start_extattr(struct mount *mp)
{
	int error;

	error = VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, NULL, 0, NULL);
	if (error)
		printf("%s: failed to start extattr: error = %d\n",
		    mp->mnt_stat.f_mntonname, error);

	return error;
}

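/*
 * mount_domount: mount a file system of the given type on the
 * directory vnode *vpp; *vpp is set to NULL once the file system
 * has been mounted and placed on the mount list.
 */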
int
mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops,
    const char *path, int flags, void *data, size_t *data_len)
{
	vnode_t *vp = *vpp;
	struct mount *mp;
	struct pathbuf *pb;
	struct nameidata nd;
	int error, error2;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
	    KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
	if (error) {
		vfs_delref(vfsops);
		return error;
	}

	/* Cannot make a non-dir a mount-point (from here anyway). */
	if (vp->v_type != VDIR) {
		vfs_delref(vfsops);
		return ENOTDIR;
	}

	if (flags & MNT_EXPORTED) {
		vfs_delref(vfsops);
		return EINVAL;
	}

	if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) {
		vfs_delref(vfsops);
		return ENOMEM;
	}

	mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);

	/*
	 * The underlying file system may refuse the mount for
	 * various reasons.  Allow the user to force it to happen.
	 *
	 * Set the mount level flags.
	 */
	mp->mnt_flag = flags & (MNT_BASIC_FLAGS | MNT_FORCE | MNT_IGNORE);

	mutex_enter(mp->mnt_updating);
	error = VFS_MOUNT(mp, path, data, data_len);
	mp->mnt_flag &= ~MNT_OP_FLAGS;

	if (error != 0)
		goto err_unmounted;

	/*
	 * Validate and prepare the mount point.
	 */
	error = pathbuf_copyin(path, &pb);
	if (error != 0) {
		goto err_mounted;
	}
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
	error = namei(&nd);
	pathbuf_destroy(pb);
	if (error != 0) {
		goto err_mounted;
	}
	if (nd.ni_vp != vp) {
		vput(nd.ni_vp);
		error = EINVAL;
		goto err_mounted;
	}
	if (vp->v_mountedhere != NULL) {
		vput(nd.ni_vp);
		error = EBUSY;
		goto err_mounted;
	}
	error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0);
	if (error != 0) {
		vput(nd.ni_vp);
		goto err_mounted;
	}

	/*
	 * Put the new filesystem on the mount list after root.
	 */
	cache_purge(vp);
	mp->mnt_iflag &= ~IMNT_WANTRDWR;

	mountlist_append(mp);
	if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
		vfs_syncer_add_to_worklist(mp);
	vp->v_mountedhere = mp;
	vput(nd.ni_vp);

	mount_checkdirs(vp);
	mutex_exit(mp->mnt_updating);

	/* Hold an additional reference to the mount across VFS_START(). */
	vfs_ref(mp);
	(void) VFS_STATVFS(mp, &mp->mnt_stat);
	error = VFS_START(mp, 0);
	if (error) {
		vrele(vp);
	} else if (flags & MNT_EXTATTR) {
		if (start_extattr(mp) != 0)
			mp->mnt_flag &= ~MNT_EXTATTR;
	}
	/* Drop reference held for VFS_START(). */
	vfs_rele(mp);
	*vpp = NULL;
	return error;

err_mounted:
	do {
		error2 = vfs_suspend(mp, 0);
	} while (error2 == EINTR || error2 == ERESTART);
	KASSERT(error2 == 0 || error2 == EOPNOTSUPP);

	if (VFS_UNMOUNT(mp, MNT_FORCE) != 0)
		panic("Unmounting fresh file system failed");

	if (error2 == 0)
		vfs_resume(mp);

err_unmounted:
	mutex_exit(mp->mnt_updating);
	vfs_rele(mp);

	return error;
}

/*
 * Do the actual file system unmount.  File system is assumed to have
 * been locked by the caller.
 *
 * => The caller holds a reference to the mount, explicitly for dounmount().
 */
int
dounmount(struct mount *mp, int flags, struct lwp *l)
{
	vnode_t *coveredvp;
	int error, async, used_syncer, used_extattr;
	const bool was_suspended = fstrans_is_owner(mp);

#if NVERIEXEC > 0
	error = veriexec_unmountchk(mp);
	if (error)
		return (error);
#endif /* NVERIEXEC > 0 */

	if (!was_suspended) {
		error = vfs_suspend(mp, 0);
		if (error) {
			return error;
		}
	}

	KASSERT((mp->mnt_iflag & IMNT_GONE) == 0);

	used_syncer = (mp->mnt_iflag & IMNT_ONWORKLIST) != 0;
	used_extattr = mp->mnt_flag & MNT_EXTATTR;

	mp->mnt_iflag |= IMNT_UNMOUNT;
	mutex_enter(mp->mnt_updating);
	async = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &= ~MNT_ASYNC;
	cache_purgevfs(mp);	/* remove cache entries for this file sys */
	if (used_syncer)
		vfs_syncer_remove_from_worklist(mp);
	error = 0;
	if (((mp->mnt_flag & MNT_RDONLY) == 0) && ((flags & MNT_FORCE) == 0)) {
		error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
	}
	if (error == 0 || (flags & MNT_FORCE)) {
		error = VFS_UNMOUNT(mp, flags);
	}
	if (error) {
		mp->mnt_iflag &= ~IMNT_UNMOUNT;
		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
			vfs_syncer_add_to_worklist(mp);
		mp->mnt_flag |= async;
		mutex_exit(mp->mnt_updating);
		if (!was_suspended)
			vfs_resume(mp);
		if (used_extattr) {
			if (start_extattr(mp) != 0)
				mp->mnt_flag &= ~MNT_EXTATTR;
			else
				mp->mnt_flag |= MNT_EXTATTR;
		}
		return (error);
	}
	mutex_exit(mp->mnt_updating);

	/*
	 * Mark the filesystem as gone to prevent further umounts after
	 * the mnt_umounting lock is gone; this also prevents vfs_busy()
	 * from succeeding.
	 */
	mp->mnt_iflag |= IMNT_GONE;
	if (!was_suspended)
		vfs_resume(mp);

	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
		vn_lock(coveredvp, LK_EXCLUSIVE | LK_RETRY);
		coveredvp->v_mountedhere = NULL;
		VOP_UNLOCK(coveredvp);
	}
	mountlist_remove(mp);
	if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
		panic("unmount: dangling vnode");
	vfs_hooks_unmount(mp);

	vfs_rele(mp);	/* reference from mount() */
	if (coveredvp != NULLVP) {
		vrele(coveredvp);
	}
	return (0);
}

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
bool
vfs_unmountall(struct lwp *l)
{

	printf("unmounting file systems...\n");
	return vfs_unmountall1(l, true, true);
}

static void
vfs_unmount_print(struct mount *mp, const char *pfx)
{

	aprint_verbose("%sunmounted %s on %s type %s\n", pfx,
	    mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname,
	    mp->mnt_stat.f_fstypename);
}

/*
 * Return the mount with the highest generation less than "gen".
 */
static struct mount *
vfs_unmount_next(uint64_t gen)
{
	mount_iterator_t *iter;
	struct mount *mp, *nmp;

	nmp = NULL;

	mountlist_iterator_init(&iter);
	while ((mp = mountlist_iterator_next(iter)) != NULL) {
		if ((nmp == NULL || mp->mnt_gen > nmp->mnt_gen) &&
		    mp->mnt_gen < gen) {
			if (nmp != NULL)
				vfs_rele(nmp);
			nmp = mp;
			vfs_ref(nmp);
		}
	}
	mountlist_iterator_destroy(iter);

	return nmp;
}

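/*
 * vfs_unmount_forceone: forcibly unmount the most recently mounted
 * file system, if any.  Returns true on success.
 */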
bool
vfs_unmount_forceone(struct lwp *l)
{
	struct mount *mp;
	int error;

	mp = vfs_unmount_next(mountgen);
	if (mp == NULL) {
		return false;
	}

#ifdef DEBUG
	printf("forcefully unmounting %s (%s)...\n",
	    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
	if ((error = dounmount(mp, MNT_FORCE, l)) == 0) {
		vfs_unmount_print(mp, "forcefully ");
		return true;
	} else {
		vfs_rele(mp);
	}

#ifdef DEBUG
	printf("forceful unmount of %s failed with error %d\n",
	    mp->mnt_stat.f_mntonname, error);
#endif

	return false;
}

bool
vfs_unmountall1(struct lwp *l, bool force, bool verbose)
{
	struct mount *mp;
	mount_iterator_t *iter;
	bool any_error = false, progress = false;
	uint64_t gen;
	int error;

	gen = mountgen;
	for (;;) {
		mp = vfs_unmount_next(gen);
		if (mp == NULL)
			break;
		gen = mp->mnt_gen;

#ifdef DEBUG
		printf("unmounting %p %s (%s)...\n",
		    (void *)mp, mp->mnt_stat.f_mntonname,
		    mp->mnt_stat.f_mntfromname);
#endif
		if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0) {
			vfs_unmount_print(mp, "");
			progress = true;
		} else {
			vfs_rele(mp);
			if (verbose) {
				printf("unmount of %s failed with error %d\n",
				    mp->mnt_stat.f_mntonname, error);
			}
			any_error = true;
		}
	}
	if (verbose) {
		printf("unmounting done\n");
	}
	if (any_error && verbose) {
		printf("WARNING: some file systems would not unmount\n");
	}
	/* If the mountlist is empty it is time to remove swap. */
	mountlist_iterator_init(&iter);
	if (mountlist_iterator_next(iter) == NULL) {
		uvm_swap_shutdown(l);
	}
	mountlist_iterator_destroy(iter);

	return progress;
}

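/*
 * vfs_sync_all: sync all file systems and wait for the writes to
 * complete, in preparation for shutdown.
 */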
void
vfs_sync_all(struct lwp *l)
{
	printf("syncing disks... ");

	/* remove user processes from run queue */
	suspendsched();
	(void)spl0();

	/* avoid coming back this way again if we panic. */
	doing_shutdown = 1;

	do_sys_sync(l);

	/* Wait for sync to finish. */
	if (vfs_syncwait() != 0) {
#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif
		printf("giving up\n");
		return;
	} else
		printf("done\n");
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown(void)
{
	lwp_t *l = curlwp;

	vfs_sync_all(l);

	/*
	 * If we have panicked - do not make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL) {
		return;
	}

	/* Unmount file systems. */
	vfs_unmountall(l);
}

/*
 * Print a list of supported file system types (used by vfs_mountroot)
 */
static void
vfs_print_fstypes(void)
{
	struct vfsops *v;
	int cnt = 0;

	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(v, &vfs_list, vfs_list)
		++cnt;
	mutex_exit(&vfs_list_lock);

	if (cnt == 0) {
		printf("WARNING: No file system modules have been loaded.\n");
		return;
	}

	printf("Supported file systems:");
	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		printf(" %s", v->vfs_name);
	}
	mutex_exit(&vfs_list_lock);
	printf("\n");
}

/*
 * Mount the root file system.  If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot(void)
{
	struct vfsops *v;
	int error = ENODEV;

	if (root_device == NULL)
		panic("vfs_mountroot: root device unknown");

	switch (device_class(root_device)) {
	case DV_IFNET:
		if (rootdev != NODEV)
			panic("vfs_mountroot: rootdev set for DV_IFNET "
			    "(0x%llx -> %llu,%llu)",
			    (unsigned long long)rootdev,
			    (unsigned long long)major(rootdev),
			    (unsigned long long)minor(rootdev));
		break;

	case DV_DISK:
		if (rootdev == NODEV)
			panic("vfs_mountroot: rootdev not set for DV_DISK");
		if (bdevvp(rootdev, &rootvp))
			panic("vfs_mountroot: can't get vnode for rootdev");
		vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_OPEN(rootvp, FREAD, FSCRED);
		VOP_UNLOCK(rootvp);
		if (error) {
			printf("vfs_mountroot: can't open root device\n");
			return (error);
		}
		break;

	case DV_VIRTUAL:
		break;

	default:
		printf("%s: inappropriate for root file system\n",
		    device_xname(root_device));
		return (ENODEV);
	}

	/*
	 * If user specified a root fs type, use it.  Make sure the
	 * specified type exists and has a mount_root()
	 */
	if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) {
		v = vfs_getopsbyname(rootfstype);
		error = EFTYPE;
		if (v != NULL) {
			if (v->vfs_mountroot != NULL) {
				error = (v->vfs_mountroot)();
			}
			v->vfs_refcount--;
		}
		goto done;
	}

	/*
	 * Try each file system currently configured into the kernel.
	 */
	mutex_enter(&vfs_list_lock);
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (v->vfs_mountroot == NULL)
			continue;
#ifdef DEBUG
		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
#endif
		v->vfs_refcount++;
		mutex_exit(&vfs_list_lock);
		error = (*v->vfs_mountroot)();
		mutex_enter(&vfs_list_lock);
		v->vfs_refcount--;
		if (!error) {
			aprint_normal("root file system type: %s\n",
			    v->vfs_name);
			break;
		}
	}
	mutex_exit(&vfs_list_lock);

	if (v == NULL) {
		vfs_print_fstypes();
		printf("no file system for %s", device_xname(root_device));
		if (device_class(root_device) == DV_DISK)
			printf(" (dev 0x%llx)", (unsigned long long)rootdev);
		printf("\n");
		error = EFTYPE;
	}

done:
	if (error && device_class(root_device) == DV_DISK) {
		vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY);
		VOP_CLOSE(rootvp, FREAD, FSCRED);
		VOP_UNLOCK(rootvp);
		vrele(rootvp);
	}
	if (error == 0) {
		mount_iterator_t *iter;
		struct mount *mp;
		extern struct cwdinfo cwdi0;

		mountlist_iterator_init(&iter);
		mp = mountlist_iterator_next(iter);
		KASSERT(mp != NULL);
		mountlist_iterator_destroy(iter);

		mp->mnt_flag |= MNT_ROOTFS;
		mp->mnt_op->vfs_refcount++;

		/*
		 * Get the vnode for '/'.  Set cwdi0.cwdi_cdir to
		 * reference it, and donate it the reference grabbed
		 * with VFS_ROOT().
		 */
		error = VFS_ROOT(mp, LK_NONE, &rootvnode);
		if (error)
			panic("cannot find root vnode, error=%d", error);
		cwdi0.cwdi_cdir = rootvnode;
		cwdi0.cwdi_rdir = NULL;

		/*
		 * Now that root is mounted, we can fixup initproc's CWD
		 * info.  All other processes are kthreads, which merely
		 * share proc0's CWD info.
		 */
		initproc->p_cwdi->cwdi_cdir = rootvnode;
		vref(initproc->p_cwdi->cwdi_cdir);
		initproc->p_cwdi->cwdi_rdir = NULL;
		/*
		 * Enable loading of modules from the filesystem
		 */
		module_load_vfs_init();

	}
	return (error);
}

/*
 * mount_specific_key_create --
 *	Create a key for subsystem mount-specific data.
 */
int
mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
{

	return specificdata_key_create(mount_specificdata_domain, keyp, dtor);
}

/*
 * mount_specific_key_delete --
 *	Delete a key for subsystem mount-specific data.
 */
void
mount_specific_key_delete(specificdata_key_t key)
{

	specificdata_key_delete(mount_specificdata_domain, key);
}

/*
 * mount_initspecific --
 *	Initialize a mount's specificdata container.
 */
void
mount_initspecific(struct mount *mp)
{
	int error __diagused;

	error = specificdata_init(mount_specificdata_domain,
	    &mp->mnt_specdataref);
	KASSERT(error == 0);
}

/*
 * mount_finispecific --
 *	Finalize a mount's specificdata container.
 */
void
mount_finispecific(struct mount *mp)
{

	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
}

/*
 * mount_getspecific --
 *	Return mount-specific data corresponding to the specified key.
 */
void *
mount_getspecific(struct mount *mp, specificdata_key_t key)
{

	return specificdata_getspecific(mount_specificdata_domain,
	    &mp->mnt_specdataref, key);
}

/*
 * mount_setspecific --
 *	Set mount-specific data corresponding to the specified key.
 */
void
mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
{

	specificdata_setspecific(mount_specificdata_domain,
	    &mp->mnt_specdataref, key, data);
}

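/*
 * A minimal usage sketch for the mount-specific data interface above.
 * The key variable, destructor, and data pointer names are illustrative
 * only and do not exist elsewhere in the tree:
 *
 *	static specificdata_key_t examplefs_key;
 *
 *	(void)mount_specific_key_create(&examplefs_key, examplefs_dtor);
 *	mount_setspecific(mp, examplefs_key, data);
 *	data = mount_getspecific(mp, examplefs_key);
 *	mount_specific_key_delete(examplefs_key);
 */
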
/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vnode_t *vp)
{
	vnode_t *vq;
	int error = 0;

	if (vp->v_type != VBLK)
		return ENOTBLK;
	if (spec_node_getmountedfs(vp) != NULL)
		return EBUSY;
	if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, VDEAD_NOWAIT, &vq)
	    == 0) {
		if (spec_node_getmountedfs(vq) != NULL)
			error = EBUSY;
		vrele(vq);
	}

	return error;
}

/*
 * Check if a device pointed to by vp is mounted.
 *
 * Returns:
 *   EINVAL	if it's not a disk
 *   EBUSY	if it's a disk and mounted
 *   0		if it's a disk and not mounted
 */
int
rawdev_mounted(vnode_t *vp, vnode_t **bvpp)
{
	vnode_t *bvp;
	dev_t dev;
	int d_type;

	bvp = NULL;
	d_type = D_OTHER;

	if (iskmemvp(vp))
		return EINVAL;

	switch (vp->v_type) {
	case VCHR: {
		const struct cdevsw *cdev;

		dev = vp->v_rdev;
		cdev = cdevsw_lookup(dev);
		if (cdev != NULL) {
			dev_t blkdev;

			blkdev = devsw_chr2blk(dev);
			if (blkdev != NODEV) {
				if (vfinddev(blkdev, VBLK, &bvp) != 0) {
					d_type = (cdev->d_flag & D_TYPEMASK);
					/* XXX: what if bvp disappears? */
					vrele(bvp);
				}
			}
		}

		break;
	}

	case VBLK: {
		const struct bdevsw *bdev;

		dev = vp->v_rdev;
		bdev = bdevsw_lookup(dev);
		if (bdev != NULL)
			d_type = (bdev->d_flag & D_TYPEMASK);

		bvp = vp;

		break;
	}

	default:
		break;
	}

	if (d_type != D_DISK)
		return EINVAL;

	if (bvpp != NULL)
		*bvpp = bvp;

	/*
	 * XXX: This is bogus.  We should be failing the request
	 * XXX: not only if this specific slice is mounted, but
	 * XXX: if it's on a disk with any other mounted slice.
	 */
	if (vfs_mountedon(bvp))
		return EBUSY;

	return 0;
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(const char *type)
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}

static struct mountlist_entry *
mountlist_alloc(enum mountlist_type type, struct mount *mp)
{
	struct mountlist_entry *me;

	me = kmem_zalloc(sizeof(*me), KM_SLEEP);
	me->me_mount = mp;
	me->me_type = type;

	return me;
}

static void
mountlist_free(struct mountlist_entry *me)
{

	kmem_free(me, sizeof(*me));
}

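/*
 * A minimal usage sketch for the mount list iterator defined below
 * (see vfs_getvfs() above for a real caller).  Each mount returned
 * is kept busy until the next call, or until the iterator is
 * destroyed:
 *
 *	mount_iterator_t *iter;
 *	struct mount *mp;
 *
 *	mountlist_iterator_init(&iter);
 *	while ((mp = mountlist_iterator_next(iter)) != NULL) {
 *		... mp is busied here ...
 *	}
 *	mountlist_iterator_destroy(iter);
 */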
void
mountlist_iterator_init(mount_iterator_t **mip)
{
	struct mountlist_entry *me;

	me = mountlist_alloc(ME_MARKER, NULL);
	mutex_enter(&mountlist_lock);
	TAILQ_INSERT_HEAD(&mountlist, me, me_list);
	mutex_exit(&mountlist_lock);
	*mip = (mount_iterator_t *)me;
}

void
mountlist_iterator_destroy(mount_iterator_t *mi)
{
	struct mountlist_entry *marker = &mi->mi_entry;

	if (marker->me_mount != NULL)
		vfs_unbusy(marker->me_mount);

	mutex_enter(&mountlist_lock);
	TAILQ_REMOVE(&mountlist, marker, me_list);
	mutex_exit(&mountlist_lock);

	mountlist_free(marker);
}

/*
 * Return the next mount or NULL for this iterator.
 * Mark it busy on success.
 */
static inline struct mount *
_mountlist_iterator_next(mount_iterator_t *mi, bool wait)
{
	struct mountlist_entry *me, *marker = &mi->mi_entry;
	struct mount *mp;
	int error;

	if (marker->me_mount != NULL) {
		vfs_unbusy(marker->me_mount);
		marker->me_mount = NULL;
	}

	mutex_enter(&mountlist_lock);
	for (;;) {
		KASSERT(marker->me_type == ME_MARKER);

		me = TAILQ_NEXT(marker, me_list);
		if (me == NULL) {
			/* End of list: keep marker and return. */
			mutex_exit(&mountlist_lock);
			return NULL;
		}
		TAILQ_REMOVE(&mountlist, marker, me_list);
		TAILQ_INSERT_AFTER(&mountlist, me, marker, me_list);

		/* Skip other markers. */
		if (me->me_type != ME_MOUNT)
			continue;

		/* Take an initial reference for vfs_busy() below. */
		mp = me->me_mount;
		KASSERT(mp != NULL);
		vfs_ref(mp);
		mutex_exit(&mountlist_lock);

		/* Try to mark this mount busy and return on success. */
		if (wait)
			error = vfs_busy(mp);
		else
			error = vfs_trybusy(mp);
		if (error == 0) {
			vfs_rele(mp);
			marker->me_mount = mp;
			return mp;
		}
		vfs_rele(mp);
		mutex_enter(&mountlist_lock);
	}
}

struct mount *
mountlist_iterator_next(mount_iterator_t *mi)
{

	return _mountlist_iterator_next(mi, true);
}

struct mount *
mountlist_iterator_trynext(mount_iterator_t *mi)
{

	return _mountlist_iterator_next(mi, false);
}

/*
 * Attach new mount to the end of the mount list.
 */
void
mountlist_append(struct mount *mp)
{
	struct mountlist_entry *me;

	me = mountlist_alloc(ME_MOUNT, mp);
	mutex_enter(&mountlist_lock);
	TAILQ_INSERT_TAIL(&mountlist, me, me_list);
	mutex_exit(&mountlist_lock);
}

/*
 * Remove mount from mount list.
 */
void
mountlist_remove(struct mount *mp)
{
	struct mountlist_entry *me;

	mutex_enter(&mountlist_lock);
	TAILQ_FOREACH(me, &mountlist, me_list)
		if (me->me_type == ME_MOUNT && me->me_mount == mp)
			break;
	KASSERT(me != NULL);
	TAILQ_REMOVE(&mountlist, me, me_list);
	mutex_exit(&mountlist_lock);
	mountlist_free(me);
}

/*
 * Unlocked variant to traverse the mountlist.
 * To be used from DDB only.
 */
struct mount *
_mountlist_next(struct mount *mp)
{
	struct mountlist_entry *me;

	if (mp == NULL) {
		me = TAILQ_FIRST(&mountlist);
	} else {
		TAILQ_FOREACH(me, &mountlist, me_list)
			if (me->me_type == ME_MOUNT && me->me_mount == mp)
				break;
		if (me != NULL)
			me = TAILQ_NEXT(me, me_list);
	}

	while (me != NULL && me->me_type != ME_MOUNT)
		me = TAILQ_NEXT(me, me_list);

	return (me ? me->me_mount : NULL);
}