1 /* $NetBSD: vfs_mount.c,v 1.97 2022/09/13 09:35:31 riastradh Exp $ */ 2 3 /*- 4 * Copyright (c) 1997-2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1989, 1993 35 * The Regents of the University of California. All rights reserved. 36 * (c) UNIX System Laboratories, Inc. 
37 * All or some portions of this file are derived from material licensed 38 * to the University of California by American Telephone and Telegraph 39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 40 * the permission of UNIX System Laboratories, Inc. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.97 2022/09/13 09:35:31 riastradh Exp $");

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/device.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/fstrans.h>
#include <sys/namei.h>
#include <sys/extattr.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vfs_syscalls.h>
#include <sys/vnode_impl.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

#include <uvm/uvm_swap.h>

/*
 * Discriminator for entries on the global mount list: an entry is
 * either a real mount or a marker.
 */
enum mountlist_type {
	ME_MOUNT,
	ME_MARKER
};

/*
 * One element of the global mount list.  For ME_MOUNT entries me_mount
 * is the mount itself; for ME_MARKER entries it records the "current"
 * mount of whoever owns the marker.
 */
struct mountlist_entry {
	TAILQ_ENTRY(mountlist_entry) me_list;	/* Mount list. */
	struct mount *me_mount;			/* Actual mount if ME_MOUNT,
						   current mount else. */
	enum mountlist_type me_type;		/* Mount or marker. */
};

/*
 * Mount list iterator: a thin wrapper around a single list entry.
 * NOTE(review): presumably the embedded entry is linked into the mount
 * list as an ME_MARKER -- the iterator functions are not in this part
 * of the file, confirm there.
 */
struct mount_iterator {
	struct mountlist_entry mi_entry;
};

/* Forward declaration; the last argument selects waiting behaviour. */
static struct vnode *vfs_vnode_iterator_next1(struct vnode_iterator *,
    bool (*)(void *, struct vnode *), void *, bool);

/* Root filesystem. */
vnode_t *			rootvnode;

/* Mounted filesystem list.
*/ 120 static TAILQ_HEAD(mountlist, mountlist_entry) mountlist; 121 static kmutex_t mountlist_lock __cacheline_aligned; 122 int vnode_offset_next_by_lru /* XXX: ugly hack for pstat.c */ 123 = offsetof(vnode_impl_t, vi_lrulist.tqe_next); 124 125 kmutex_t vfs_list_lock __cacheline_aligned; 126 127 static specificdata_domain_t mount_specificdata_domain; 128 static kmutex_t mntid_lock; 129 130 static kmutex_t mountgen_lock __cacheline_aligned; 131 static uint64_t mountgen; 132 133 void 134 vfs_mount_sysinit(void) 135 { 136 137 TAILQ_INIT(&mountlist); 138 mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE); 139 mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE); 140 141 mount_specificdata_domain = specificdata_domain_create(); 142 mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE); 143 mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE); 144 mountgen = 0; 145 } 146 147 struct mount * 148 vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp) 149 { 150 struct mount *mp; 151 int error __diagused; 152 153 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP); 154 mp->mnt_op = vfsops; 155 mp->mnt_refcnt = 1; 156 TAILQ_INIT(&mp->mnt_vnodelist); 157 mp->mnt_renamelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 158 mp->mnt_vnodelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 159 mp->mnt_updating = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 160 mp->mnt_vnodecovered = vp; 161 mount_initspecific(mp); 162 163 error = fstrans_mount(mp); 164 KASSERT(error == 0); 165 166 mutex_enter(&mountgen_lock); 167 mp->mnt_gen = mountgen++; 168 mutex_exit(&mountgen_lock); 169 170 return mp; 171 } 172 173 /* 174 * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and 175 * initialize a mount structure for it. 176 * 177 * Devname is usually updated by mount(8) after booting. 
178 */ 179 int 180 vfs_rootmountalloc(const char *fstypename, const char *devname, 181 struct mount **mpp) 182 { 183 struct vfsops *vfsp = NULL; 184 struct mount *mp; 185 int error __diagused; 186 187 mutex_enter(&vfs_list_lock); 188 LIST_FOREACH(vfsp, &vfs_list, vfs_list) 189 if (!strncmp(vfsp->vfs_name, fstypename, 190 sizeof(mp->mnt_stat.f_fstypename))) 191 break; 192 if (vfsp == NULL) { 193 mutex_exit(&vfs_list_lock); 194 return (ENODEV); 195 } 196 vfsp->vfs_refcount++; 197 mutex_exit(&vfs_list_lock); 198 199 if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL) 200 return ENOMEM; 201 error = vfs_busy(mp); 202 KASSERT(error == 0); 203 mp->mnt_flag = MNT_RDONLY; 204 (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, 205 sizeof(mp->mnt_stat.f_fstypename)); 206 mp->mnt_stat.f_mntonname[0] = '/'; 207 mp->mnt_stat.f_mntonname[1] = '\0'; 208 mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] = 209 '\0'; 210 (void)copystr(devname, mp->mnt_stat.f_mntfromname, 211 sizeof(mp->mnt_stat.f_mntfromname) - 1, 0); 212 *mpp = mp; 213 return 0; 214 } 215 216 /* 217 * vfs_getnewfsid: get a new unique fsid. 218 */ 219 void 220 vfs_getnewfsid(struct mount *mp) 221 { 222 static u_short xxxfs_mntid; 223 struct mountlist_entry *me; 224 fsid_t tfsid; 225 int mtype; 226 227 mutex_enter(&mntid_lock); 228 if (xxxfs_mntid == 0) 229 ++xxxfs_mntid; 230 mtype = makefstype(mp->mnt_op->vfs_name); 231 tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid); 232 tfsid.__fsid_val[1] = mtype; 233 /* Always increment to not return the same fsid to parallel mounts. */ 234 xxxfs_mntid++; 235 236 /* 237 * Directly walk mountlist to prevent deadlock through 238 * mountlist_iterator_next() -> vfs_busy(). 
239 */ 240 mutex_enter(&mountlist_lock); 241 for (me = TAILQ_FIRST(&mountlist); me != TAILQ_END(&mountlist); ) { 242 if (me->me_type == ME_MOUNT && 243 me->me_mount->mnt_stat.f_fsidx.__fsid_val[0] == 244 tfsid.__fsid_val[0] && 245 me->me_mount->mnt_stat.f_fsidx.__fsid_val[1] == 246 tfsid.__fsid_val[1]) { 247 tfsid.__fsid_val[0]++; 248 xxxfs_mntid++; 249 me = TAILQ_FIRST(&mountlist); 250 } else { 251 me = TAILQ_NEXT(me, me_list); 252 } 253 } 254 mutex_exit(&mountlist_lock); 255 256 mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0]; 257 mp->mnt_stat.f_fsidx.__fsid_val[1] = tfsid.__fsid_val[1]; 258 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 259 mutex_exit(&mntid_lock); 260 } 261 262 /* 263 * Lookup a mount point by filesystem identifier. 264 * 265 * XXX Needs to add a reference to the mount point. 266 */ 267 struct mount * 268 vfs_getvfs(fsid_t *fsid) 269 { 270 mount_iterator_t *iter; 271 struct mount *mp; 272 273 mountlist_iterator_init(&iter); 274 while ((mp = mountlist_iterator_next(iter)) != NULL) { 275 if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] && 276 mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) { 277 mountlist_iterator_destroy(iter); 278 return mp; 279 } 280 } 281 mountlist_iterator_destroy(iter); 282 return NULL; 283 } 284 285 /* 286 * Take a reference to a mount structure. 287 */ 288 void 289 vfs_ref(struct mount *mp) 290 { 291 292 KASSERT(mp->mnt_refcnt > 0 || mutex_owned(&mountlist_lock)); 293 294 atomic_inc_uint(&mp->mnt_refcnt); 295 } 296 297 /* 298 * Drop a reference to a mount structure, freeing if the last reference. 299 */ 300 void 301 vfs_rele(struct mount *mp) 302 { 303 304 #ifndef __HAVE_ATOMIC_AS_MEMBAR 305 membar_release(); 306 #endif 307 if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) { 308 return; 309 } 310 #ifndef __HAVE_ATOMIC_AS_MEMBAR 311 membar_acquire(); 312 #endif 313 314 /* 315 * Nothing else has visibility of the mount: we can now 316 * free the data structures. 
317 */ 318 KASSERT(mp->mnt_refcnt == 0); 319 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 320 mutex_obj_free(mp->mnt_updating); 321 mutex_obj_free(mp->mnt_renamelock); 322 mutex_obj_free(mp->mnt_vnodelock); 323 if (mp->mnt_op != NULL) { 324 vfs_delref(mp->mnt_op); 325 } 326 fstrans_unmount(mp); 327 /* 328 * Final free of mp gets done from fstrans_mount_dtor(). 329 * 330 * Prevents this memory to be reused as a mount before 331 * fstrans releases all references to it. 332 */ 333 } 334 335 /* 336 * Mark a mount point as busy, and gain a new reference to it. Used to 337 * prevent the file system from being unmounted during critical sections. 338 * 339 * vfs_busy can be called multiple times and by multiple threads 340 * and must be accompanied by the same number of vfs_unbusy calls. 341 * 342 * => The caller must hold a pre-existing reference to the mount. 343 * => Will fail if the file system is being unmounted, or is unmounted. 344 */ 345 static inline int 346 _vfs_busy(struct mount *mp, bool wait) 347 { 348 349 KASSERT(mp->mnt_refcnt > 0); 350 351 if (wait) { 352 fstrans_start(mp); 353 } else { 354 if (fstrans_start_nowait(mp)) 355 return EBUSY; 356 } 357 if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) { 358 fstrans_done(mp); 359 return ENOENT; 360 } 361 vfs_ref(mp); 362 return 0; 363 } 364 365 int 366 vfs_busy(struct mount *mp) 367 { 368 369 return _vfs_busy(mp, true); 370 } 371 372 int 373 vfs_trybusy(struct mount *mp) 374 { 375 376 return _vfs_busy(mp, false); 377 } 378 379 /* 380 * Unbusy a busy filesystem. 381 * 382 * Every successful vfs_busy() call must be undone by a vfs_unbusy() call. 
 *
 * Ends the fstrans transaction and drops the mount reference, in that
 * order.
 */
void
vfs_unbusy(struct mount *mp)
{

	KASSERT(mp->mnt_refcnt > 0);

	fstrans_done(mp);
	vfs_rele(mp);
}

/*
 * A vnode iterator is a marker vnode that is linked into the mount's
 * vnode list to remember the iteration position.
 */
struct vnode_iterator {
	vnode_impl_t vi_vnode;
};

/*
 * vfs_vnode_iterator_init: allocate a marker vnode for mp, insert it at
 * the head of mp's vnode list and return it through *vnip.
 *
 * The marker's v_usecount is used as an "is on the list" flag: it is
 * set to 1 whenever the marker is linked in and to 0 when it is
 * unlinked.
 */
void
vfs_vnode_iterator_init(struct mount *mp, struct vnode_iterator **vnip)
{
	vnode_t *vp;
	vnode_impl_t *vip;

	vp = vnalloc_marker(mp);
	vip = VNODE_TO_VIMPL(vp);

	mutex_enter(mp->mnt_vnodelock);
	TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vip, vi_mntvnodes);
	vp->v_usecount = 1;
	mutex_exit(mp->mnt_vnodelock);

	*vnip = (struct vnode_iterator *)vip;
}

/*
 * vfs_vnode_iterator_destroy: unlink the marker if it is still on the
 * mount's vnode list, then free it.
 */
void
vfs_vnode_iterator_destroy(struct vnode_iterator *vni)
{
	vnode_impl_t *mvip = &vni->vi_vnode;
	vnode_t *mvp = VIMPL_TO_VNODE(mvip);
	kmutex_t *lock;

	KASSERT(vnis_marker(mvp));
	/*
	 * Non-zero count means the marker is still linked in (the
	 * iteration did not run to the end of the list).
	 * NOTE(review): relies on vrefcnt() reporting v_usecount -- confirm.
	 */
	if (vrefcnt(mvp) != 0) {
		lock = mvp->v_mount->mnt_vnodelock;
		mutex_enter(lock);
		TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvip, vi_mntvnodes);
		mvp->v_usecount = 0;
		mutex_exit(lock);
	}
	vnfree_marker(mvp);
}

/*
 * vfs_vnode_iterator_next1: advance the iterator and return the next
 * vnode accepted by the optional selector f(cl, vp), or NULL when the
 * end of the list is reached.
 *
 * => Other markers, vnodes rejected by vdead_check() and vnodes
 *    rejected by f are skipped.
 * => do_wait selects whether vdead_check() is called with 0 or with
 *    VDEAD_NOWAIT.
 * => The returned vnode has been obtained through vcache_vget(); if
 *    that fails with ENOENT the scan simply continues from the marker.
 * => When NULL is returned the marker is left off the list with
 *    v_usecount == 0.
 */
static struct vnode *
vfs_vnode_iterator_next1(struct vnode_iterator *vni,
    bool (*f)(void *, struct vnode *), void *cl, bool do_wait)
{
	vnode_impl_t *mvip = &vni->vi_vnode;
	struct mount *mp = VIMPL_TO_VNODE(mvip)->v_mount;
	vnode_t *vp;
	vnode_impl_t *vip;
	kmutex_t *lock;
	int error;

	KASSERT(vnis_marker(VIMPL_TO_VNODE(mvip)));

	lock = mp->mnt_vnodelock;
	do {
		mutex_enter(lock);
		/* Remember the successor, then take the marker off the list. */
		vip = TAILQ_NEXT(mvip, vi_mntvnodes);
		TAILQ_REMOVE(&mp->mnt_vnodelist, mvip, vi_mntvnodes);
		VIMPL_TO_VNODE(mvip)->v_usecount = 0;
again:
		if (vip == NULL) {
			mutex_exit(lock);
			return NULL;
		}
		vp = VIMPL_TO_VNODE(vip);
		KASSERT(vp != NULL);
		mutex_enter(vp->v_interlock);
		if (vnis_marker(vp) ||
		    vdead_check(vp, (do_wait ? 0 : VDEAD_NOWAIT)) ||
		    (f && !(*f)(cl, vp))) {
			mutex_exit(vp->v_interlock);
			vip = TAILQ_NEXT(vip, vi_mntvnodes);
			goto again;
		}

		/* Park the marker right behind the vnode we picked. */
		TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vip, mvip, vi_mntvnodes);
		VIMPL_TO_VNODE(mvip)->v_usecount = 1;
		mutex_exit(lock);
		/*
		 * NOTE(review): v_interlock is still held here and is not
		 * released in this function; presumably vcache_vget()
		 * consumes it -- confirm against its definition.
		 */
		error = vcache_vget(vp);
		KASSERT(error == 0 || error == ENOENT);
	} while (error != 0);

	return vp;
}

/*
 * vfs_vnode_iterator_next: public, non-waiting variant of
 * vfs_vnode_iterator_next1() (do_wait == false).
 */
struct vnode *
vfs_vnode_iterator_next(struct vnode_iterator *vni,
    bool (*f)(void *, struct vnode *), void *cl)
{

	return vfs_vnode_iterator_next1(vni, f, cl, false);
}

/*
 * Move a vnode from one mount queue to another.
 *
 * => mp may be NULL, in which case the vnode is only removed from its
 *    old mount.
 * => The reference the vnode held on its old mount is released last,
 *    after the vnode has been relinked.
 */
void
vfs_insmntque(vnode_t *vp, struct mount *mp)
{
	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
	struct mount *omp;
	kmutex_t *lock;

	KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 ||
	    vp->v_tag == VT_VFS);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if ((omp = vp->v_mount) != NULL) {
		lock = omp->mnt_vnodelock;
		mutex_enter(lock);
		TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vip, vi_mntvnodes);
		mutex_exit(lock);
	}

	/*
	 * Insert into list of vnodes for the new mount point, if
	 * available.  The caller must take a reference on the mount
	 * structure and donate to the vnode.
	 */
	if ((vp->v_mount = mp) != NULL) {
		lock = mp->mnt_vnodelock;
		mutex_enter(lock);
		TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vip, vi_mntvnodes);
		mutex_exit(lock);
	}

	if (omp != NULL) {
		/* Release reference to old mount. */
		vfs_rele(omp);
	}
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If FORCECLOSE is not specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error).  If FORCECLOSE is specified, detach any active vnodes
 * that are found.
 *
 * If WRITECLOSE is set, only flush out regular file vnodes open for
 * writing.
 *
 * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

/*
 * vflushnext: fetch the next vnode for vflush(), yielding the CPU
 * whenever the tick count has passed *when (then re-armed to hz / 10
 * ticks from now).  Uses the waiting variant of the iterator.
 */
static vnode_t *
vflushnext(struct vnode_iterator *marker, int *when)
{
	if (getticks() > *when) {
		yield();
		*when = getticks() + hz / 10;
	}
	preempt_point();
	return vfs_vnode_iterator_next1(marker, NULL, NULL, true);
}

/*
 * Flush one vnode.  Referenced on entry, unreferenced on return.
 *
 * => Returns 0 if the vnode was skipped or disposed of, EBUSY if it is
 *    still in use and FORCECLOSE was not given, or the error from
 *    VOP_FSYNC()/VOP_GETATTR() in the WRITECLOSE case.
 */
static int
vflush_one(vnode_t *vp, vnode_t *skipvp, int flags)
{
	int error;
	struct vattr vattr;

	if (vp == skipvp ||
	    ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM))) {
		vrele(vp);
		return 0;
	}
	/*
	 * If WRITECLOSE is set, only flush out regular file
	 * vnodes open for writing or open and unlinked.
	 */
	if ((flags & WRITECLOSE)) {
		if (vp->v_type != VREG) {
			vrele(vp);
			return 0;
		}
		error = vn_lock(vp, LK_EXCLUSIVE);
		if (error) {
			/* The only expected failure; treat as skipped. */
			KASSERT(error == ENOENT);
			vrele(vp);
			return 0;
		}
		error = VOP_FSYNC(vp, curlwp->l_cred, FSYNC_WAIT, 0, 0);
		if (error == 0)
			error = VOP_GETATTR(vp, &vattr, curlwp->l_cred);
		VOP_UNLOCK(vp);
		if (error) {
			vrele(vp);
			return error;
		}
		/* Not open for writing and still linked: leave it alone. */
		if (vp->v_writecount == 0 && vattr.va_nlink > 0) {
			vrele(vp);
			return 0;
		}
	}
	/*
	 * First try to recycle the vnode.
	 */
	if (vrecycle(vp))
		return 0;
	/*
	 * If FORCECLOSE is set, forcibly close the vnode.
	 * For block or character devices, revert to an
	 * anonymous device.  For all other files, just
	 * kill them.
	 */
	if (flags & FORCECLOSE) {
		if (vrefcnt(vp) > 1 &&
		    (vp->v_type == VBLK || vp->v_type == VCHR))
			vcache_make_anon(vp);
		else
			vgone(vp);
		return 0;
	}
	vrele(vp);
	return EBUSY;
}

/*
 * vflush: flush the vnodes of mount mp according to flags (see the
 * comment above), skipping skipvp.
 *
 * => Makes up to three passes (retries starts at 2) as long as busy
 *    vnodes remain and no hard error occurred.
 * => Returns the first hard error from vflush_one(), else EBUSY if the
 *    last pass still found busy vnodes, else 0.
 */
int
vflush(struct mount *mp, vnode_t *skipvp, int flags)
{
	vnode_t *vp;
	struct vnode_iterator *marker;
	int busy, error, when, retries = 2;

	do {
		busy = error = when = 0;

		/*
		 * First, flush out any vnode references from the
		 * deferred vrele list.
		 */
		vrele_flush(mp);

		vfs_vnode_iterator_init(mp, &marker);

		while ((vp = vflushnext(marker, &when)) != NULL) {
			error = vflush_one(vp, skipvp, flags);
			if (error == EBUSY) {
				/* Count it and keep going. */
				error = 0;
				busy++;
#ifdef DEBUG
				if (busyprt && retries == 0)
					vprint("vflush: busy vnode", vp);
#endif
			} else if (error != 0) {
				break;
			}
		}

		vfs_vnode_iterator_destroy(marker);
	} while (error == 0 && busy > 0 && retries-- > 0);

	if (error)
		return error;
	if (busy)
		return EBUSY;
	return 0;
}

/*
 * Mount a file system.
 */

/*
 * Scan all active processes to see if any of them have a current or root
 * directory onto which the new filesystem has just been mounted.  If so,
 * replace them with the new mount point.
 *
 * proc_lock cannot be held while a cwdinfo is updated, so after each
 * fix-up the process list scan is restarted from the beginning until a
 * full pass finds nothing left to change.
 */
static void
mount_checkdirs(vnode_t *olddp)
{
	vnode_t *newdp, *rele1, *rele2;
	struct cwdinfo *cwdi;
	struct proc *p;
	bool retry;

	/* A single reference: nothing else can be pointing at it. */
	if (vrefcnt(olddp) == 1) {
		return;
	}
	if (VFS_ROOT(olddp->v_mountedhere, LK_EXCLUSIVE, &newdp))
		panic("mount: lost mount");

	do {
		retry = false;
		mutex_enter(&proc_lock);
		PROCLIST_FOREACH(p, &allproc) {
			if ((cwdi = p->p_cwdi) == NULL)
				continue;
			/*
			 * Cannot change to the old directory any more,
			 * so even if we see a stale value it is not a
			 * problem.
			 */
			if (cwdi->cwdi_cdir != olddp &&
			    cwdi->cwdi_rdir != olddp)
				continue;
			retry = true;
			rele1 = NULL;
			rele2 = NULL;
			/* Pin the cwdinfo so proc_lock can be dropped. */
			atomic_inc_uint(&cwdi->cwdi_refcnt);
			mutex_exit(&proc_lock);
			rw_enter(&cwdi->cwdi_lock, RW_WRITER);
			if (cwdi->cwdi_cdir == olddp) {
				rele1 = cwdi->cwdi_cdir;
				vref(newdp);
				cwdi->cwdi_cdir = newdp;
			}
			if (cwdi->cwdi_rdir == olddp) {
				rele2 = cwdi->cwdi_rdir;
				vref(newdp);
				cwdi->cwdi_rdir = newdp;
			}
			rw_exit(&cwdi->cwdi_lock);
			cwdfree(cwdi);
			/* Drop the old directory references outside the lock. */
			if (rele1 != NULL)
				vrele(rele1);
			if (rele2 != NULL)
				vrele(rele2);
			mutex_enter(&proc_lock);
			break;
		}
		mutex_exit(&proc_lock);
	} while (retry);

	if (rootvnode == olddp) {
		vrele(rootvnode);
		vref(newdp);
		rootvnode = newdp;
	}
	/* Release the reference (and lock) obtained from VFS_ROOT(). */
	vput(newdp);
}

/*
 * Start extended attributes
 *
 * Returns the VFS_EXTATTRCTL() result; a failure is also logged.
 */
static int
start_extattr(struct mount *mp)
{
	int error;

	error = VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, NULL, 0, NULL);
	if (error)
		printf("%s: failed to start extattr: error = %d\n",
		    mp->mnt_stat.f_mntonname, error);

	return error;
}

/*
 * mount_domount: mount a new file system of type vfsops on directory
 * vnode *vpp.
 *
 * => Every failure before the mount structure exists drops the vfsops
 *    reference with vfs_delref(); afterwards the reference is owned by
 *    the mount and goes away with vfs_rele(mp).
 * => *vpp is set to NULL once the file system has been put on the
 *    mount list, even if VFS_START() then fails; on earlier failures
 *    it is left untouched.
 * => Returns 0 or an errno value.
 */
int
mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops,
    const char *path, int flags, void *data, size_t *data_len)
{
	vnode_t *vp = *vpp;
	struct mount *mp;
	struct pathbuf *pb;
	struct nameidata nd;
	int error, error2;

	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
	    KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
	if (error) {
		vfs_delref(vfsops);
		return error;
	}

	/* Cannot make a non-dir a mount-point (from here anyway). */
	if (vp->v_type != VDIR) {
		vfs_delref(vfsops);
		return ENOTDIR;
	}

	if (flags & MNT_EXPORTED) {
		vfs_delref(vfsops);
		return EINVAL;
	}

	if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) {
		vfs_delref(vfsops);
		return ENOMEM;
	}

	mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);

	/*
	 * The underlying file system may refuse the mount for
	 * various reasons.  Allow the user to force it to happen.
	 *
	 * Set the mount level flags.
	 */
	mp->mnt_flag = flags & (MNT_BASIC_FLAGS | MNT_FORCE | MNT_IGNORE);

	error = VFS_MOUNT(mp, path, data, data_len);
	mp->mnt_flag &= ~MNT_OP_FLAGS;

	if (error != 0) {
		vfs_rele(mp);
		return error;
	}

	/*
	 * Suspend new file system before taking mnt_updating.
	 * error2 remembers whether the suspend took effect (0) or is
	 * unsupported (EOPNOTSUPP), so that only a real suspend is
	 * resumed later.
	 */
	do {
		error2 = vfs_suspend(mp, 0);
	} while (error2 == EINTR || error2 == ERESTART);
	KASSERT(error2 == 0 || error2 == EOPNOTSUPP);
	mutex_enter(mp->mnt_updating);

	/*
	 * Validate and prepare the mount point.
	 */
	error = pathbuf_copyin(path, &pb);
	if (error != 0) {
		goto err_mounted;
	}
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
	error = namei(&nd);
	pathbuf_destroy(pb);
	if (error != 0) {
		goto err_mounted;
	}
	/* The path must still resolve to the vnode we were handed. */
	if (nd.ni_vp != vp) {
		vput(nd.ni_vp);
		error = EINVAL;
		goto err_mounted;
	}
	/* Something else got mounted here in the meantime. */
	if (vp->v_mountedhere != NULL) {
		vput(nd.ni_vp);
		error = EBUSY;
		goto err_mounted;
	}
	error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0);
	if (error != 0) {
		vput(nd.ni_vp);
		goto err_mounted;
	}

	/*
	 * Put the new filesystem on the mount list after root.
	 */
	cache_purge(vp);
	mp->mnt_iflag &= ~IMNT_WANTRDWR;

	mountlist_append(mp);
	if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
		vfs_syncer_add_to_worklist(mp);
	vp->v_mountedhere = mp;
	vput(nd.ni_vp);

	mount_checkdirs(vp);
	mutex_exit(mp->mnt_updating);
	if (error2 == 0)
		vfs_resume(mp);

	/* Hold an additional reference to the mount across VFS_START(). */
	vfs_ref(mp);
	(void) VFS_STATVFS(mp, &mp->mnt_stat);
	error = VFS_START(mp, 0);
	if (error) {
		vrele(vp);
	} else if (flags & MNT_EXTATTR) {
		/* Extended attributes are best effort: clear the flag on failure. */
		if (start_extattr(mp) != 0)
			mp->mnt_flag &= ~MNT_EXTATTR;
	}
	/* Drop reference held for VFS_START(). */
	vfs_rele(mp);
	*vpp = NULL;
	return error;

err_mounted:
	/* Undo the successful VFS_MOUNT() and release the mount. */
	if (VFS_UNMOUNT(mp, MNT_FORCE) != 0)
		panic("Unmounting fresh file system failed");
	mutex_exit(mp->mnt_updating);
	if (error2 == 0)
		vfs_resume(mp);
	vfs_rele(mp);

	return error;
}

/*
 * Do the actual file system unmount.  File system is assumed to have
 * been locked by the caller.
 *
 * => Caller hold reference to the mount, explicitly for dounmount().
887 */ 888 int 889 dounmount(struct mount *mp, int flags, struct lwp *l) 890 { 891 vnode_t *coveredvp; 892 int error, async, used_syncer, used_extattr; 893 const bool was_suspended = fstrans_is_owner(mp); 894 895 #if NVERIEXEC > 0 896 error = veriexec_unmountchk(mp); 897 if (error) 898 return (error); 899 #endif /* NVERIEXEC > 0 */ 900 901 if (!was_suspended) { 902 error = vfs_suspend(mp, 0); 903 if (error) { 904 return error; 905 } 906 } 907 908 KASSERT((mp->mnt_iflag & IMNT_GONE) == 0); 909 910 used_syncer = (mp->mnt_iflag & IMNT_ONWORKLIST) != 0; 911 used_extattr = mp->mnt_flag & MNT_EXTATTR; 912 913 mp->mnt_iflag |= IMNT_UNMOUNT; 914 mutex_enter(mp->mnt_updating); 915 async = mp->mnt_flag & MNT_ASYNC; 916 mp->mnt_flag &= ~MNT_ASYNC; 917 cache_purgevfs(mp); /* remove cache entries for this file sys */ 918 if (used_syncer) 919 vfs_syncer_remove_from_worklist(mp); 920 error = 0; 921 if (((mp->mnt_flag & MNT_RDONLY) == 0) && ((flags & MNT_FORCE) == 0)) { 922 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 923 } 924 if (error == 0 || (flags & MNT_FORCE)) { 925 error = VFS_UNMOUNT(mp, flags); 926 } 927 if (error) { 928 mp->mnt_iflag &= ~IMNT_UNMOUNT; 929 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 930 vfs_syncer_add_to_worklist(mp); 931 mp->mnt_flag |= async; 932 mutex_exit(mp->mnt_updating); 933 if (!was_suspended) 934 vfs_resume(mp); 935 if (used_extattr) { 936 if (start_extattr(mp) != 0) 937 mp->mnt_flag &= ~MNT_EXTATTR; 938 else 939 mp->mnt_flag |= MNT_EXTATTR; 940 } 941 return (error); 942 } 943 mutex_exit(mp->mnt_updating); 944 945 /* 946 * mark filesystem as gone to prevent further umounts 947 * after mnt_umounting lock is gone, this also prevents 948 * vfs_busy() from succeeding. 
949 */ 950 mp->mnt_iflag |= IMNT_GONE; 951 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { 952 coveredvp->v_mountedhere = NULL; 953 } 954 if (!was_suspended) 955 vfs_resume(mp); 956 957 mountlist_remove(mp); 958 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 959 panic("unmount: dangling vnode"); 960 vfs_hooks_unmount(mp); 961 962 vfs_rele(mp); /* reference from mount() */ 963 if (coveredvp != NULLVP) { 964 vrele(coveredvp); 965 } 966 return (0); 967 } 968 969 /* 970 * Unmount all file systems. 971 * We traverse the list in reverse order under the assumption that doing so 972 * will avoid needing to worry about dependencies. 973 */ 974 bool 975 vfs_unmountall(struct lwp *l) 976 { 977 978 printf("unmounting file systems...\n"); 979 return vfs_unmountall1(l, true, true); 980 } 981 982 static void 983 vfs_unmount_print(struct mount *mp, const char *pfx) 984 { 985 986 aprint_verbose("%sunmounted %s on %s type %s\n", pfx, 987 mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, 988 mp->mnt_stat.f_fstypename); 989 } 990 991 /* 992 * Return the mount with the highest generation less than "gen". 
993 */ 994 static struct mount * 995 vfs_unmount_next(uint64_t gen) 996 { 997 mount_iterator_t *iter; 998 struct mount *mp, *nmp; 999 1000 nmp = NULL; 1001 1002 mountlist_iterator_init(&iter); 1003 while ((mp = mountlist_iterator_next(iter)) != NULL) { 1004 if ((nmp == NULL || mp->mnt_gen > nmp->mnt_gen) && 1005 mp->mnt_gen < gen) { 1006 if (nmp != NULL) 1007 vfs_rele(nmp); 1008 nmp = mp; 1009 vfs_ref(nmp); 1010 } 1011 } 1012 mountlist_iterator_destroy(iter); 1013 1014 return nmp; 1015 } 1016 1017 bool 1018 vfs_unmount_forceone(struct lwp *l) 1019 { 1020 struct mount *mp; 1021 int error; 1022 1023 mp = vfs_unmount_next(mountgen); 1024 if (mp == NULL) { 1025 return false; 1026 } 1027 1028 #ifdef DEBUG 1029 printf("forcefully unmounting %s (%s)...\n", 1030 mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname); 1031 #endif 1032 if ((error = dounmount(mp, MNT_FORCE, l)) == 0) { 1033 vfs_unmount_print(mp, "forcefully "); 1034 return true; 1035 } else { 1036 vfs_rele(mp); 1037 } 1038 1039 #ifdef DEBUG 1040 printf("forceful unmount of %s failed with error %d\n", 1041 mp->mnt_stat.f_mntonname, error); 1042 #endif 1043 1044 return false; 1045 } 1046 1047 bool 1048 vfs_unmountall1(struct lwp *l, bool force, bool verbose) 1049 { 1050 struct mount *mp; 1051 mount_iterator_t *iter; 1052 bool any_error = false, progress = false; 1053 uint64_t gen; 1054 int error; 1055 1056 gen = mountgen; 1057 for (;;) { 1058 mp = vfs_unmount_next(gen); 1059 if (mp == NULL) 1060 break; 1061 gen = mp->mnt_gen; 1062 1063 #ifdef DEBUG 1064 printf("unmounting %p %s (%s)...\n", 1065 (void *)mp, mp->mnt_stat.f_mntonname, 1066 mp->mnt_stat.f_mntfromname); 1067 #endif 1068 if ((error = dounmount(mp, force ? 
MNT_FORCE : 0, l)) == 0) { 1069 vfs_unmount_print(mp, ""); 1070 progress = true; 1071 } else { 1072 vfs_rele(mp); 1073 if (verbose) { 1074 printf("unmount of %s failed with error %d\n", 1075 mp->mnt_stat.f_mntonname, error); 1076 } 1077 any_error = true; 1078 } 1079 } 1080 if (verbose) { 1081 printf("unmounting done\n"); 1082 } 1083 if (any_error && verbose) { 1084 printf("WARNING: some file systems would not unmount\n"); 1085 } 1086 /* If the mountlist is empty it is time to remove swap. */ 1087 mountlist_iterator_init(&iter); 1088 if (mountlist_iterator_next(iter) == NULL) { 1089 uvm_swap_shutdown(l); 1090 } 1091 mountlist_iterator_destroy(iter); 1092 1093 return progress; 1094 } 1095 1096 void 1097 vfs_sync_all(struct lwp *l) 1098 { 1099 printf("syncing disks... "); 1100 1101 /* remove user processes from run queue */ 1102 suspendsched(); 1103 (void)spl0(); 1104 1105 /* avoid coming back this way again if we panic. */ 1106 doing_shutdown = 1; 1107 1108 do_sys_sync(l); 1109 1110 /* Wait for sync to finish. */ 1111 if (vfs_syncwait() != 0) { 1112 #if defined(DDB) && defined(DEBUG_HALT_BUSY) 1113 Debugger(); 1114 #endif 1115 printf("giving up\n"); 1116 return; 1117 } else 1118 printf("done\n"); 1119 } 1120 1121 /* 1122 * Sync and unmount file systems before shutting down. 1123 */ 1124 void 1125 vfs_shutdown(void) 1126 { 1127 lwp_t *l = curlwp; 1128 1129 vfs_sync_all(l); 1130 1131 /* 1132 * If we have panicked - do not make the situation potentially 1133 * worse by unmounting the file systems. 1134 */ 1135 if (panicstr != NULL) { 1136 return; 1137 } 1138 1139 /* Unmount file systems. 
*/ 1140 vfs_unmountall(l); 1141 } 1142 1143 /* 1144 * Print a list of supported file system types (used by vfs_mountroot) 1145 */ 1146 static void 1147 vfs_print_fstypes(void) 1148 { 1149 struct vfsops *v; 1150 int cnt = 0; 1151 1152 mutex_enter(&vfs_list_lock); 1153 LIST_FOREACH(v, &vfs_list, vfs_list) 1154 ++cnt; 1155 mutex_exit(&vfs_list_lock); 1156 1157 if (cnt == 0) { 1158 printf("WARNING: No file system modules have been loaded.\n"); 1159 return; 1160 } 1161 1162 printf("Supported file systems:"); 1163 mutex_enter(&vfs_list_lock); 1164 LIST_FOREACH(v, &vfs_list, vfs_list) { 1165 printf(" %s", v->vfs_name); 1166 } 1167 mutex_exit(&vfs_list_lock); 1168 printf("\n"); 1169 } 1170 1171 /* 1172 * Mount the root file system. If the operator didn't specify a 1173 * file system to use, try all possible file systems until one 1174 * succeeds. 1175 */ 1176 int 1177 vfs_mountroot(void) 1178 { 1179 struct vfsops *v; 1180 int error = ENODEV; 1181 1182 if (root_device == NULL) 1183 panic("vfs_mountroot: root device unknown"); 1184 1185 switch (device_class(root_device)) { 1186 case DV_IFNET: 1187 if (rootdev != NODEV) 1188 panic("vfs_mountroot: rootdev set for DV_IFNET " 1189 "(0x%llx -> %llu,%llu)", 1190 (unsigned long long)rootdev, 1191 (unsigned long long)major(rootdev), 1192 (unsigned long long)minor(rootdev)); 1193 break; 1194 1195 case DV_DISK: 1196 if (rootdev == NODEV) 1197 panic("vfs_mountroot: rootdev not set for DV_DISK"); 1198 if (bdevvp(rootdev, &rootvp)) 1199 panic("vfs_mountroot: can't get vnode for rootdev"); 1200 vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY); 1201 error = VOP_OPEN(rootvp, FREAD, FSCRED); 1202 VOP_UNLOCK(rootvp); 1203 if (error) { 1204 printf("vfs_mountroot: can't open root device\n"); 1205 return (error); 1206 } 1207 break; 1208 1209 case DV_VIRTUAL: 1210 break; 1211 1212 default: 1213 printf("%s: inappropriate for root file system\n", 1214 device_xname(root_device)); 1215 return (ENODEV); 1216 } 1217 1218 /* 1219 * If user specified a root fs 
type, use it. Make sure the 1220 * specified type exists and has a mount_root() 1221 */ 1222 if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) { 1223 v = vfs_getopsbyname(rootfstype); 1224 error = EFTYPE; 1225 if (v != NULL) { 1226 if (v->vfs_mountroot != NULL) { 1227 error = (v->vfs_mountroot)(); 1228 } 1229 v->vfs_refcount--; 1230 } 1231 goto done; 1232 } 1233 1234 /* 1235 * Try each file system currently configured into the kernel. 1236 */ 1237 mutex_enter(&vfs_list_lock); 1238 LIST_FOREACH(v, &vfs_list, vfs_list) { 1239 if (v->vfs_mountroot == NULL) 1240 continue; 1241 #ifdef DEBUG 1242 aprint_normal("mountroot: trying %s...\n", v->vfs_name); 1243 #endif 1244 v->vfs_refcount++; 1245 mutex_exit(&vfs_list_lock); 1246 error = (*v->vfs_mountroot)(); 1247 mutex_enter(&vfs_list_lock); 1248 v->vfs_refcount--; 1249 if (!error) { 1250 aprint_normal("root file system type: %s\n", 1251 v->vfs_name); 1252 break; 1253 } 1254 } 1255 mutex_exit(&vfs_list_lock); 1256 1257 if (v == NULL) { 1258 vfs_print_fstypes(); 1259 printf("no file system for %s", device_xname(root_device)); 1260 if (device_class(root_device) == DV_DISK) 1261 printf(" (dev 0x%llx)", (unsigned long long)rootdev); 1262 printf("\n"); 1263 error = EFTYPE; 1264 } 1265 1266 done: 1267 if (error && device_class(root_device) == DV_DISK) { 1268 vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY); 1269 VOP_CLOSE(rootvp, FREAD, FSCRED); 1270 VOP_UNLOCK(rootvp); 1271 vrele(rootvp); 1272 } 1273 if (error == 0) { 1274 mount_iterator_t *iter; 1275 struct mount *mp; 1276 extern struct cwdinfo cwdi0; 1277 1278 mountlist_iterator_init(&iter); 1279 mp = mountlist_iterator_next(iter); 1280 KASSERT(mp != NULL); 1281 mountlist_iterator_destroy(iter); 1282 1283 mp->mnt_flag |= MNT_ROOTFS; 1284 mp->mnt_op->vfs_refcount++; 1285 1286 /* 1287 * Get the vnode for '/'. Set cwdi0.cwdi_cdir to 1288 * reference it, and donate it the reference grabbed 1289 * with VFS_ROOT(). 
1290 */ 1291 error = VFS_ROOT(mp, LK_NONE, &rootvnode); 1292 if (error) 1293 panic("cannot find root vnode, error=%d", error); 1294 cwdi0.cwdi_cdir = rootvnode; 1295 cwdi0.cwdi_rdir = NULL; 1296 1297 /* 1298 * Now that root is mounted, we can fixup initproc's CWD 1299 * info. All other processes are kthreads, which merely 1300 * share proc0's CWD info. 1301 */ 1302 initproc->p_cwdi->cwdi_cdir = rootvnode; 1303 vref(initproc->p_cwdi->cwdi_cdir); 1304 initproc->p_cwdi->cwdi_rdir = NULL; 1305 /* 1306 * Enable loading of modules from the filesystem 1307 */ 1308 module_load_vfs_init(); 1309 1310 } 1311 return (error); 1312 } 1313 1314 /* 1315 * mount_specific_key_create -- 1316 * Create a key for subsystem mount-specific data. 1317 */ 1318 int 1319 mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor) 1320 { 1321 1322 return specificdata_key_create(mount_specificdata_domain, keyp, dtor); 1323 } 1324 1325 /* 1326 * mount_specific_key_delete -- 1327 * Delete a key for subsystem mount-specific data. 1328 */ 1329 void 1330 mount_specific_key_delete(specificdata_key_t key) 1331 { 1332 1333 specificdata_key_delete(mount_specificdata_domain, key); 1334 } 1335 1336 /* 1337 * mount_initspecific -- 1338 * Initialize a mount's specificdata container. 1339 */ 1340 void 1341 mount_initspecific(struct mount *mp) 1342 { 1343 int error __diagused; 1344 1345 error = specificdata_init(mount_specificdata_domain, 1346 &mp->mnt_specdataref); 1347 KASSERT(error == 0); 1348 } 1349 1350 /* 1351 * mount_finispecific -- 1352 * Finalize a mount's specificdata container. 1353 */ 1354 void 1355 mount_finispecific(struct mount *mp) 1356 { 1357 1358 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 1359 } 1360 1361 /* 1362 * mount_getspecific -- 1363 * Return mount-specific data corresponding to the specified key. 
1364 */ 1365 void * 1366 mount_getspecific(struct mount *mp, specificdata_key_t key) 1367 { 1368 1369 return specificdata_getspecific(mount_specificdata_domain, 1370 &mp->mnt_specdataref, key); 1371 } 1372 1373 /* 1374 * mount_setspecific -- 1375 * Set mount-specific data corresponding to the specified key. 1376 */ 1377 void 1378 mount_setspecific(struct mount *mp, specificdata_key_t key, void *data) 1379 { 1380 1381 specificdata_setspecific(mount_specificdata_domain, 1382 &mp->mnt_specdataref, key, data); 1383 } 1384 1385 /* 1386 * Check to see if a filesystem is mounted on a block device. 1387 */ 1388 int 1389 vfs_mountedon(vnode_t *vp) 1390 { 1391 vnode_t *vq; 1392 int error = 0; 1393 1394 if (vp->v_type != VBLK) 1395 return ENOTBLK; 1396 if (spec_node_getmountedfs(vp) != NULL) 1397 return EBUSY; 1398 if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, VDEAD_NOWAIT, &vq) 1399 == 0) { 1400 if (spec_node_getmountedfs(vq) != NULL) 1401 error = EBUSY; 1402 vrele(vq); 1403 } 1404 1405 return error; 1406 } 1407 1408 /* 1409 * Check if a device pointed to by vp is mounted. 1410 * 1411 * Returns: 1412 * EINVAL if it's not a disk 1413 * EBUSY if it's a disk and mounted 1414 * 0 if it's a disk and not mounted 1415 */ 1416 int 1417 rawdev_mounted(vnode_t *vp, vnode_t **bvpp) 1418 { 1419 vnode_t *bvp; 1420 dev_t dev; 1421 int d_type; 1422 1423 bvp = NULL; 1424 d_type = D_OTHER; 1425 1426 if (iskmemvp(vp)) 1427 return EINVAL; 1428 1429 switch (vp->v_type) { 1430 case VCHR: { 1431 const struct cdevsw *cdev; 1432 1433 dev = vp->v_rdev; 1434 cdev = cdevsw_lookup(dev); 1435 if (cdev != NULL) { 1436 dev_t blkdev; 1437 1438 blkdev = devsw_chr2blk(dev); 1439 if (blkdev != NODEV) { 1440 if (vfinddev(blkdev, VBLK, &bvp) != 0) { 1441 d_type = (cdev->d_flag & D_TYPEMASK); 1442 /* XXX: what if bvp disappears? 
*/ 1443 vrele(bvp); 1444 } 1445 } 1446 } 1447 1448 break; 1449 } 1450 1451 case VBLK: { 1452 const struct bdevsw *bdev; 1453 1454 dev = vp->v_rdev; 1455 bdev = bdevsw_lookup(dev); 1456 if (bdev != NULL) 1457 d_type = (bdev->d_flag & D_TYPEMASK); 1458 1459 bvp = vp; 1460 1461 break; 1462 } 1463 1464 default: 1465 break; 1466 } 1467 1468 if (d_type != D_DISK) 1469 return EINVAL; 1470 1471 if (bvpp != NULL) 1472 *bvpp = bvp; 1473 1474 /* 1475 * XXX: This is bogus. We should be failing the request 1476 * XXX: not only if this specific slice is mounted, but 1477 * XXX: if it's on a disk with any other mounted slice. 1478 */ 1479 if (vfs_mountedon(bvp)) 1480 return EBUSY; 1481 1482 return 0; 1483 } 1484 1485 /* 1486 * Make a 'unique' number from a mount type name. 1487 */ 1488 long 1489 makefstype(const char *type) 1490 { 1491 long rv; 1492 1493 for (rv = 0; *type; type++) { 1494 rv <<= 2; 1495 rv ^= *type; 1496 } 1497 return rv; 1498 } 1499 1500 static struct mountlist_entry * 1501 mountlist_alloc(enum mountlist_type type, struct mount *mp) 1502 { 1503 struct mountlist_entry *me; 1504 1505 me = kmem_zalloc(sizeof(*me), KM_SLEEP); 1506 me->me_mount = mp; 1507 me->me_type = type; 1508 1509 return me; 1510 } 1511 1512 static void 1513 mountlist_free(struct mountlist_entry *me) 1514 { 1515 1516 kmem_free(me, sizeof(*me)); 1517 } 1518 1519 void 1520 mountlist_iterator_init(mount_iterator_t **mip) 1521 { 1522 struct mountlist_entry *me; 1523 1524 me = mountlist_alloc(ME_MARKER, NULL); 1525 mutex_enter(&mountlist_lock); 1526 TAILQ_INSERT_HEAD(&mountlist, me, me_list); 1527 mutex_exit(&mountlist_lock); 1528 *mip = (mount_iterator_t *)me; 1529 } 1530 1531 void 1532 mountlist_iterator_destroy(mount_iterator_t *mi) 1533 { 1534 struct mountlist_entry *marker = &mi->mi_entry; 1535 1536 if (marker->me_mount != NULL) 1537 vfs_unbusy(marker->me_mount); 1538 1539 mutex_enter(&mountlist_lock); 1540 TAILQ_REMOVE(&mountlist, marker, me_list); 1541 mutex_exit(&mountlist_lock); 1542 1543 
mountlist_free(marker); 1544 1545 } 1546 1547 /* 1548 * Return the next mount or NULL for this iterator. 1549 * Mark it busy on success. 1550 */ 1551 static inline struct mount * 1552 _mountlist_iterator_next(mount_iterator_t *mi, bool wait) 1553 { 1554 struct mountlist_entry *me, *marker = &mi->mi_entry; 1555 struct mount *mp; 1556 int error; 1557 1558 if (marker->me_mount != NULL) { 1559 vfs_unbusy(marker->me_mount); 1560 marker->me_mount = NULL; 1561 } 1562 1563 mutex_enter(&mountlist_lock); 1564 for (;;) { 1565 KASSERT(marker->me_type == ME_MARKER); 1566 1567 me = TAILQ_NEXT(marker, me_list); 1568 if (me == NULL) { 1569 /* End of list: keep marker and return. */ 1570 mutex_exit(&mountlist_lock); 1571 return NULL; 1572 } 1573 TAILQ_REMOVE(&mountlist, marker, me_list); 1574 TAILQ_INSERT_AFTER(&mountlist, me, marker, me_list); 1575 1576 /* Skip other markers. */ 1577 if (me->me_type != ME_MOUNT) 1578 continue; 1579 1580 /* Take an initial reference for vfs_busy() below. */ 1581 mp = me->me_mount; 1582 KASSERT(mp != NULL); 1583 vfs_ref(mp); 1584 mutex_exit(&mountlist_lock); 1585 1586 /* Try to mark this mount busy and return on success. */ 1587 if (wait) 1588 error = vfs_busy(mp); 1589 else 1590 error = vfs_trybusy(mp); 1591 if (error == 0) { 1592 vfs_rele(mp); 1593 marker->me_mount = mp; 1594 return mp; 1595 } 1596 vfs_rele(mp); 1597 mutex_enter(&mountlist_lock); 1598 } 1599 } 1600 1601 struct mount * 1602 mountlist_iterator_next(mount_iterator_t *mi) 1603 { 1604 1605 return _mountlist_iterator_next(mi, true); 1606 } 1607 1608 struct mount * 1609 mountlist_iterator_trynext(mount_iterator_t *mi) 1610 { 1611 1612 return _mountlist_iterator_next(mi, false); 1613 } 1614 1615 /* 1616 * Attach new mount to the end of the mount list. 
1617 */ 1618 void 1619 mountlist_append(struct mount *mp) 1620 { 1621 struct mountlist_entry *me; 1622 1623 me = mountlist_alloc(ME_MOUNT, mp); 1624 mutex_enter(&mountlist_lock); 1625 TAILQ_INSERT_TAIL(&mountlist, me, me_list); 1626 mutex_exit(&mountlist_lock); 1627 } 1628 1629 /* 1630 * Remove mount from mount list. 1631 */void 1632 mountlist_remove(struct mount *mp) 1633 { 1634 struct mountlist_entry *me; 1635 1636 mutex_enter(&mountlist_lock); 1637 TAILQ_FOREACH(me, &mountlist, me_list) 1638 if (me->me_type == ME_MOUNT && me->me_mount == mp) 1639 break; 1640 KASSERT(me != NULL); 1641 TAILQ_REMOVE(&mountlist, me, me_list); 1642 mutex_exit(&mountlist_lock); 1643 mountlist_free(me); 1644 } 1645 1646 /* 1647 * Unlocked variant to traverse the mountlist. 1648 * To be used from DDB only. 1649 */ 1650 struct mount * 1651 _mountlist_next(struct mount *mp) 1652 { 1653 struct mountlist_entry *me; 1654 1655 if (mp == NULL) { 1656 me = TAILQ_FIRST(&mountlist); 1657 } else { 1658 TAILQ_FOREACH(me, &mountlist, me_list) 1659 if (me->me_type == ME_MOUNT && me->me_mount == mp) 1660 break; 1661 if (me != NULL) 1662 me = TAILQ_NEXT(me, me_list); 1663 } 1664 1665 while (me != NULL && me->me_type != ME_MOUNT) 1666 me = TAILQ_NEXT(me, me_list); 1667 1668 return (me ? me->me_mount : NULL); 1669 } 1670