1 /* $NetBSD: vfs_mount.c,v 1.86 2021/02/16 09:56:32 hannken Exp $ */ 2 3 /*- 4 * Copyright (c) 1997-2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1989, 1993 35 * The Regents of the University of California. All rights reserved. 36 * (c) UNIX System Laboratories, Inc. 37 * All or some portions of this file are derived from material licensed 38 * to the University of California by American Telephone and Telegraph 39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 40 * the permission of UNIX System Laboratories, Inc. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 67 */ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.86 2021/02/16 09:56:32 hannken Exp $"); 71 72 #include <sys/param.h> 73 #include <sys/kernel.h> 74 75 #include <sys/atomic.h> 76 #include <sys/buf.h> 77 #include <sys/conf.h> 78 #include <sys/fcntl.h> 79 #include <sys/filedesc.h> 80 #include <sys/device.h> 81 #include <sys/kauth.h> 82 #include <sys/kmem.h> 83 #include <sys/module.h> 84 #include <sys/mount.h> 85 #include <sys/fstrans.h> 86 #include <sys/namei.h> 87 #include <sys/extattr.h> 88 #include <sys/syscallargs.h> 89 #include <sys/sysctl.h> 90 #include <sys/systm.h> 91 #include <sys/vfs_syscalls.h> 92 #include <sys/vnode_impl.h> 93 94 #include <miscfs/genfs/genfs.h> 95 #include <miscfs/specfs/specdev.h> 96 97 #include <uvm/uvm_swap.h> 98 99 enum mountlist_type { 100 ME_MOUNT, 101 ME_MARKER 102 }; 103 struct mountlist_entry { 104 TAILQ_ENTRY(mountlist_entry) me_list; /* Mount list. */ 105 struct mount *me_mount; /* Actual mount if ME_MOUNT, 106 current mount else. */ 107 enum mountlist_type me_type; /* Mount or marker. */ 108 }; 109 struct mount_iterator { 110 struct mountlist_entry mi_entry; 111 }; 112 113 static struct vnode *vfs_vnode_iterator_next1(struct vnode_iterator *, 114 bool (*)(void *, struct vnode *), void *, bool); 115 116 /* Root filesystem. */ 117 vnode_t * rootvnode; 118 119 /* Mounted filesystem list. */ 120 static TAILQ_HEAD(mountlist, mountlist_entry) mountlist; 121 static kmutex_t mountlist_lock __cacheline_aligned; 122 int vnode_offset_next_by_lru /* XXX: ugly hack for pstat.c */ 123 = offsetof(vnode_impl_t, vi_lrulist.tqe_next); 124 125 kmutex_t vfs_list_lock __cacheline_aligned; 126 127 static specificdata_domain_t mount_specificdata_domain; 128 static kmutex_t mntid_lock; 129 130 static kmutex_t mountgen_lock __cacheline_aligned; 131 static uint64_t mountgen; 132 133 void 134 vfs_mount_sysinit(void) 135 { 136 137 TAILQ_INIT(&mountlist); 138 mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE); 139 mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE); 140 141 mount_specificdata_domain = specificdata_domain_create(); 142 mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE); 143 mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE); 144 mountgen = 0; 145 } 146 147 struct mount * 148 vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp) 149 { 150 struct mount *mp; 151 int error __diagused; 152 153 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP); 154 mp->mnt_op = vfsops; 155 mp->mnt_refcnt = 1; 156 TAILQ_INIT(&mp->mnt_vnodelist); 157 mp->mnt_renamelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 158 mp->mnt_vnodelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 159 mp->mnt_updating = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 160 mp->mnt_vnodecovered = vp; 161 mount_initspecific(mp); 162 163 error = fstrans_mount(mp); 164 KASSERT(error == 0); 165 166 mutex_enter(&mountgen_lock); 167 mp->mnt_gen = mountgen++; 168 mutex_exit(&mountgen_lock); 169 170 return mp; 171 } 172 173 /* 174 * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and 175 * initialize a mount structure for it. 176 * 177 * Devname is usually updated by mount(8) after booting. 178 */ 179 int 180 vfs_rootmountalloc(const char *fstypename, const char *devname, 181 struct mount **mpp) 182 { 183 struct vfsops *vfsp = NULL; 184 struct mount *mp; 185 int error __diagused; 186 187 mutex_enter(&vfs_list_lock); 188 LIST_FOREACH(vfsp, &vfs_list, vfs_list) 189 if (!strncmp(vfsp->vfs_name, fstypename, 190 sizeof(mp->mnt_stat.f_fstypename))) 191 break; 192 if (vfsp == NULL) { 193 mutex_exit(&vfs_list_lock); 194 return (ENODEV); 195 } 196 vfsp->vfs_refcount++; 197 mutex_exit(&vfs_list_lock); 198 199 if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL) 200 return ENOMEM; 201 error = vfs_busy(mp); 202 KASSERT(error == 0); 203 mp->mnt_flag = MNT_RDONLY; 204 (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, 205 sizeof(mp->mnt_stat.f_fstypename)); 206 mp->mnt_stat.f_mntonname[0] = '/'; 207 mp->mnt_stat.f_mntonname[1] = '\0'; 208 mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] = 209 '\0'; 210 (void)copystr(devname, mp->mnt_stat.f_mntfromname, 211 sizeof(mp->mnt_stat.f_mntfromname) - 1, 0); 212 *mpp = mp; 213 return 0; 214 } 215 216 /* 217 * vfs_getnewfsid: get a new unique fsid. 218 */ 219 void 220 vfs_getnewfsid(struct mount *mp) 221 { 222 static u_short xxxfs_mntid; 223 fsid_t tfsid; 224 int mtype; 225 226 mutex_enter(&mntid_lock); 227 mtype = makefstype(mp->mnt_op->vfs_name); 228 mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0); 229 mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype; 230 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 231 if (xxxfs_mntid == 0) 232 ++xxxfs_mntid; 233 tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid); 234 tfsid.__fsid_val[1] = mtype; 235 while (vfs_getvfs(&tfsid)) { 236 tfsid.__fsid_val[0]++; 237 xxxfs_mntid++; 238 } 239 mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0]; 240 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 241 mutex_exit(&mntid_lock); 242 } 243 244 /* 245 * Lookup a mount point by filesystem identifier. 246 * 247 * XXX Needs to add a reference to the mount point. 248 */ 249 struct mount * 250 vfs_getvfs(fsid_t *fsid) 251 { 252 mount_iterator_t *iter; 253 struct mount *mp; 254 255 mountlist_iterator_init(&iter); 256 while ((mp = mountlist_iterator_next(iter)) != NULL) { 257 if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] && 258 mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) { 259 mountlist_iterator_destroy(iter); 260 return mp; 261 } 262 } 263 mountlist_iterator_destroy(iter); 264 return NULL; 265 } 266 267 /* 268 * Take a reference to a mount structure. 269 */ 270 void 271 vfs_ref(struct mount *mp) 272 { 273 274 KASSERT(mp->mnt_refcnt > 0 || mutex_owned(&mountlist_lock)); 275 276 atomic_inc_uint(&mp->mnt_refcnt); 277 } 278 279 /* 280 * Drop a reference to a mount structure, freeing if the last reference. 281 */ 282 void 283 vfs_rele(struct mount *mp) 284 { 285 286 if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) { 287 return; 288 } 289 290 /* 291 * Nothing else has visibility of the mount: we can now 292 * free the data structures. 293 */ 294 KASSERT(mp->mnt_refcnt == 0); 295 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 296 mutex_obj_free(mp->mnt_updating); 297 mutex_obj_free(mp->mnt_renamelock); 298 mutex_obj_free(mp->mnt_vnodelock); 299 if (mp->mnt_op != NULL) { 300 vfs_delref(mp->mnt_op); 301 } 302 fstrans_unmount(mp); 303 /* 304 * Final free of mp gets done from fstrans_mount_dtor(). 305 * 306 * Prevents this memory to be reused as a mount before 307 * fstrans releases all references to it. 308 */ 309 } 310 311 /* 312 * Mark a mount point as busy, and gain a new reference to it. Used to 313 * prevent the file system from being unmounted during critical sections. 314 * 315 * vfs_busy can be called multiple times and by multiple threads 316 * and must be accompanied by the same number of vfs_unbusy calls. 317 * 318 * => The caller must hold a pre-existing reference to the mount. 319 * => Will fail if the file system is being unmounted, or is unmounted. 320 */ 321 static inline int 322 _vfs_busy(struct mount *mp, bool wait) 323 { 324 325 KASSERT(mp->mnt_refcnt > 0); 326 327 if (wait) { 328 fstrans_start(mp); 329 } else { 330 if (fstrans_start_nowait(mp)) 331 return EBUSY; 332 } 333 if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) { 334 fstrans_done(mp); 335 return ENOENT; 336 } 337 vfs_ref(mp); 338 return 0; 339 } 340 341 int 342 vfs_busy(struct mount *mp) 343 { 344 345 return _vfs_busy(mp, true); 346 } 347 348 int 349 vfs_trybusy(struct mount *mp) 350 { 351 352 return _vfs_busy(mp, false); 353 } 354 355 /* 356 * Unbusy a busy filesystem. 357 * 358 * Every successful vfs_busy() call must be undone by a vfs_unbusy() call. 359 */ 360 void 361 vfs_unbusy(struct mount *mp) 362 { 363 364 KASSERT(mp->mnt_refcnt > 0); 365 366 fstrans_done(mp); 367 vfs_rele(mp); 368 } 369 370 struct vnode_iterator { 371 vnode_impl_t vi_vnode; 372 }; 373 374 void 375 vfs_vnode_iterator_init(struct mount *mp, struct vnode_iterator **vnip) 376 { 377 vnode_t *vp; 378 vnode_impl_t *vip; 379 380 vp = vnalloc_marker(mp); 381 vip = VNODE_TO_VIMPL(vp); 382 383 mutex_enter(mp->mnt_vnodelock); 384 TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vip, vi_mntvnodes); 385 vp->v_usecount = 1; 386 mutex_exit(mp->mnt_vnodelock); 387 388 *vnip = (struct vnode_iterator *)vip; 389 } 390 391 void 392 vfs_vnode_iterator_destroy(struct vnode_iterator *vni) 393 { 394 vnode_impl_t *mvip = &vni->vi_vnode; 395 vnode_t *mvp = VIMPL_TO_VNODE(mvip); 396 kmutex_t *lock; 397 398 KASSERT(vnis_marker(mvp)); 399 if (vrefcnt(mvp) != 0) { 400 lock = mvp->v_mount->mnt_vnodelock; 401 mutex_enter(lock); 402 TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvip, vi_mntvnodes); 403 mvp->v_usecount = 0; 404 mutex_exit(lock); 405 } 406 vnfree_marker(mvp); 407 } 408 409 static struct vnode * 410 vfs_vnode_iterator_next1(struct vnode_iterator *vni, 411 bool (*f)(void *, struct vnode *), void *cl, bool do_wait) 412 { 413 vnode_impl_t *mvip = &vni->vi_vnode; 414 struct mount *mp = VIMPL_TO_VNODE(mvip)->v_mount; 415 vnode_t *vp; 416 vnode_impl_t *vip; 417 kmutex_t *lock; 418 int error; 419 420 KASSERT(vnis_marker(VIMPL_TO_VNODE(mvip))); 421 422 lock = mp->mnt_vnodelock; 423 do { 424 mutex_enter(lock); 425 vip = TAILQ_NEXT(mvip, vi_mntvnodes); 426 TAILQ_REMOVE(&mp->mnt_vnodelist, mvip, vi_mntvnodes); 427 VIMPL_TO_VNODE(mvip)->v_usecount = 0; 428 again: 429 if (vip == NULL) { 430 mutex_exit(lock); 431 return NULL; 432 } 433 vp = VIMPL_TO_VNODE(vip); 434 KASSERT(vp != NULL); 435 mutex_enter(vp->v_interlock); 436 if (vnis_marker(vp) || 437 vdead_check(vp, (do_wait ? 0 : VDEAD_NOWAIT)) || 438 (f && !(*f)(cl, vp))) { 439 mutex_exit(vp->v_interlock); 440 vip = TAILQ_NEXT(vip, vi_mntvnodes); 441 goto again; 442 } 443 444 TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vip, mvip, vi_mntvnodes); 445 VIMPL_TO_VNODE(mvip)->v_usecount = 1; 446 mutex_exit(lock); 447 error = vcache_vget(vp); 448 KASSERT(error == 0 || error == ENOENT); 449 } while (error != 0); 450 451 return vp; 452 } 453 454 struct vnode * 455 vfs_vnode_iterator_next(struct vnode_iterator *vni, 456 bool (*f)(void *, struct vnode *), void *cl) 457 { 458 459 return vfs_vnode_iterator_next1(vni, f, cl, false); 460 } 461 462 /* 463 * Move a vnode from one mount queue to another. 464 */ 465 void 466 vfs_insmntque(vnode_t *vp, struct mount *mp) 467 { 468 vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 469 struct mount *omp; 470 kmutex_t *lock; 471 472 KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 || 473 vp->v_tag == VT_VFS); 474 475 /* 476 * Delete from old mount point vnode list, if on one. 477 */ 478 if ((omp = vp->v_mount) != NULL) { 479 lock = omp->mnt_vnodelock; 480 mutex_enter(lock); 481 TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vip, vi_mntvnodes); 482 mutex_exit(lock); 483 } 484 485 /* 486 * Insert into list of vnodes for the new mount point, if 487 * available. The caller must take a reference on the mount 488 * structure and donate to the vnode. 489 */ 490 if ((vp->v_mount = mp) != NULL) { 491 lock = mp->mnt_vnodelock; 492 mutex_enter(lock); 493 TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vip, vi_mntvnodes); 494 mutex_exit(lock); 495 } 496 497 if (omp != NULL) { 498 /* Release reference to old mount. */ 499 vfs_rele(omp); 500 } 501 } 502 503 /* 504 * Remove any vnodes in the vnode table belonging to mount point mp. 505 * 506 * If FORCECLOSE is not specified, there should not be any active ones, 507 * return error if any are found (nb: this is a user error, not a 508 * system error). If FORCECLOSE is specified, detach any active vnodes 509 * that are found. 510 * 511 * If WRITECLOSE is set, only flush out regular file vnodes open for 512 * writing. 513 * 514 * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped. 515 */ 516 #ifdef DEBUG 517 int busyprt = 0; /* print out busy vnodes */ 518 struct ctldebug debug1 = { "busyprt", &busyprt }; 519 #endif 520 521 static vnode_t * 522 vflushnext(struct vnode_iterator *marker, int *when) 523 { 524 if (getticks() > *when) { 525 yield(); 526 *when = getticks() + hz / 10; 527 } 528 return vfs_vnode_iterator_next1(marker, NULL, NULL, true); 529 } 530 531 /* 532 * Flush one vnode. Referenced on entry, unreferenced on return. 533 */ 534 static int 535 vflush_one(vnode_t *vp, vnode_t *skipvp, int flags) 536 { 537 int error; 538 struct vattr vattr; 539 540 if (vp == skipvp || 541 ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM))) { 542 vrele(vp); 543 return 0; 544 } 545 /* 546 * If WRITECLOSE is set, only flush out regular file 547 * vnodes open for writing or open and unlinked. 548 */ 549 if ((flags & WRITECLOSE)) { 550 if (vp->v_type != VREG) { 551 vrele(vp); 552 return 0; 553 } 554 error = vn_lock(vp, LK_EXCLUSIVE); 555 if (error) { 556 KASSERT(error == ENOENT); 557 vrele(vp); 558 return 0; 559 } 560 error = VOP_FSYNC(vp, curlwp->l_cred, FSYNC_WAIT, 0, 0); 561 if (error == 0) 562 error = VOP_GETATTR(vp, &vattr, curlwp->l_cred); 563 VOP_UNLOCK(vp); 564 if (error) { 565 vrele(vp); 566 return error; 567 } 568 if (vp->v_writecount == 0 && vattr.va_nlink > 0) { 569 vrele(vp); 570 return 0; 571 } 572 } 573 /* 574 * First try to recycle the vnode. 575 */ 576 if (vrecycle(vp)) 577 return 0; 578 /* 579 * If FORCECLOSE is set, forcibly close the vnode. 580 * For block or character devices, revert to an 581 * anonymous device. For all other files, just 582 * kill them. 583 */ 584 if (flags & FORCECLOSE) { 585 if (vrefcnt(vp) > 1 && 586 (vp->v_type == VBLK || vp->v_type == VCHR)) 587 vcache_make_anon(vp); 588 else 589 vgone(vp); 590 return 0; 591 } 592 vrele(vp); 593 return EBUSY; 594 } 595 596 int 597 vflush(struct mount *mp, vnode_t *skipvp, int flags) 598 { 599 vnode_t *vp; 600 struct vnode_iterator *marker; 601 int busy, error, when, retries = 2; 602 603 do { 604 busy = error = when = 0; 605 606 /* 607 * First, flush out any vnode references from the 608 * deferred vrele list. 609 */ 610 vrele_flush(mp); 611 612 vfs_vnode_iterator_init(mp, &marker); 613 614 while ((vp = vflushnext(marker, &when)) != NULL) { 615 error = vflush_one(vp, skipvp, flags); 616 if (error == EBUSY) { 617 error = 0; 618 busy++; 619 #ifdef DEBUG 620 if (busyprt && retries == 0) 621 vprint("vflush: busy vnode", vp); 622 #endif 623 } else if (error != 0) { 624 break; 625 } 626 } 627 628 vfs_vnode_iterator_destroy(marker); 629 } while (error == 0 && busy > 0 && retries-- > 0); 630 631 if (error) 632 return error; 633 if (busy) 634 return EBUSY; 635 return 0; 636 } 637 638 /* 639 * Mount a file system. 640 */ 641 642 /* 643 * Scan all active processes to see if any of them have a current or root 644 * directory onto which the new filesystem has just been mounted. If so, 645 * replace them with the new mount point. 646 */ 647 static void 648 mount_checkdirs(vnode_t *olddp) 649 { 650 vnode_t *newdp, *rele1, *rele2; 651 struct cwdinfo *cwdi; 652 struct proc *p; 653 bool retry; 654 655 if (vrefcnt(olddp) == 1) { 656 return; 657 } 658 if (VFS_ROOT(olddp->v_mountedhere, LK_EXCLUSIVE, &newdp)) 659 panic("mount: lost mount"); 660 661 do { 662 retry = false; 663 mutex_enter(&proc_lock); 664 PROCLIST_FOREACH(p, &allproc) { 665 if ((cwdi = p->p_cwdi) == NULL) 666 continue; 667 /* 668 * Cannot change to the old directory any more, 669 * so even if we see a stale value it is not a 670 * problem. 671 */ 672 if (cwdi->cwdi_cdir != olddp && 673 cwdi->cwdi_rdir != olddp) 674 continue; 675 retry = true; 676 rele1 = NULL; 677 rele2 = NULL; 678 atomic_inc_uint(&cwdi->cwdi_refcnt); 679 mutex_exit(&proc_lock); 680 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 681 if (cwdi->cwdi_cdir == olddp) { 682 rele1 = cwdi->cwdi_cdir; 683 vref(newdp); 684 cwdi->cwdi_cdir = newdp; 685 } 686 if (cwdi->cwdi_rdir == olddp) { 687 rele2 = cwdi->cwdi_rdir; 688 vref(newdp); 689 cwdi->cwdi_rdir = newdp; 690 } 691 rw_exit(&cwdi->cwdi_lock); 692 cwdfree(cwdi); 693 if (rele1 != NULL) 694 vrele(rele1); 695 if (rele2 != NULL) 696 vrele(rele2); 697 mutex_enter(&proc_lock); 698 break; 699 } 700 mutex_exit(&proc_lock); 701 } while (retry); 702 703 if (rootvnode == olddp) { 704 vrele(rootvnode); 705 vref(newdp); 706 rootvnode = newdp; 707 } 708 vput(newdp); 709 } 710 711 /* 712 * Start extended attributes 713 */ 714 static int 715 start_extattr(struct mount *mp) 716 { 717 int error; 718 719 error = VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, NULL, 0, NULL); 720 if (error) 721 printf("%s: failed to start extattr: error = %d\n", 722 mp->mnt_stat.f_mntonname, error); 723 724 return error; 725 } 726 727 int 728 mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops, 729 const char *path, int flags, void *data, size_t *data_len) 730 { 731 vnode_t *vp = *vpp; 732 struct mount *mp; 733 struct pathbuf *pb; 734 struct nameidata nd; 735 int error, error2; 736 737 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 738 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 739 if (error) { 740 vfs_delref(vfsops); 741 return error; 742 } 743 744 /* Cannot make a non-dir a mount-point (from here anyway). */ 745 if (vp->v_type != VDIR) { 746 vfs_delref(vfsops); 747 return ENOTDIR; 748 } 749 750 if (flags & MNT_EXPORTED) { 751 vfs_delref(vfsops); 752 return EINVAL; 753 } 754 755 if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) { 756 vfs_delref(vfsops); 757 return ENOMEM; 758 } 759 760 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 761 762 /* 763 * The underlying file system may refuse the mount for 764 * various reasons. Allow the user to force it to happen. 765 * 766 * Set the mount level flags. 767 */ 768 mp->mnt_flag = flags & (MNT_BASIC_FLAGS | MNT_FORCE | MNT_IGNORE); 769 770 mutex_enter(mp->mnt_updating); 771 error = VFS_MOUNT(mp, path, data, data_len); 772 mp->mnt_flag &= ~MNT_OP_FLAGS; 773 774 if (error != 0) 775 goto err_unmounted; 776 777 /* 778 * Validate and prepare the mount point. 779 */ 780 error = pathbuf_copyin(path, &pb); 781 if (error != 0) { 782 goto err_mounted; 783 } 784 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 785 error = namei(&nd); 786 pathbuf_destroy(pb); 787 if (error != 0) { 788 goto err_mounted; 789 } 790 if (nd.ni_vp != vp) { 791 vput(nd.ni_vp); 792 error = EINVAL; 793 goto err_mounted; 794 } 795 if (vp->v_mountedhere != NULL) { 796 vput(nd.ni_vp); 797 error = EBUSY; 798 goto err_mounted; 799 } 800 error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0); 801 if (error != 0) { 802 vput(nd.ni_vp); 803 goto err_mounted; 804 } 805 806 /* 807 * Put the new filesystem on the mount list after root. 808 */ 809 cache_purge(vp); 810 mp->mnt_iflag &= ~IMNT_WANTRDWR; 811 812 mountlist_append(mp); 813 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 814 vfs_syncer_add_to_worklist(mp); 815 vp->v_mountedhere = mp; 816 vput(nd.ni_vp); 817 818 mount_checkdirs(vp); 819 mutex_exit(mp->mnt_updating); 820 821 /* Hold an additional reference to the mount across VFS_START(). */ 822 vfs_ref(mp); 823 (void) VFS_STATVFS(mp, &mp->mnt_stat); 824 error = VFS_START(mp, 0); 825 if (error) { 826 vrele(vp); 827 } else if (flags & MNT_EXTATTR) { 828 if (start_extattr(mp) != 0) 829 mp->mnt_flag &= ~MNT_EXTATTR; 830 } 831 /* Drop reference held for VFS_START(). */ 832 vfs_rele(mp); 833 *vpp = NULL; 834 return error; 835 836 err_mounted: 837 do { 838 error2 = vfs_suspend(mp, 0); 839 } while (error2 == EINTR || error2 == ERESTART); 840 KASSERT(error2 == 0 || error2 == EOPNOTSUPP); 841 842 if (VFS_UNMOUNT(mp, MNT_FORCE) != 0) 843 panic("Unmounting fresh file system failed"); 844 845 if (error2 == 0) 846 vfs_resume(mp); 847 848 err_unmounted: 849 vp->v_mountedhere = NULL; 850 mutex_exit(mp->mnt_updating); 851 vfs_rele(mp); 852 853 return error; 854 } 855 856 /* 857 * Do the actual file system unmount. File system is assumed to have 858 * been locked by the caller. 859 * 860 * => Caller hold reference to the mount, explicitly for dounmount(). 861 */ 862 int 863 dounmount(struct mount *mp, int flags, struct lwp *l) 864 { 865 vnode_t *coveredvp; 866 int error, async, used_syncer, used_extattr; 867 const bool was_suspended = fstrans_is_owner(mp); 868 869 #if NVERIEXEC > 0 870 error = veriexec_unmountchk(mp); 871 if (error) 872 return (error); 873 #endif /* NVERIEXEC > 0 */ 874 875 if (!was_suspended) { 876 error = vfs_suspend(mp, 0); 877 if (error) { 878 return error; 879 } 880 } 881 882 KASSERT((mp->mnt_iflag & IMNT_GONE) == 0); 883 884 used_syncer = (mp->mnt_iflag & IMNT_ONWORKLIST) != 0; 885 used_extattr = mp->mnt_flag & MNT_EXTATTR; 886 887 mp->mnt_iflag |= IMNT_UNMOUNT; 888 mutex_enter(mp->mnt_updating); 889 async = mp->mnt_flag & MNT_ASYNC; 890 mp->mnt_flag &= ~MNT_ASYNC; 891 cache_purgevfs(mp); /* remove cache entries for this file sys */ 892 if (used_syncer) 893 vfs_syncer_remove_from_worklist(mp); 894 error = 0; 895 if (((mp->mnt_flag & MNT_RDONLY) == 0) && ((flags & MNT_FORCE) == 0)) { 896 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 897 } 898 if (error == 0 || (flags & MNT_FORCE)) { 899 error = VFS_UNMOUNT(mp, flags); 900 } 901 if (error) { 902 mp->mnt_iflag &= ~IMNT_UNMOUNT; 903 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 904 vfs_syncer_add_to_worklist(mp); 905 mp->mnt_flag |= async; 906 mutex_exit(mp->mnt_updating); 907 if (!was_suspended) 908 vfs_resume(mp); 909 if (used_extattr) { 910 if (start_extattr(mp) != 0) 911 mp->mnt_flag &= ~MNT_EXTATTR; 912 else 913 mp->mnt_flag |= MNT_EXTATTR; 914 } 915 return (error); 916 } 917 mutex_exit(mp->mnt_updating); 918 919 /* 920 * mark filesystem as gone to prevent further umounts 921 * after mnt_umounting lock is gone, this also prevents 922 * vfs_busy() from succeeding. 923 */ 924 mp->mnt_iflag |= IMNT_GONE; 925 if (!was_suspended) 926 vfs_resume(mp); 927 928 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { 929 vn_lock(coveredvp, LK_EXCLUSIVE | LK_RETRY); 930 coveredvp->v_mountedhere = NULL; 931 VOP_UNLOCK(coveredvp); 932 } 933 mountlist_remove(mp); 934 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 935 panic("unmount: dangling vnode"); 936 vfs_hooks_unmount(mp); 937 938 vfs_rele(mp); /* reference from mount() */ 939 if (coveredvp != NULLVP) { 940 vrele(coveredvp); 941 } 942 return (0); 943 } 944 945 /* 946 * Unmount all file systems. 947 * We traverse the list in reverse order under the assumption that doing so 948 * will avoid needing to worry about dependencies. 949 */ 950 bool 951 vfs_unmountall(struct lwp *l) 952 { 953 954 printf("unmounting file systems...\n"); 955 return vfs_unmountall1(l, true, true); 956 } 957 958 static void 959 vfs_unmount_print(struct mount *mp, const char *pfx) 960 { 961 962 aprint_verbose("%sunmounted %s on %s type %s\n", pfx, 963 mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, 964 mp->mnt_stat.f_fstypename); 965 } 966 967 /* 968 * Return the mount with the highest generation less than "gen". 969 */ 970 static struct mount * 971 vfs_unmount_next(uint64_t gen) 972 { 973 mount_iterator_t *iter; 974 struct mount *mp, *nmp; 975 976 nmp = NULL; 977 978 mountlist_iterator_init(&iter); 979 while ((mp = mountlist_iterator_next(iter)) != NULL) { 980 if ((nmp == NULL || mp->mnt_gen > nmp->mnt_gen) && 981 mp->mnt_gen < gen) { 982 if (nmp != NULL) 983 vfs_rele(nmp); 984 nmp = mp; 985 vfs_ref(nmp); 986 } 987 } 988 mountlist_iterator_destroy(iter); 989 990 return nmp; 991 } 992 993 bool 994 vfs_unmount_forceone(struct lwp *l) 995 { 996 struct mount *mp; 997 int error; 998 999 mp = vfs_unmount_next(mountgen); 1000 if (mp == NULL) { 1001 return false; 1002 } 1003 1004 #ifdef DEBUG 1005 printf("forcefully unmounting %s (%s)...\n", 1006 mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname); 1007 #endif 1008 if ((error = dounmount(mp, MNT_FORCE, l)) == 0) { 1009 vfs_unmount_print(mp, "forcefully "); 1010 return true; 1011 } else { 1012 vfs_rele(mp); 1013 } 1014 1015 #ifdef DEBUG 1016 printf("forceful unmount of %s failed with error %d\n", 1017 mp->mnt_stat.f_mntonname, error); 1018 #endif 1019 1020 return false; 1021 } 1022 1023 bool 1024 vfs_unmountall1(struct lwp *l, bool force, bool verbose) 1025 { 1026 struct mount *mp; 1027 mount_iterator_t *iter; 1028 bool any_error = false, progress = false; 1029 uint64_t gen; 1030 int error; 1031 1032 gen = mountgen; 1033 for (;;) { 1034 mp = vfs_unmount_next(gen); 1035 if (mp == NULL) 1036 break; 1037 gen = mp->mnt_gen; 1038 1039 #ifdef DEBUG 1040 printf("unmounting %p %s (%s)...\n", 1041 (void *)mp, mp->mnt_stat.f_mntonname, 1042 mp->mnt_stat.f_mntfromname); 1043 #endif 1044 if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0) { 1045 vfs_unmount_print(mp, ""); 1046 progress = true; 1047 } else { 1048 vfs_rele(mp); 1049 if (verbose) { 1050 printf("unmount of %s failed with error %d\n", 1051 mp->mnt_stat.f_mntonname, error); 1052 } 1053 any_error = true; 1054 } 1055 } 1056 if (verbose) { 1057 printf("unmounting done\n"); 1058 } 1059 if (any_error && verbose) { 1060 printf("WARNING: some file systems would not unmount\n"); 1061 } 1062 /* If the mountlist is empty it is time to remove swap. */ 1063 mountlist_iterator_init(&iter); 1064 if (mountlist_iterator_next(iter) == NULL) { 1065 uvm_swap_shutdown(l); 1066 } 1067 mountlist_iterator_destroy(iter); 1068 1069 return progress; 1070 } 1071 1072 void 1073 vfs_sync_all(struct lwp *l) 1074 { 1075 printf("syncing disks... "); 1076 1077 /* remove user processes from run queue */ 1078 suspendsched(); 1079 (void)spl0(); 1080 1081 /* avoid coming back this way again if we panic. */ 1082 doing_shutdown = 1; 1083 1084 do_sys_sync(l); 1085 1086 /* Wait for sync to finish. */ 1087 if (vfs_syncwait() != 0) { 1088 #if defined(DDB) && defined(DEBUG_HALT_BUSY) 1089 Debugger(); 1090 #endif 1091 printf("giving up\n"); 1092 return; 1093 } else 1094 printf("done\n"); 1095 } 1096 1097 /* 1098 * Sync and unmount file systems before shutting down. 1099 */ 1100 void 1101 vfs_shutdown(void) 1102 { 1103 lwp_t *l = curlwp; 1104 1105 vfs_sync_all(l); 1106 1107 /* 1108 * If we have paniced - do not make the situation potentially 1109 * worse by unmounting the file systems. 1110 */ 1111 if (panicstr != NULL) { 1112 return; 1113 } 1114 1115 /* Unmount file systems. */ 1116 vfs_unmountall(l); 1117 } 1118 1119 /* 1120 * Print a list of supported file system types (used by vfs_mountroot) 1121 */ 1122 static void 1123 vfs_print_fstypes(void) 1124 { 1125 struct vfsops *v; 1126 int cnt = 0; 1127 1128 mutex_enter(&vfs_list_lock); 1129 LIST_FOREACH(v, &vfs_list, vfs_list) 1130 ++cnt; 1131 mutex_exit(&vfs_list_lock); 1132 1133 if (cnt == 0) { 1134 printf("WARNING: No file system modules have been loaded.\n"); 1135 return; 1136 } 1137 1138 printf("Supported file systems:"); 1139 mutex_enter(&vfs_list_lock); 1140 LIST_FOREACH(v, &vfs_list, vfs_list) { 1141 printf(" %s", v->vfs_name); 1142 } 1143 mutex_exit(&vfs_list_lock); 1144 printf("\n"); 1145 } 1146 1147 /* 1148 * Mount the root file system. If the operator didn't specify a 1149 * file system to use, try all possible file systems until one 1150 * succeeds. 1151 */ 1152 int 1153 vfs_mountroot(void) 1154 { 1155 struct vfsops *v; 1156 int error = ENODEV; 1157 1158 if (root_device == NULL) 1159 panic("vfs_mountroot: root device unknown"); 1160 1161 switch (device_class(root_device)) { 1162 case DV_IFNET: 1163 if (rootdev != NODEV) 1164 panic("vfs_mountroot: rootdev set for DV_IFNET " 1165 "(0x%llx -> %llu,%llu)", 1166 (unsigned long long)rootdev, 1167 (unsigned long long)major(rootdev), 1168 (unsigned long long)minor(rootdev)); 1169 break; 1170 1171 case DV_DISK: 1172 if (rootdev == NODEV) 1173 panic("vfs_mountroot: rootdev not set for DV_DISK"); 1174 if (bdevvp(rootdev, &rootvp)) 1175 panic("vfs_mountroot: can't get vnode for rootdev"); 1176 error = VOP_OPEN(rootvp, FREAD, FSCRED); 1177 if (error) { 1178 printf("vfs_mountroot: can't open root device\n"); 1179 return (error); 1180 } 1181 break; 1182 1183 case DV_VIRTUAL: 1184 break; 1185 1186 default: 1187 printf("%s: inappropriate for root file system\n", 1188 device_xname(root_device)); 1189 return (ENODEV); 1190 } 1191 1192 /* 1193 * If user specified a root fs type, use it. Make sure the 1194 * specified type exists and has a mount_root() 1195 */ 1196 if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) { 1197 v = vfs_getopsbyname(rootfstype); 1198 error = EFTYPE; 1199 if (v != NULL) { 1200 if (v->vfs_mountroot != NULL) { 1201 error = (v->vfs_mountroot)(); 1202 } 1203 v->vfs_refcount--; 1204 } 1205 goto done; 1206 } 1207 1208 /* 1209 * Try each file system currently configured into the kernel. 1210 */ 1211 mutex_enter(&vfs_list_lock); 1212 LIST_FOREACH(v, &vfs_list, vfs_list) { 1213 if (v->vfs_mountroot == NULL) 1214 continue; 1215 #ifdef DEBUG 1216 aprint_normal("mountroot: trying %s...\n", v->vfs_name); 1217 #endif 1218 v->vfs_refcount++; 1219 mutex_exit(&vfs_list_lock); 1220 error = (*v->vfs_mountroot)(); 1221 mutex_enter(&vfs_list_lock); 1222 v->vfs_refcount--; 1223 if (!error) { 1224 aprint_normal("root file system type: %s\n", 1225 v->vfs_name); 1226 break; 1227 } 1228 } 1229 mutex_exit(&vfs_list_lock); 1230 1231 if (v == NULL) { 1232 vfs_print_fstypes(); 1233 printf("no file system for %s", device_xname(root_device)); 1234 if (device_class(root_device) == DV_DISK) 1235 printf(" (dev 0x%llx)", (unsigned long long)rootdev); 1236 printf("\n"); 1237 error = EFTYPE; 1238 } 1239 1240 done: 1241 if (error && device_class(root_device) == DV_DISK) { 1242 VOP_CLOSE(rootvp, FREAD, FSCRED); 1243 vrele(rootvp); 1244 } 1245 if (error == 0) { 1246 mount_iterator_t *iter; 1247 struct mount *mp; 1248 extern struct cwdinfo cwdi0; 1249 1250 mountlist_iterator_init(&iter); 1251 mp = mountlist_iterator_next(iter); 1252 KASSERT(mp != NULL); 1253 mountlist_iterator_destroy(iter); 1254 1255 mp->mnt_flag |= MNT_ROOTFS; 1256 mp->mnt_op->vfs_refcount++; 1257 1258 /* 1259 * Get the vnode for '/'. Set cwdi0.cwdi_cdir to 1260 * reference it, and donate it the reference grabbed 1261 * with VFS_ROOT(). 1262 */ 1263 error = VFS_ROOT(mp, LK_NONE, &rootvnode); 1264 if (error) 1265 panic("cannot find root vnode, error=%d", error); 1266 cwdi0.cwdi_cdir = rootvnode; 1267 cwdi0.cwdi_rdir = NULL; 1268 1269 /* 1270 * Now that root is mounted, we can fixup initproc's CWD 1271 * info. All other processes are kthreads, which merely 1272 * share proc0's CWD info. 1273 */ 1274 initproc->p_cwdi->cwdi_cdir = rootvnode; 1275 vref(initproc->p_cwdi->cwdi_cdir); 1276 initproc->p_cwdi->cwdi_rdir = NULL; 1277 /* 1278 * Enable loading of modules from the filesystem 1279 */ 1280 module_load_vfs_init(); 1281 1282 } 1283 return (error); 1284 } 1285 1286 /* 1287 * mount_specific_key_create -- 1288 * Create a key for subsystem mount-specific data. 1289 */ 1290 int 1291 mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor) 1292 { 1293 1294 return specificdata_key_create(mount_specificdata_domain, keyp, dtor); 1295 } 1296 1297 /* 1298 * mount_specific_key_delete -- 1299 * Delete a key for subsystem mount-specific data. 1300 */ 1301 void 1302 mount_specific_key_delete(specificdata_key_t key) 1303 { 1304 1305 specificdata_key_delete(mount_specificdata_domain, key); 1306 } 1307 1308 /* 1309 * mount_initspecific -- 1310 * Initialize a mount's specificdata container. 1311 */ 1312 void 1313 mount_initspecific(struct mount *mp) 1314 { 1315 int error __diagused; 1316 1317 error = specificdata_init(mount_specificdata_domain, 1318 &mp->mnt_specdataref); 1319 KASSERT(error == 0); 1320 } 1321 1322 /* 1323 * mount_finispecific -- 1324 * Finalize a mount's specificdata container. 1325 */ 1326 void 1327 mount_finispecific(struct mount *mp) 1328 { 1329 1330 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 1331 } 1332 1333 /* 1334 * mount_getspecific -- 1335 * Return mount-specific data corresponding to the specified key. 1336 */ 1337 void * 1338 mount_getspecific(struct mount *mp, specificdata_key_t key) 1339 { 1340 1341 return specificdata_getspecific(mount_specificdata_domain, 1342 &mp->mnt_specdataref, key); 1343 } 1344 1345 /* 1346 * mount_setspecific -- 1347 * Set mount-specific data corresponding to the specified key. 1348 */ 1349 void 1350 mount_setspecific(struct mount *mp, specificdata_key_t key, void *data) 1351 { 1352 1353 specificdata_setspecific(mount_specificdata_domain, 1354 &mp->mnt_specdataref, key, data); 1355 } 1356 1357 /* 1358 * Check to see if a filesystem is mounted on a block device. 1359 */ 1360 int 1361 vfs_mountedon(vnode_t *vp) 1362 { 1363 vnode_t *vq; 1364 int error = 0; 1365 1366 if (vp->v_type != VBLK) 1367 return ENOTBLK; 1368 if (spec_node_getmountedfs(vp) != NULL) 1369 return EBUSY; 1370 if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, &vq) == 0) { 1371 if (spec_node_getmountedfs(vq) != NULL) 1372 error = EBUSY; 1373 vrele(vq); 1374 } 1375 1376 return error; 1377 } 1378 1379 /* 1380 * Check if a device pointed to by vp is mounted. 1381 * 1382 * Returns: 1383 * EINVAL if it's not a disk 1384 * EBUSY if it's a disk and mounted 1385 * 0 if it's a disk and not mounted 1386 */ 1387 int 1388 rawdev_mounted(vnode_t *vp, vnode_t **bvpp) 1389 { 1390 vnode_t *bvp; 1391 dev_t dev; 1392 int d_type; 1393 1394 bvp = NULL; 1395 d_type = D_OTHER; 1396 1397 if (iskmemvp(vp)) 1398 return EINVAL; 1399 1400 switch (vp->v_type) { 1401 case VCHR: { 1402 const struct cdevsw *cdev; 1403 1404 dev = vp->v_rdev; 1405 cdev = cdevsw_lookup(dev); 1406 if (cdev != NULL) { 1407 dev_t blkdev; 1408 1409 blkdev = devsw_chr2blk(dev); 1410 if (blkdev != NODEV) { 1411 if (vfinddev(blkdev, VBLK, &bvp) != 0) { 1412 d_type = (cdev->d_flag & D_TYPEMASK); 1413 /* XXX: what if bvp disappears? */ 1414 vrele(bvp); 1415 } 1416 } 1417 } 1418 1419 break; 1420 } 1421 1422 case VBLK: { 1423 const struct bdevsw *bdev; 1424 1425 dev = vp->v_rdev; 1426 bdev = bdevsw_lookup(dev); 1427 if (bdev != NULL) 1428 d_type = (bdev->d_flag & D_TYPEMASK); 1429 1430 bvp = vp; 1431 1432 break; 1433 } 1434 1435 default: 1436 break; 1437 } 1438 1439 if (d_type != D_DISK) 1440 return EINVAL; 1441 1442 if (bvpp != NULL) 1443 *bvpp = bvp; 1444 1445 /* 1446 * XXX: This is bogus. We should be failing the request 1447 * XXX: not only if this specific slice is mounted, but 1448 * XXX: if it's on a disk with any other mounted slice. 1449 */ 1450 if (vfs_mountedon(bvp)) 1451 return EBUSY; 1452 1453 return 0; 1454 } 1455 1456 /* 1457 * Make a 'unique' number from a mount type name. 1458 */ 1459 long 1460 makefstype(const char *type) 1461 { 1462 long rv; 1463 1464 for (rv = 0; *type; type++) { 1465 rv <<= 2; 1466 rv ^= *type; 1467 } 1468 return rv; 1469 } 1470 1471 static struct mountlist_entry * 1472 mountlist_alloc(enum mountlist_type type, struct mount *mp) 1473 { 1474 struct mountlist_entry *me; 1475 1476 me = kmem_zalloc(sizeof(*me), KM_SLEEP); 1477 me->me_mount = mp; 1478 me->me_type = type; 1479 1480 return me; 1481 } 1482 1483 static void 1484 mountlist_free(struct mountlist_entry *me) 1485 { 1486 1487 kmem_free(me, sizeof(*me)); 1488 } 1489 1490 void 1491 mountlist_iterator_init(mount_iterator_t **mip) 1492 { 1493 struct mountlist_entry *me; 1494 1495 me = mountlist_alloc(ME_MARKER, NULL); 1496 mutex_enter(&mountlist_lock); 1497 TAILQ_INSERT_HEAD(&mountlist, me, me_list); 1498 mutex_exit(&mountlist_lock); 1499 *mip = (mount_iterator_t *)me; 1500 } 1501 1502 void 1503 mountlist_iterator_destroy(mount_iterator_t *mi) 1504 { 1505 struct mountlist_entry *marker = &mi->mi_entry; 1506 1507 if (marker->me_mount != NULL) 1508 vfs_unbusy(marker->me_mount); 1509 1510 mutex_enter(&mountlist_lock); 1511 TAILQ_REMOVE(&mountlist, marker, me_list); 1512 mutex_exit(&mountlist_lock); 1513 1514 mountlist_free(marker); 1515 1516 } 1517 1518 /* 1519 * Return the next mount or NULL for this iterator. 1520 * Mark it busy on success. 1521 */ 1522 static inline struct mount * 1523 _mountlist_iterator_next(mount_iterator_t *mi, bool wait) 1524 { 1525 struct mountlist_entry *me, *marker = &mi->mi_entry; 1526 struct mount *mp; 1527 int error; 1528 1529 if (marker->me_mount != NULL) { 1530 vfs_unbusy(marker->me_mount); 1531 marker->me_mount = NULL; 1532 } 1533 1534 mutex_enter(&mountlist_lock); 1535 for (;;) { 1536 KASSERT(marker->me_type == ME_MARKER); 1537 1538 me = TAILQ_NEXT(marker, me_list); 1539 if (me == NULL) { 1540 /* End of list: keep marker and return. */ 1541 mutex_exit(&mountlist_lock); 1542 return NULL; 1543 } 1544 TAILQ_REMOVE(&mountlist, marker, me_list); 1545 TAILQ_INSERT_AFTER(&mountlist, me, marker, me_list); 1546 1547 /* Skip other markers. */ 1548 if (me->me_type != ME_MOUNT) 1549 continue; 1550 1551 /* Take an initial reference for vfs_busy() below. */ 1552 mp = me->me_mount; 1553 KASSERT(mp != NULL); 1554 vfs_ref(mp); 1555 mutex_exit(&mountlist_lock); 1556 1557 /* Try to mark this mount busy and return on success. */ 1558 if (wait) 1559 error = vfs_busy(mp); 1560 else 1561 error = vfs_trybusy(mp); 1562 if (error == 0) { 1563 vfs_rele(mp); 1564 marker->me_mount = mp; 1565 return mp; 1566 } 1567 vfs_rele(mp); 1568 mutex_enter(&mountlist_lock); 1569 } 1570 } 1571 1572 struct mount * 1573 mountlist_iterator_next(mount_iterator_t *mi) 1574 { 1575 1576 return _mountlist_iterator_next(mi, true); 1577 } 1578 1579 struct mount * 1580 mountlist_iterator_trynext(mount_iterator_t *mi) 1581 { 1582 1583 return _mountlist_iterator_next(mi, false); 1584 } 1585 1586 /* 1587 * Attach new mount to the end of the mount list. 1588 */ 1589 void 1590 mountlist_append(struct mount *mp) 1591 { 1592 struct mountlist_entry *me; 1593 1594 me = mountlist_alloc(ME_MOUNT, mp); 1595 mutex_enter(&mountlist_lock); 1596 TAILQ_INSERT_TAIL(&mountlist, me, me_list); 1597 mutex_exit(&mountlist_lock); 1598 } 1599 1600 /* 1601 * Remove mount from mount list. 1602 */void 1603 mountlist_remove(struct mount *mp) 1604 { 1605 struct mountlist_entry *me; 1606 1607 mutex_enter(&mountlist_lock); 1608 TAILQ_FOREACH(me, &mountlist, me_list) 1609 if (me->me_type == ME_MOUNT && me->me_mount == mp) 1610 break; 1611 KASSERT(me != NULL); 1612 TAILQ_REMOVE(&mountlist, me, me_list); 1613 mutex_exit(&mountlist_lock); 1614 mountlist_free(me); 1615 } 1616 1617 /* 1618 * Unlocked variant to traverse the mountlist. 1619 * To be used from DDB only. 1620 */ 1621 struct mount * 1622 _mountlist_next(struct mount *mp) 1623 { 1624 struct mountlist_entry *me; 1625 1626 if (mp == NULL) { 1627 me = TAILQ_FIRST(&mountlist); 1628 } else { 1629 TAILQ_FOREACH(me, &mountlist, me_list) 1630 if (me->me_type == ME_MOUNT && me->me_mount == mp) 1631 break; 1632 if (me != NULL) 1633 me = TAILQ_NEXT(me, me_list); 1634 } 1635 1636 while (me != NULL && me->me_type != ME_MOUNT) 1637 me = TAILQ_NEXT(me, me_list); 1638 1639 return (me ? me->me_mount : NULL); 1640 } 1641