1 /* $NetBSD: vfs_mount.c,v 1.75 2020/02/23 22:14:03 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 1997-2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1989, 1993 35 * The Regents of the University of California. All rights reserved. 36 * (c) UNIX System Laboratories, Inc. 
37 * All or some portions of this file are derived from material licensed 38 * to the University of California by American Telephone and Telegraph 39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 40 * the permission of UNIX System Laboratories, Inc. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.75 2020/02/23 22:14:03 ad Exp $");

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/device.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/fstrans.h>
#include <sys/namei.h>
#include <sys/extattr.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vfs_syscalls.h>
#include <sys/vnode_impl.h>
#include <sys/xcall.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/specfs/specdev.h>

/*
 * Discriminator stored in mountlist_entry.me_type: tells a list entry
 * that carries a real mount apart from a marker entry.
 */
enum mountlist_type {
	ME_MOUNT,
	ME_MARKER
};

/* One element of the global mount list. */
struct mountlist_entry {
	TAILQ_ENTRY(mountlist_entry) me_list;	/* Mount list. */
	struct mount *me_mount;			/* Actual mount if ME_MOUNT,
						   current mount else. */
	enum mountlist_type me_type;		/* Mount or marker. */
};

/*
 * Iterator handle; wraps a single mount list entry.
 * NOTE(review): presumably linked into the list as an ME_MARKER entry --
 * the mountlist_iterator_*() functions are outside this view, confirm.
 */
struct mount_iterator {
	struct mountlist_entry mi_entry;
};

/*
 * Forward declaration: worker shared by vfs_vnode_iterator_next() and
 * vflushnext(); the trailing bool selects whether to wait on dying vnodes.
 */
static struct vnode *vfs_vnode_iterator_next1(struct vnode_iterator *,
    bool (*)(void *, struct vnode *), void *, bool);

/* Root filesystem. */
vnode_t *			rootvnode;

/* Mounted filesystem list.
*/ 119 static TAILQ_HEAD(mountlist, mountlist_entry) mountlist; 120 static kmutex_t mountlist_lock __cacheline_aligned; 121 int vnode_offset_next_by_lru /* XXX: ugly hack for pstat.c */ 122 = offsetof(vnode_impl_t, vi_lrulist.tqe_next); 123 124 kmutex_t vfs_list_lock __cacheline_aligned; 125 126 static specificdata_domain_t mount_specificdata_domain; 127 static kmutex_t mntid_lock; 128 129 static kmutex_t mountgen_lock __cacheline_aligned; 130 static uint64_t mountgen; 131 132 void 133 vfs_mount_sysinit(void) 134 { 135 136 TAILQ_INIT(&mountlist); 137 mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE); 138 mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE); 139 140 mount_specificdata_domain = specificdata_domain_create(); 141 mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE); 142 mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE); 143 mountgen = 0; 144 } 145 146 struct mount * 147 vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp) 148 { 149 struct mount *mp; 150 int error __diagused; 151 152 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP); 153 mp->mnt_op = vfsops; 154 mp->mnt_refcnt = 1; 155 TAILQ_INIT(&mp->mnt_vnodelist); 156 mp->mnt_renamelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 157 mp->mnt_vnodelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 158 mp->mnt_updating = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 159 mp->mnt_vnodecovered = vp; 160 mount_initspecific(mp); 161 162 error = fstrans_mount(mp); 163 KASSERT(error == 0); 164 165 mutex_enter(&mountgen_lock); 166 mp->mnt_gen = mountgen++; 167 mutex_exit(&mountgen_lock); 168 169 return mp; 170 } 171 172 /* 173 * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and 174 * initialize a mount structure for it. 175 * 176 * Devname is usually updated by mount(8) after booting. 
177 */ 178 int 179 vfs_rootmountalloc(const char *fstypename, const char *devname, 180 struct mount **mpp) 181 { 182 struct vfsops *vfsp = NULL; 183 struct mount *mp; 184 int error __diagused; 185 186 mutex_enter(&vfs_list_lock); 187 LIST_FOREACH(vfsp, &vfs_list, vfs_list) 188 if (!strncmp(vfsp->vfs_name, fstypename, 189 sizeof(mp->mnt_stat.f_fstypename))) 190 break; 191 if (vfsp == NULL) { 192 mutex_exit(&vfs_list_lock); 193 return (ENODEV); 194 } 195 vfsp->vfs_refcount++; 196 mutex_exit(&vfs_list_lock); 197 198 if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL) 199 return ENOMEM; 200 error = vfs_busy(mp); 201 KASSERT(error == 0); 202 mp->mnt_flag = MNT_RDONLY; 203 (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, 204 sizeof(mp->mnt_stat.f_fstypename)); 205 mp->mnt_stat.f_mntonname[0] = '/'; 206 mp->mnt_stat.f_mntonname[1] = '\0'; 207 mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] = 208 '\0'; 209 (void)copystr(devname, mp->mnt_stat.f_mntfromname, 210 sizeof(mp->mnt_stat.f_mntfromname) - 1, 0); 211 *mpp = mp; 212 return 0; 213 } 214 215 /* 216 * vfs_getnewfsid: get a new unique fsid. 217 */ 218 void 219 vfs_getnewfsid(struct mount *mp) 220 { 221 static u_short xxxfs_mntid; 222 fsid_t tfsid; 223 int mtype; 224 225 mutex_enter(&mntid_lock); 226 mtype = makefstype(mp->mnt_op->vfs_name); 227 mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0); 228 mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype; 229 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 230 if (xxxfs_mntid == 0) 231 ++xxxfs_mntid; 232 tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid); 233 tfsid.__fsid_val[1] = mtype; 234 while (vfs_getvfs(&tfsid)) { 235 tfsid.__fsid_val[0]++; 236 xxxfs_mntid++; 237 } 238 mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0]; 239 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 240 mutex_exit(&mntid_lock); 241 } 242 243 /* 244 * Lookup a mount point by filesystem identifier. 
245 * 246 * XXX Needs to add a reference to the mount point. 247 */ 248 struct mount * 249 vfs_getvfs(fsid_t *fsid) 250 { 251 mount_iterator_t *iter; 252 struct mount *mp; 253 254 mountlist_iterator_init(&iter); 255 while ((mp = mountlist_iterator_next(iter)) != NULL) { 256 if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] && 257 mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) { 258 mountlist_iterator_destroy(iter); 259 return mp; 260 } 261 } 262 mountlist_iterator_destroy(iter); 263 return NULL; 264 } 265 266 /* 267 * Take a reference to a mount structure. 268 */ 269 void 270 vfs_ref(struct mount *mp) 271 { 272 273 KASSERT(mp->mnt_refcnt > 0 || mutex_owned(&mountlist_lock)); 274 275 atomic_inc_uint(&mp->mnt_refcnt); 276 } 277 278 /* 279 * Drop a reference to a mount structure, freeing if the last reference. 280 */ 281 void 282 vfs_rele(struct mount *mp) 283 { 284 285 if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) { 286 return; 287 } 288 289 /* 290 * Nothing else has visibility of the mount: we can now 291 * free the data structures. 292 */ 293 KASSERT(mp->mnt_refcnt == 0); 294 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 295 mutex_obj_free(mp->mnt_updating); 296 mutex_obj_free(mp->mnt_renamelock); 297 mutex_obj_free(mp->mnt_vnodelock); 298 if (mp->mnt_op != NULL) { 299 vfs_delref(mp->mnt_op); 300 } 301 fstrans_unmount(mp); 302 /* 303 * Final free of mp gets done from fstrans_mount_dtor(). 304 * 305 * Prevents this memory to be reused as a mount before 306 * fstrans releases all references to it. 307 */ 308 } 309 310 /* 311 * Mark a mount point as busy, and gain a new reference to it. Used to 312 * prevent the file system from being unmounted during critical sections. 313 * 314 * vfs_busy can be called multiple times and by multiple threads 315 * and must be accompanied by the same number of vfs_unbusy calls. 316 * 317 * => The caller must hold a pre-existing reference to the mount. 
318 * => Will fail if the file system is being unmounted, or is unmounted. 319 */ 320 static inline int 321 _vfs_busy(struct mount *mp, bool wait) 322 { 323 324 KASSERT(mp->mnt_refcnt > 0); 325 326 if (wait) { 327 fstrans_start(mp); 328 } else { 329 if (fstrans_start_nowait(mp)) 330 return EBUSY; 331 } 332 if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) { 333 fstrans_done(mp); 334 return ENOENT; 335 } 336 vfs_ref(mp); 337 return 0; 338 } 339 340 int 341 vfs_busy(struct mount *mp) 342 { 343 344 return _vfs_busy(mp, true); 345 } 346 347 int 348 vfs_trybusy(struct mount *mp) 349 { 350 351 return _vfs_busy(mp, false); 352 } 353 354 /* 355 * Unbusy a busy filesystem. 356 * 357 * Every successful vfs_busy() call must be undone by a vfs_unbusy() call. 358 */ 359 void 360 vfs_unbusy(struct mount *mp) 361 { 362 363 KASSERT(mp->mnt_refcnt > 0); 364 365 fstrans_done(mp); 366 vfs_rele(mp); 367 } 368 369 struct vnode_iterator { 370 vnode_impl_t vi_vnode; 371 }; 372 373 void 374 vfs_vnode_iterator_init(struct mount *mp, struct vnode_iterator **vnip) 375 { 376 vnode_t *vp; 377 vnode_impl_t *vip; 378 379 vp = vnalloc_marker(mp); 380 vip = VNODE_TO_VIMPL(vp); 381 382 mutex_enter(mp->mnt_vnodelock); 383 TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vip, vi_mntvnodes); 384 vp->v_usecount = 1; 385 mutex_exit(mp->mnt_vnodelock); 386 387 *vnip = (struct vnode_iterator *)vip; 388 } 389 390 void 391 vfs_vnode_iterator_destroy(struct vnode_iterator *vni) 392 { 393 vnode_impl_t *mvip = &vni->vi_vnode; 394 vnode_t *mvp = VIMPL_TO_VNODE(mvip); 395 kmutex_t *lock; 396 397 KASSERT(vnis_marker(mvp)); 398 if (mvp->v_usecount != 0) { 399 lock = mvp->v_mount->mnt_vnodelock; 400 mutex_enter(lock); 401 TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvip, vi_mntvnodes); 402 mvp->v_usecount = 0; 403 mutex_exit(lock); 404 } 405 vnfree_marker(mvp); 406 } 407 408 static struct vnode * 409 vfs_vnode_iterator_next1(struct vnode_iterator *vni, 410 bool (*f)(void *, struct vnode *), void *cl, bool do_wait) 411 { 
412 vnode_impl_t *mvip = &vni->vi_vnode; 413 struct mount *mp = VIMPL_TO_VNODE(mvip)->v_mount; 414 vnode_t *vp; 415 vnode_impl_t *vip; 416 kmutex_t *lock; 417 int error; 418 419 KASSERT(vnis_marker(VIMPL_TO_VNODE(mvip))); 420 421 lock = mp->mnt_vnodelock; 422 do { 423 mutex_enter(lock); 424 vip = TAILQ_NEXT(mvip, vi_mntvnodes); 425 TAILQ_REMOVE(&mp->mnt_vnodelist, mvip, vi_mntvnodes); 426 VIMPL_TO_VNODE(mvip)->v_usecount = 0; 427 again: 428 if (vip == NULL) { 429 mutex_exit(lock); 430 return NULL; 431 } 432 vp = VIMPL_TO_VNODE(vip); 433 KASSERT(vp != NULL); 434 mutex_enter(vp->v_interlock); 435 if (vnis_marker(vp) || 436 vdead_check(vp, (do_wait ? 0 : VDEAD_NOWAIT)) || 437 (f && !(*f)(cl, vp))) { 438 mutex_exit(vp->v_interlock); 439 vip = TAILQ_NEXT(vip, vi_mntvnodes); 440 goto again; 441 } 442 443 TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vip, mvip, vi_mntvnodes); 444 VIMPL_TO_VNODE(mvip)->v_usecount = 1; 445 mutex_exit(lock); 446 error = vcache_vget(vp); 447 KASSERT(error == 0 || error == ENOENT); 448 } while (error != 0); 449 450 return vp; 451 } 452 453 struct vnode * 454 vfs_vnode_iterator_next(struct vnode_iterator *vni, 455 bool (*f)(void *, struct vnode *), void *cl) 456 { 457 458 return vfs_vnode_iterator_next1(vni, f, cl, false); 459 } 460 461 /* 462 * Move a vnode from one mount queue to another. 463 */ 464 void 465 vfs_insmntque(vnode_t *vp, struct mount *mp) 466 { 467 vnode_impl_t *vip = VNODE_TO_VIMPL(vp); 468 struct mount *omp; 469 kmutex_t *lock; 470 471 KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 || 472 vp->v_tag == VT_VFS); 473 474 /* 475 * Delete from old mount point vnode list, if on one. 476 */ 477 if ((omp = vp->v_mount) != NULL) { 478 lock = omp->mnt_vnodelock; 479 mutex_enter(lock); 480 TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vip, vi_mntvnodes); 481 mutex_exit(lock); 482 } 483 484 /* 485 * Insert into list of vnodes for the new mount point, if 486 * available. 
The caller must take a reference on the mount 487 * structure and donate to the vnode. 488 */ 489 if ((vp->v_mount = mp) != NULL) { 490 lock = mp->mnt_vnodelock; 491 mutex_enter(lock); 492 TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vip, vi_mntvnodes); 493 mutex_exit(lock); 494 } 495 496 if (omp != NULL) { 497 /* Release reference to old mount. */ 498 vfs_rele(omp); 499 } 500 } 501 502 /* 503 * Remove any vnodes in the vnode table belonging to mount point mp. 504 * 505 * If FORCECLOSE is not specified, there should not be any active ones, 506 * return error if any are found (nb: this is a user error, not a 507 * system error). If FORCECLOSE is specified, detach any active vnodes 508 * that are found. 509 * 510 * If WRITECLOSE is set, only flush out regular file vnodes open for 511 * writing. 512 * 513 * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped. 514 */ 515 #ifdef DEBUG 516 int busyprt = 0; /* print out busy vnodes */ 517 struct ctldebug debug1 = { "busyprt", &busyprt }; 518 #endif 519 520 static vnode_t * 521 vflushnext(struct vnode_iterator *marker, int *when) 522 { 523 if (hardclock_ticks > *when) { 524 yield(); 525 *when = hardclock_ticks + hz / 10; 526 } 527 return vfs_vnode_iterator_next1(marker, NULL, NULL, true); 528 } 529 530 /* 531 * Flush one vnode. Referenced on entry, unreferenced on return. 532 */ 533 static int 534 vflush_one(vnode_t *vp, vnode_t *skipvp, int flags) 535 { 536 int error; 537 struct vattr vattr; 538 539 if (vp == skipvp || 540 ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM))) { 541 vrele(vp); 542 return 0; 543 } 544 /* 545 * If WRITECLOSE is set, only flush out regular file 546 * vnodes open for writing or open and unlinked. 
547 */ 548 if ((flags & WRITECLOSE)) { 549 if (vp->v_type != VREG) { 550 vrele(vp); 551 return 0; 552 } 553 error = vn_lock(vp, LK_EXCLUSIVE); 554 if (error) { 555 KASSERT(error == ENOENT); 556 vrele(vp); 557 return 0; 558 } 559 error = VOP_FSYNC(vp, curlwp->l_cred, FSYNC_WAIT, 0, 0); 560 if (error == 0) 561 error = VOP_GETATTR(vp, &vattr, curlwp->l_cred); 562 VOP_UNLOCK(vp); 563 if (error) { 564 vrele(vp); 565 return error; 566 } 567 if (vp->v_writecount == 0 && vattr.va_nlink > 0) { 568 vrele(vp); 569 return 0; 570 } 571 } 572 /* 573 * First try to recycle the vnode. 574 */ 575 if (vrecycle(vp)) 576 return 0; 577 /* 578 * If FORCECLOSE is set, forcibly close the vnode. 579 * For block or character devices, revert to an 580 * anonymous device. For all other files, just 581 * kill them. 582 */ 583 if (flags & FORCECLOSE) { 584 if (vp->v_usecount > 1 && 585 (vp->v_type == VBLK || vp->v_type == VCHR)) 586 vcache_make_anon(vp); 587 else 588 vgone(vp); 589 return 0; 590 } 591 vrele(vp); 592 return EBUSY; 593 } 594 595 int 596 vflush(struct mount *mp, vnode_t *skipvp, int flags) 597 { 598 vnode_t *vp; 599 struct vnode_iterator *marker; 600 int busy, error, when, retries = 2; 601 602 do { 603 busy = error = when = 0; 604 605 /* 606 * First, flush out any vnode references from the 607 * deferred vrele list. 608 */ 609 vrele_flush(mp); 610 611 vfs_vnode_iterator_init(mp, &marker); 612 613 while ((vp = vflushnext(marker, &when)) != NULL) { 614 error = vflush_one(vp, skipvp, flags); 615 if (error == EBUSY) { 616 error = 0; 617 busy++; 618 #ifdef DEBUG 619 if (busyprt && retries == 0) 620 vprint("vflush: busy vnode", vp); 621 #endif 622 } else if (error != 0) { 623 break; 624 } 625 } 626 627 vfs_vnode_iterator_destroy(marker); 628 } while (error == 0 && busy > 0 && retries-- > 0); 629 630 if (error) 631 return error; 632 if (busy) 633 return EBUSY; 634 return 0; 635 } 636 637 /* 638 * Mount a file system. 
639 */ 640 641 /* 642 * Scan all active processes to see if any of them have a current or root 643 * directory onto which the new filesystem has just been mounted. If so, 644 * replace them with the new mount point. 645 */ 646 static void 647 mount_checkdirs(vnode_t *olddp) 648 { 649 vnode_t *newdp, *rele1, *rele2; 650 struct cwdinfo *cwdi; 651 struct proc *p; 652 bool retry; 653 654 if (olddp->v_usecount == 1) { 655 return; 656 } 657 if (VFS_ROOT(olddp->v_mountedhere, LK_EXCLUSIVE, &newdp)) 658 panic("mount: lost mount"); 659 660 do { 661 retry = false; 662 mutex_enter(proc_lock); 663 PROCLIST_FOREACH(p, &allproc) { 664 if ((cwdi = p->p_cwdi) == NULL) 665 continue; 666 /* 667 * Cannot change to the old directory any more, 668 * so even if we see a stale value it is not a 669 * problem. 670 */ 671 if (cwdi->cwdi_cdir != olddp && 672 cwdi->cwdi_rdir != olddp) 673 continue; 674 retry = true; 675 rele1 = NULL; 676 rele2 = NULL; 677 atomic_inc_uint(&cwdi->cwdi_refcnt); 678 mutex_exit(proc_lock); 679 mutex_enter(&cwdi->cwdi_lock); 680 if (cwdi->cwdi_cdir == olddp || 681 cwdi->cwdi_rdir == olddp) { 682 /* XXX belongs in vfs_cwd.c, but rump. 
*/ 683 xc_barrier(0); 684 if (cwdi->cwdi_cdir == olddp) { 685 rele1 = cwdi->cwdi_cdir; 686 vref(newdp); 687 cwdi->cwdi_cdir = newdp; 688 } 689 if (cwdi->cwdi_rdir == olddp) { 690 rele2 = cwdi->cwdi_rdir; 691 vref(newdp); 692 cwdi->cwdi_rdir = newdp; 693 } 694 } 695 mutex_exit(&cwdi->cwdi_lock); 696 cwdfree(cwdi); 697 if (rele1 != NULL) 698 vrele(rele1); 699 if (rele2 != NULL) 700 vrele(rele2); 701 mutex_enter(proc_lock); 702 break; 703 } 704 mutex_exit(proc_lock); 705 } while (retry); 706 707 if (rootvnode == olddp) { 708 vrele(rootvnode); 709 vref(newdp); 710 rootvnode = newdp; 711 } 712 vput(newdp); 713 } 714 715 /* 716 * Start extended attributes 717 */ 718 static int 719 start_extattr(struct mount *mp) 720 { 721 int error; 722 723 error = VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, NULL, 0, NULL); 724 if (error) 725 printf("%s: failed to start extattr: error = %d\n", 726 mp->mnt_stat.f_mntonname, error); 727 728 return error; 729 } 730 731 int 732 mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops, 733 const char *path, int flags, void *data, size_t *data_len) 734 { 735 vnode_t *vp = *vpp; 736 struct mount *mp; 737 struct pathbuf *pb; 738 struct nameidata nd; 739 int error; 740 741 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 742 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 743 if (error) { 744 vfs_delref(vfsops); 745 return error; 746 } 747 748 /* Cannot make a non-dir a mount-point (from here anyway). */ 749 if (vp->v_type != VDIR) { 750 vfs_delref(vfsops); 751 return ENOTDIR; 752 } 753 754 if (flags & MNT_EXPORTED) { 755 vfs_delref(vfsops); 756 return EINVAL; 757 } 758 759 if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) { 760 vfs_delref(vfsops); 761 return ENOMEM; 762 } 763 764 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 765 766 /* 767 * The underlying file system may refuse the mount for 768 * various reasons. Allow the user to force it to happen. 769 * 770 * Set the mount level flags. 
771 */ 772 mp->mnt_flag = flags & (MNT_BASIC_FLAGS | MNT_FORCE | MNT_IGNORE); 773 774 mutex_enter(mp->mnt_updating); 775 error = VFS_MOUNT(mp, path, data, data_len); 776 mp->mnt_flag &= ~MNT_OP_FLAGS; 777 778 if (error != 0) 779 goto err_unmounted; 780 781 /* 782 * Validate and prepare the mount point. 783 */ 784 error = pathbuf_copyin(path, &pb); 785 if (error != 0) { 786 goto err_mounted; 787 } 788 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 789 error = namei(&nd); 790 pathbuf_destroy(pb); 791 if (error != 0) { 792 goto err_mounted; 793 } 794 if (nd.ni_vp != vp) { 795 vput(nd.ni_vp); 796 error = EINVAL; 797 goto err_mounted; 798 } 799 if (vp->v_mountedhere != NULL) { 800 vput(nd.ni_vp); 801 error = EBUSY; 802 goto err_mounted; 803 } 804 error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0); 805 if (error != 0) { 806 vput(nd.ni_vp); 807 goto err_mounted; 808 } 809 810 /* 811 * Put the new filesystem on the mount list after root. 812 */ 813 cache_purge(vp); 814 mp->mnt_iflag &= ~IMNT_WANTRDWR; 815 816 mountlist_append(mp); 817 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 818 vfs_syncer_add_to_worklist(mp); 819 vp->v_mountedhere = mp; 820 vput(nd.ni_vp); 821 822 mount_checkdirs(vp); 823 mutex_exit(mp->mnt_updating); 824 825 /* Hold an additional reference to the mount across VFS_START(). */ 826 vfs_ref(mp); 827 (void) VFS_STATVFS(mp, &mp->mnt_stat); 828 error = VFS_START(mp, 0); 829 if (error) { 830 vrele(vp); 831 } else if (flags & MNT_EXTATTR) { 832 if (start_extattr(mp) != 0) 833 mp->mnt_flag &= ~MNT_EXTATTR; 834 } 835 /* Drop reference held for VFS_START(). */ 836 vfs_rele(mp); 837 *vpp = NULL; 838 return error; 839 840 err_mounted: 841 if (VFS_UNMOUNT(mp, MNT_FORCE) != 0) 842 panic("Unmounting fresh file system failed"); 843 844 err_unmounted: 845 vp->v_mountedhere = NULL; 846 mutex_exit(mp->mnt_updating); 847 vfs_rele(mp); 848 849 return error; 850 } 851 852 /* 853 * Do the actual file system unmount. 
File system is assumed to have 854 * been locked by the caller. 855 * 856 * => Caller hold reference to the mount, explicitly for dounmount(). 857 */ 858 int 859 dounmount(struct mount *mp, int flags, struct lwp *l) 860 { 861 vnode_t *coveredvp; 862 int error, async, used_syncer, used_extattr; 863 const bool was_suspended = fstrans_is_owner(mp); 864 865 #if NVERIEXEC > 0 866 error = veriexec_unmountchk(mp); 867 if (error) 868 return (error); 869 #endif /* NVERIEXEC > 0 */ 870 871 if (!was_suspended) { 872 error = vfs_suspend(mp, 0); 873 if (error) { 874 return error; 875 } 876 } 877 878 KASSERT((mp->mnt_iflag & IMNT_GONE) == 0); 879 880 used_syncer = (mp->mnt_iflag & IMNT_ONWORKLIST) != 0; 881 used_extattr = mp->mnt_flag & MNT_EXTATTR; 882 883 mp->mnt_iflag |= IMNT_UNMOUNT; 884 mutex_enter(mp->mnt_updating); 885 async = mp->mnt_flag & MNT_ASYNC; 886 mp->mnt_flag &= ~MNT_ASYNC; 887 cache_purgevfs(mp); /* remove cache entries for this file sys */ 888 if (used_syncer) 889 vfs_syncer_remove_from_worklist(mp); 890 error = 0; 891 if (((mp->mnt_flag & MNT_RDONLY) == 0) && ((flags & MNT_FORCE) == 0)) { 892 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 893 } 894 if (error == 0 || (flags & MNT_FORCE)) { 895 error = VFS_UNMOUNT(mp, flags); 896 } 897 if (error) { 898 mp->mnt_iflag &= ~IMNT_UNMOUNT; 899 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 900 vfs_syncer_add_to_worklist(mp); 901 mp->mnt_flag |= async; 902 mutex_exit(mp->mnt_updating); 903 if (!was_suspended) 904 vfs_resume(mp); 905 if (used_extattr) { 906 if (start_extattr(mp) != 0) 907 mp->mnt_flag &= ~MNT_EXTATTR; 908 else 909 mp->mnt_flag |= MNT_EXTATTR; 910 } 911 return (error); 912 } 913 mutex_exit(mp->mnt_updating); 914 915 /* 916 * mark filesystem as gone to prevent further umounts 917 * after mnt_umounting lock is gone, this also prevents 918 * vfs_busy() from succeeding. 
919 */ 920 mp->mnt_iflag |= IMNT_GONE; 921 if (!was_suspended) 922 vfs_resume(mp); 923 924 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { 925 vn_lock(coveredvp, LK_EXCLUSIVE | LK_RETRY); 926 coveredvp->v_mountedhere = NULL; 927 VOP_UNLOCK(coveredvp); 928 } 929 mountlist_remove(mp); 930 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 931 panic("unmount: dangling vnode"); 932 vfs_hooks_unmount(mp); 933 934 vfs_rele(mp); /* reference from mount() */ 935 if (coveredvp != NULLVP) { 936 vrele(coveredvp); 937 } 938 return (0); 939 } 940 941 /* 942 * Unmount all file systems. 943 * We traverse the list in reverse order under the assumption that doing so 944 * will avoid needing to worry about dependencies. 945 */ 946 bool 947 vfs_unmountall(struct lwp *l) 948 { 949 950 printf("unmounting file systems...\n"); 951 return vfs_unmountall1(l, true, true); 952 } 953 954 static void 955 vfs_unmount_print(struct mount *mp, const char *pfx) 956 { 957 958 aprint_verbose("%sunmounted %s on %s type %s\n", pfx, 959 mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, 960 mp->mnt_stat.f_fstypename); 961 } 962 963 /* 964 * Return the mount with the highest generation less than "gen". 
965 */ 966 static struct mount * 967 vfs_unmount_next(uint64_t gen) 968 { 969 mount_iterator_t *iter; 970 struct mount *mp, *nmp; 971 972 nmp = NULL; 973 974 mountlist_iterator_init(&iter); 975 while ((mp = mountlist_iterator_next(iter)) != NULL) { 976 if ((nmp == NULL || mp->mnt_gen > nmp->mnt_gen) && 977 mp->mnt_gen < gen) { 978 if (nmp != NULL) 979 vfs_rele(nmp); 980 nmp = mp; 981 vfs_ref(nmp); 982 } 983 } 984 mountlist_iterator_destroy(iter); 985 986 return nmp; 987 } 988 989 bool 990 vfs_unmount_forceone(struct lwp *l) 991 { 992 struct mount *mp; 993 int error; 994 995 mp = vfs_unmount_next(mountgen); 996 if (mp == NULL) { 997 return false; 998 } 999 1000 #ifdef DEBUG 1001 printf("forcefully unmounting %s (%s)...\n", 1002 mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname); 1003 #endif 1004 if ((error = dounmount(mp, MNT_FORCE, l)) == 0) { 1005 vfs_unmount_print(mp, "forcefully "); 1006 return true; 1007 } else { 1008 vfs_rele(mp); 1009 } 1010 1011 #ifdef DEBUG 1012 printf("forceful unmount of %s failed with error %d\n", 1013 mp->mnt_stat.f_mntonname, error); 1014 #endif 1015 1016 return false; 1017 } 1018 1019 bool 1020 vfs_unmountall1(struct lwp *l, bool force, bool verbose) 1021 { 1022 struct mount *mp; 1023 bool any_error = false, progress = false; 1024 uint64_t gen; 1025 int error; 1026 1027 gen = mountgen; 1028 for (;;) { 1029 mp = vfs_unmount_next(gen); 1030 if (mp == NULL) 1031 break; 1032 gen = mp->mnt_gen; 1033 1034 #ifdef DEBUG 1035 printf("unmounting %p %s (%s)...\n", 1036 (void *)mp, mp->mnt_stat.f_mntonname, 1037 mp->mnt_stat.f_mntfromname); 1038 #endif 1039 if ((error = dounmount(mp, force ? 
MNT_FORCE : 0, l)) == 0) { 1040 vfs_unmount_print(mp, ""); 1041 progress = true; 1042 } else { 1043 vfs_rele(mp); 1044 if (verbose) { 1045 printf("unmount of %s failed with error %d\n", 1046 mp->mnt_stat.f_mntonname, error); 1047 } 1048 any_error = true; 1049 } 1050 } 1051 if (verbose) { 1052 printf("unmounting done\n"); 1053 } 1054 if (any_error && verbose) { 1055 printf("WARNING: some file systems would not unmount\n"); 1056 } 1057 return progress; 1058 } 1059 1060 void 1061 vfs_sync_all(struct lwp *l) 1062 { 1063 printf("syncing disks... "); 1064 1065 /* remove user processes from run queue */ 1066 suspendsched(); 1067 (void)spl0(); 1068 1069 /* avoid coming back this way again if we panic. */ 1070 doing_shutdown = 1; 1071 1072 do_sys_sync(l); 1073 1074 /* Wait for sync to finish. */ 1075 if (buf_syncwait() != 0) { 1076 #if defined(DDB) && defined(DEBUG_HALT_BUSY) 1077 Debugger(); 1078 #endif 1079 printf("giving up\n"); 1080 return; 1081 } else 1082 printf("done\n"); 1083 } 1084 1085 /* 1086 * Sync and unmount file systems before shutting down. 1087 */ 1088 void 1089 vfs_shutdown(void) 1090 { 1091 lwp_t *l = curlwp; 1092 1093 vfs_sync_all(l); 1094 1095 /* 1096 * If we have paniced - do not make the situation potentially 1097 * worse by unmounting the file systems. 1098 */ 1099 if (panicstr != NULL) { 1100 return; 1101 } 1102 1103 /* Unmount file systems. 
*/ 1104 vfs_unmountall(l); 1105 } 1106 1107 /* 1108 * Print a list of supported file system types (used by vfs_mountroot) 1109 */ 1110 static void 1111 vfs_print_fstypes(void) 1112 { 1113 struct vfsops *v; 1114 int cnt = 0; 1115 1116 mutex_enter(&vfs_list_lock); 1117 LIST_FOREACH(v, &vfs_list, vfs_list) 1118 ++cnt; 1119 mutex_exit(&vfs_list_lock); 1120 1121 if (cnt == 0) { 1122 printf("WARNING: No file system modules have been loaded.\n"); 1123 return; 1124 } 1125 1126 printf("Supported file systems:"); 1127 mutex_enter(&vfs_list_lock); 1128 LIST_FOREACH(v, &vfs_list, vfs_list) { 1129 printf(" %s", v->vfs_name); 1130 } 1131 mutex_exit(&vfs_list_lock); 1132 printf("\n"); 1133 } 1134 1135 /* 1136 * Mount the root file system. If the operator didn't specify a 1137 * file system to use, try all possible file systems until one 1138 * succeeds. 1139 */ 1140 int 1141 vfs_mountroot(void) 1142 { 1143 struct vfsops *v; 1144 int error = ENODEV; 1145 1146 if (root_device == NULL) 1147 panic("vfs_mountroot: root device unknown"); 1148 1149 switch (device_class(root_device)) { 1150 case DV_IFNET: 1151 if (rootdev != NODEV) 1152 panic("vfs_mountroot: rootdev set for DV_IFNET " 1153 "(0x%llx -> %llu,%llu)", 1154 (unsigned long long)rootdev, 1155 (unsigned long long)major(rootdev), 1156 (unsigned long long)minor(rootdev)); 1157 break; 1158 1159 case DV_DISK: 1160 if (rootdev == NODEV) 1161 panic("vfs_mountroot: rootdev not set for DV_DISK"); 1162 if (bdevvp(rootdev, &rootvp)) 1163 panic("vfs_mountroot: can't get vnode for rootdev"); 1164 error = VOP_OPEN(rootvp, FREAD, FSCRED); 1165 if (error) { 1166 printf("vfs_mountroot: can't open root device\n"); 1167 return (error); 1168 } 1169 break; 1170 1171 case DV_VIRTUAL: 1172 break; 1173 1174 default: 1175 printf("%s: inappropriate for root file system\n", 1176 device_xname(root_device)); 1177 return (ENODEV); 1178 } 1179 1180 /* 1181 * If user specified a root fs type, use it. 
Make sure the 1182 * specified type exists and has a mount_root() 1183 */ 1184 if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) { 1185 v = vfs_getopsbyname(rootfstype); 1186 error = EFTYPE; 1187 if (v != NULL) { 1188 if (v->vfs_mountroot != NULL) { 1189 error = (v->vfs_mountroot)(); 1190 } 1191 v->vfs_refcount--; 1192 } 1193 goto done; 1194 } 1195 1196 /* 1197 * Try each file system currently configured into the kernel. 1198 */ 1199 mutex_enter(&vfs_list_lock); 1200 LIST_FOREACH(v, &vfs_list, vfs_list) { 1201 if (v->vfs_mountroot == NULL) 1202 continue; 1203 #ifdef DEBUG 1204 aprint_normal("mountroot: trying %s...\n", v->vfs_name); 1205 #endif 1206 v->vfs_refcount++; 1207 mutex_exit(&vfs_list_lock); 1208 error = (*v->vfs_mountroot)(); 1209 mutex_enter(&vfs_list_lock); 1210 v->vfs_refcount--; 1211 if (!error) { 1212 aprint_normal("root file system type: %s\n", 1213 v->vfs_name); 1214 break; 1215 } 1216 } 1217 mutex_exit(&vfs_list_lock); 1218 1219 if (v == NULL) { 1220 vfs_print_fstypes(); 1221 printf("no file system for %s", device_xname(root_device)); 1222 if (device_class(root_device) == DV_DISK) 1223 printf(" (dev 0x%llx)", (unsigned long long)rootdev); 1224 printf("\n"); 1225 error = EFTYPE; 1226 } 1227 1228 done: 1229 if (error && device_class(root_device) == DV_DISK) { 1230 VOP_CLOSE(rootvp, FREAD, FSCRED); 1231 vrele(rootvp); 1232 } 1233 if (error == 0) { 1234 mount_iterator_t *iter; 1235 struct mount *mp; 1236 extern struct cwdinfo cwdi0; 1237 1238 mountlist_iterator_init(&iter); 1239 mp = mountlist_iterator_next(iter); 1240 KASSERT(mp != NULL); 1241 mountlist_iterator_destroy(iter); 1242 1243 mp->mnt_flag |= MNT_ROOTFS; 1244 mp->mnt_op->vfs_refcount++; 1245 1246 /* 1247 * Get the vnode for '/'. Set cwdi0.cwdi_cdir to 1248 * reference it. 
1249 */ 1250 error = VFS_ROOT(mp, LK_SHARED, &rootvnode); 1251 if (error) 1252 panic("cannot find root vnode, error=%d", error); 1253 cwdi0.cwdi_cdir = rootvnode; 1254 vref(cwdi0.cwdi_cdir); 1255 VOP_UNLOCK(rootvnode); 1256 cwdi0.cwdi_rdir = NULL; 1257 1258 /* 1259 * Now that root is mounted, we can fixup initproc's CWD 1260 * info. All other processes are kthreads, which merely 1261 * share proc0's CWD info. 1262 */ 1263 initproc->p_cwdi->cwdi_cdir = rootvnode; 1264 vref(initproc->p_cwdi->cwdi_cdir); 1265 initproc->p_cwdi->cwdi_rdir = NULL; 1266 /* 1267 * Enable loading of modules from the filesystem 1268 */ 1269 module_load_vfs_init(); 1270 1271 } 1272 return (error); 1273 } 1274 1275 /* 1276 * mount_specific_key_create -- 1277 * Create a key for subsystem mount-specific data. 1278 */ 1279 int 1280 mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor) 1281 { 1282 1283 return specificdata_key_create(mount_specificdata_domain, keyp, dtor); 1284 } 1285 1286 /* 1287 * mount_specific_key_delete -- 1288 * Delete a key for subsystem mount-specific data. 1289 */ 1290 void 1291 mount_specific_key_delete(specificdata_key_t key) 1292 { 1293 1294 specificdata_key_delete(mount_specificdata_domain, key); 1295 } 1296 1297 /* 1298 * mount_initspecific -- 1299 * Initialize a mount's specificdata container. 1300 */ 1301 void 1302 mount_initspecific(struct mount *mp) 1303 { 1304 int error __diagused; 1305 1306 error = specificdata_init(mount_specificdata_domain, 1307 &mp->mnt_specdataref); 1308 KASSERT(error == 0); 1309 } 1310 1311 /* 1312 * mount_finispecific -- 1313 * Finalize a mount's specificdata container. 1314 */ 1315 void 1316 mount_finispecific(struct mount *mp) 1317 { 1318 1319 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 1320 } 1321 1322 /* 1323 * mount_getspecific -- 1324 * Return mount-specific data corresponding to the specified key. 
1325 */ 1326 void * 1327 mount_getspecific(struct mount *mp, specificdata_key_t key) 1328 { 1329 1330 return specificdata_getspecific(mount_specificdata_domain, 1331 &mp->mnt_specdataref, key); 1332 } 1333 1334 /* 1335 * mount_setspecific -- 1336 * Set mount-specific data corresponding to the specified key. 1337 */ 1338 void 1339 mount_setspecific(struct mount *mp, specificdata_key_t key, void *data) 1340 { 1341 1342 specificdata_setspecific(mount_specificdata_domain, 1343 &mp->mnt_specdataref, key, data); 1344 } 1345 1346 /* 1347 * Check to see if a filesystem is mounted on a block device. 1348 */ 1349 int 1350 vfs_mountedon(vnode_t *vp) 1351 { 1352 vnode_t *vq; 1353 int error = 0; 1354 1355 if (vp->v_type != VBLK) 1356 return ENOTBLK; 1357 if (spec_node_getmountedfs(vp) != NULL) 1358 return EBUSY; 1359 if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, &vq) == 0) { 1360 if (spec_node_getmountedfs(vq) != NULL) 1361 error = EBUSY; 1362 vrele(vq); 1363 } 1364 1365 return error; 1366 } 1367 1368 /* 1369 * Check if a device pointed to by vp is mounted. 1370 * 1371 * Returns: 1372 * EINVAL if it's not a disk 1373 * EBUSY if it's a disk and mounted 1374 * 0 if it's a disk and not mounted 1375 */ 1376 int 1377 rawdev_mounted(vnode_t *vp, vnode_t **bvpp) 1378 { 1379 vnode_t *bvp; 1380 dev_t dev; 1381 int d_type; 1382 1383 bvp = NULL; 1384 d_type = D_OTHER; 1385 1386 if (iskmemvp(vp)) 1387 return EINVAL; 1388 1389 switch (vp->v_type) { 1390 case VCHR: { 1391 const struct cdevsw *cdev; 1392 1393 dev = vp->v_rdev; 1394 cdev = cdevsw_lookup(dev); 1395 if (cdev != NULL) { 1396 dev_t blkdev; 1397 1398 blkdev = devsw_chr2blk(dev); 1399 if (blkdev != NODEV) { 1400 if (vfinddev(blkdev, VBLK, &bvp) != 0) { 1401 d_type = (cdev->d_flag & D_TYPEMASK); 1402 /* XXX: what if bvp disappears? 
*/ 1403 vrele(bvp); 1404 } 1405 } 1406 } 1407 1408 break; 1409 } 1410 1411 case VBLK: { 1412 const struct bdevsw *bdev; 1413 1414 dev = vp->v_rdev; 1415 bdev = bdevsw_lookup(dev); 1416 if (bdev != NULL) 1417 d_type = (bdev->d_flag & D_TYPEMASK); 1418 1419 bvp = vp; 1420 1421 break; 1422 } 1423 1424 default: 1425 break; 1426 } 1427 1428 if (d_type != D_DISK) 1429 return EINVAL; 1430 1431 if (bvpp != NULL) 1432 *bvpp = bvp; 1433 1434 /* 1435 * XXX: This is bogus. We should be failing the request 1436 * XXX: not only if this specific slice is mounted, but 1437 * XXX: if it's on a disk with any other mounted slice. 1438 */ 1439 if (vfs_mountedon(bvp)) 1440 return EBUSY; 1441 1442 return 0; 1443 } 1444 1445 /* 1446 * Make a 'unique' number from a mount type name. 1447 */ 1448 long 1449 makefstype(const char *type) 1450 { 1451 long rv; 1452 1453 for (rv = 0; *type; type++) { 1454 rv <<= 2; 1455 rv ^= *type; 1456 } 1457 return rv; 1458 } 1459 1460 static struct mountlist_entry * 1461 mountlist_alloc(enum mountlist_type type, struct mount *mp) 1462 { 1463 struct mountlist_entry *me; 1464 1465 me = kmem_zalloc(sizeof(*me), KM_SLEEP); 1466 me->me_mount = mp; 1467 me->me_type = type; 1468 1469 return me; 1470 } 1471 1472 static void 1473 mountlist_free(struct mountlist_entry *me) 1474 { 1475 1476 kmem_free(me, sizeof(*me)); 1477 } 1478 1479 void 1480 mountlist_iterator_init(mount_iterator_t **mip) 1481 { 1482 struct mountlist_entry *me; 1483 1484 me = mountlist_alloc(ME_MARKER, NULL); 1485 mutex_enter(&mountlist_lock); 1486 TAILQ_INSERT_HEAD(&mountlist, me, me_list); 1487 mutex_exit(&mountlist_lock); 1488 *mip = (mount_iterator_t *)me; 1489 } 1490 1491 void 1492 mountlist_iterator_destroy(mount_iterator_t *mi) 1493 { 1494 struct mountlist_entry *marker = &mi->mi_entry; 1495 1496 if (marker->me_mount != NULL) 1497 vfs_unbusy(marker->me_mount); 1498 1499 mutex_enter(&mountlist_lock); 1500 TAILQ_REMOVE(&mountlist, marker, me_list); 1501 mutex_exit(&mountlist_lock); 1502 1503 
mountlist_free(marker); 1504 1505 } 1506 1507 /* 1508 * Return the next mount or NULL for this iterator. 1509 * Mark it busy on success. 1510 */ 1511 static inline struct mount * 1512 _mountlist_iterator_next(mount_iterator_t *mi, bool wait) 1513 { 1514 struct mountlist_entry *me, *marker = &mi->mi_entry; 1515 struct mount *mp; 1516 int error; 1517 1518 if (marker->me_mount != NULL) { 1519 vfs_unbusy(marker->me_mount); 1520 marker->me_mount = NULL; 1521 } 1522 1523 mutex_enter(&mountlist_lock); 1524 for (;;) { 1525 KASSERT(marker->me_type == ME_MARKER); 1526 1527 me = TAILQ_NEXT(marker, me_list); 1528 if (me == NULL) { 1529 /* End of list: keep marker and return. */ 1530 mutex_exit(&mountlist_lock); 1531 return NULL; 1532 } 1533 TAILQ_REMOVE(&mountlist, marker, me_list); 1534 TAILQ_INSERT_AFTER(&mountlist, me, marker, me_list); 1535 1536 /* Skip other markers. */ 1537 if (me->me_type != ME_MOUNT) 1538 continue; 1539 1540 /* Take an initial reference for vfs_busy() below. */ 1541 mp = me->me_mount; 1542 KASSERT(mp != NULL); 1543 vfs_ref(mp); 1544 mutex_exit(&mountlist_lock); 1545 1546 /* Try to mark this mount busy and return on success. */ 1547 if (wait) 1548 error = vfs_busy(mp); 1549 else 1550 error = vfs_trybusy(mp); 1551 if (error == 0) { 1552 vfs_rele(mp); 1553 marker->me_mount = mp; 1554 return mp; 1555 } 1556 vfs_rele(mp); 1557 mutex_enter(&mountlist_lock); 1558 } 1559 } 1560 1561 struct mount * 1562 mountlist_iterator_next(mount_iterator_t *mi) 1563 { 1564 1565 return _mountlist_iterator_next(mi, true); 1566 } 1567 1568 struct mount * 1569 mountlist_iterator_trynext(mount_iterator_t *mi) 1570 { 1571 1572 return _mountlist_iterator_next(mi, false); 1573 } 1574 1575 /* 1576 * Attach new mount to the end of the mount list. 
1577 */ 1578 void 1579 mountlist_append(struct mount *mp) 1580 { 1581 struct mountlist_entry *me; 1582 1583 me = mountlist_alloc(ME_MOUNT, mp); 1584 mutex_enter(&mountlist_lock); 1585 TAILQ_INSERT_TAIL(&mountlist, me, me_list); 1586 mutex_exit(&mountlist_lock); 1587 } 1588 1589 /* 1590 * Remove mount from mount list. 1591 */void 1592 mountlist_remove(struct mount *mp) 1593 { 1594 struct mountlist_entry *me; 1595 1596 mutex_enter(&mountlist_lock); 1597 TAILQ_FOREACH(me, &mountlist, me_list) 1598 if (me->me_type == ME_MOUNT && me->me_mount == mp) 1599 break; 1600 KASSERT(me != NULL); 1601 TAILQ_REMOVE(&mountlist, me, me_list); 1602 mutex_exit(&mountlist_lock); 1603 mountlist_free(me); 1604 } 1605 1606 /* 1607 * Unlocked variant to traverse the mountlist. 1608 * To be used from DDB only. 1609 */ 1610 struct mount * 1611 _mountlist_next(struct mount *mp) 1612 { 1613 struct mountlist_entry *me; 1614 1615 if (mp == NULL) { 1616 me = TAILQ_FIRST(&mountlist); 1617 } else { 1618 TAILQ_FOREACH(me, &mountlist, me_list) 1619 if (me->me_type == ME_MOUNT && me->me_mount == mp) 1620 break; 1621 if (me != NULL) 1622 me = TAILQ_NEXT(me, me_list); 1623 } 1624 1625 while (me != NULL && me->me_type != ME_MOUNT) 1626 me = TAILQ_NEXT(me, me_list); 1627 1628 return (me ? me->me_mount : NULL); 1629 } 1630