1 /* $NetBSD: vfs_mount.c,v 1.30 2014/05/30 08:46:00 hannken Exp $ */ 2 3 /*- 4 * Copyright (c) 1997-2011 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1989, 1993 35 * The Regents of the University of California. All rights reserved. 36 * (c) UNIX System Laboratories, Inc. 37 * All or some portions of this file are derived from material licensed 38 * to the University of California by American Telephone and Telegraph 39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 40 * the permission of UNIX System Laboratories, Inc. 41 * 42 * Redistribution and use in source and binary forms, with or without 43 * modification, are permitted provided that the following conditions 44 * are met: 45 * 1. Redistributions of source code must retain the above copyright 46 * notice, this list of conditions and the following disclaimer. 47 * 2. Redistributions in binary form must reproduce the above copyright 48 * notice, this list of conditions and the following disclaimer in the 49 * documentation and/or other materials provided with the distribution. 50 * 3. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 67 */ 68 69 #include <sys/cdefs.h> 70 __KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.30 2014/05/30 08:46:00 hannken Exp $"); 71 72 #define _VFS_VNODE_PRIVATE 73 74 #include <sys/param.h> 75 #include <sys/kernel.h> 76 77 #include <sys/atomic.h> 78 #include <sys/buf.h> 79 #include <sys/conf.h> 80 #include <sys/fcntl.h> 81 #include <sys/filedesc.h> 82 #include <sys/device.h> 83 #include <sys/kauth.h> 84 #include <sys/kmem.h> 85 #include <sys/module.h> 86 #include <sys/mount.h> 87 #include <sys/namei.h> 88 #include <sys/extattr.h> 89 #include <sys/syscallargs.h> 90 #include <sys/sysctl.h> 91 #include <sys/systm.h> 92 #include <sys/vfs_syscalls.h> 93 #include <sys/vnode.h> 94 95 #include <miscfs/genfs/genfs.h> 96 #include <miscfs/syncfs/syncfs.h> 97 #include <miscfs/specfs/specdev.h> 98 99 /* Root filesystem and device. */ 100 vnode_t * rootvnode; 101 device_t root_device; 102 103 /* Mounted filesystem list. */ 104 struct mntlist mountlist; 105 kmutex_t mountlist_lock; 106 107 kmutex_t mntvnode_lock; 108 kmutex_t vfs_list_lock; 109 110 static specificdata_domain_t mount_specificdata_domain; 111 static kmutex_t mntid_lock; 112 113 static kmutex_t mountgen_lock; 114 static uint64_t mountgen; 115 116 void 117 vfs_mount_sysinit(void) 118 { 119 120 TAILQ_INIT(&mountlist); 121 mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE); 122 mutex_init(&mntvnode_lock, MUTEX_DEFAULT, IPL_NONE); 123 mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE); 124 125 mount_specificdata_domain = specificdata_domain_create(); 126 mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE); 127 mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE); 128 mountgen = 0; 129 } 130 131 struct mount * 132 vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp) 133 { 134 struct mount *mp; 135 int error __diagused; 136 137 mp = kmem_zalloc(sizeof(*mp), KM_SLEEP); 138 if (mp == NULL) 139 return NULL; 140 141 mp->mnt_op = vfsops; 142 mp->mnt_refcnt = 1; 143 TAILQ_INIT(&mp->mnt_vnodelist); 144 mutex_init(&mp->mnt_unmounting, MUTEX_DEFAULT, IPL_NONE); 145 mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE); 146 mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE); 147 error = vfs_busy(mp, NULL); 148 KASSERT(error == 0); 149 mp->mnt_vnodecovered = vp; 150 mount_initspecific(mp); 151 152 mutex_enter(&mountgen_lock); 153 mp->mnt_gen = mountgen++; 154 mutex_exit(&mountgen_lock); 155 156 return mp; 157 } 158 159 /* 160 * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and 161 * initialize a mount structure for it. 162 * 163 * Devname is usually updated by mount(8) after booting. 164 */ 165 int 166 vfs_rootmountalloc(const char *fstypename, const char *devname, 167 struct mount **mpp) 168 { 169 struct vfsops *vfsp = NULL; 170 struct mount *mp; 171 172 mutex_enter(&vfs_list_lock); 173 LIST_FOREACH(vfsp, &vfs_list, vfs_list) 174 if (!strncmp(vfsp->vfs_name, fstypename, 175 sizeof(mp->mnt_stat.f_fstypename))) 176 break; 177 if (vfsp == NULL) { 178 mutex_exit(&vfs_list_lock); 179 return (ENODEV); 180 } 181 vfsp->vfs_refcount++; 182 mutex_exit(&vfs_list_lock); 183 184 if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL) 185 return ENOMEM; 186 mp->mnt_flag = MNT_RDONLY; 187 (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, 188 sizeof(mp->mnt_stat.f_fstypename)); 189 mp->mnt_stat.f_mntonname[0] = '/'; 190 mp->mnt_stat.f_mntonname[1] = '\0'; 191 mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] = 192 '\0'; 193 (void)copystr(devname, mp->mnt_stat.f_mntfromname, 194 sizeof(mp->mnt_stat.f_mntfromname) - 1, 0); 195 *mpp = mp; 196 return 0; 197 } 198 199 /* 200 * vfs_getnewfsid: get a new unique fsid. 201 */ 202 void 203 vfs_getnewfsid(struct mount *mp) 204 { 205 static u_short xxxfs_mntid; 206 fsid_t tfsid; 207 int mtype; 208 209 mutex_enter(&mntid_lock); 210 mtype = makefstype(mp->mnt_op->vfs_name); 211 mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0); 212 mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype; 213 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 214 if (xxxfs_mntid == 0) 215 ++xxxfs_mntid; 216 tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid); 217 tfsid.__fsid_val[1] = mtype; 218 if (!TAILQ_EMPTY(&mountlist)) { 219 while (vfs_getvfs(&tfsid)) { 220 tfsid.__fsid_val[0]++; 221 xxxfs_mntid++; 222 } 223 } 224 mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0]; 225 mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0]; 226 mutex_exit(&mntid_lock); 227 } 228 229 /* 230 * Lookup a mount point by filesystem identifier. 231 * 232 * XXX Needs to add a reference to the mount point. 233 */ 234 struct mount * 235 vfs_getvfs(fsid_t *fsid) 236 { 237 struct mount *mp; 238 239 mutex_enter(&mountlist_lock); 240 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 241 if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] && 242 mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) { 243 mutex_exit(&mountlist_lock); 244 return (mp); 245 } 246 } 247 mutex_exit(&mountlist_lock); 248 return NULL; 249 } 250 251 /* 252 * Drop a reference to a mount structure, freeing if the last reference. 253 */ 254 void 255 vfs_destroy(struct mount *mp) 256 { 257 258 if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) { 259 return; 260 } 261 262 /* 263 * Nothing else has visibility of the mount: we can now 264 * free the data structures. 265 */ 266 KASSERT(mp->mnt_refcnt == 0); 267 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 268 mutex_destroy(&mp->mnt_unmounting); 269 mutex_destroy(&mp->mnt_updating); 270 mutex_destroy(&mp->mnt_renamelock); 271 if (mp->mnt_op != NULL) { 272 vfs_delref(mp->mnt_op); 273 } 274 kmem_free(mp, sizeof(*mp)); 275 } 276 277 /* 278 * Mark a mount point as busy, and gain a new reference to it. Used to 279 * prevent the file system from being unmounted during critical sections. 280 * 281 * vfs_busy can be called multiple times and by multiple threads 282 * and must be accompanied by the same number of vfs_unbusy calls. 283 * 284 * => The caller must hold a pre-existing reference to the mount. 285 * => Will fail if the file system is being unmounted, or is unmounted. 286 */ 287 int 288 vfs_busy(struct mount *mp, struct mount **nextp) 289 { 290 291 KASSERT(mp->mnt_refcnt > 0); 292 293 mutex_enter(&mp->mnt_unmounting); 294 if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) { 295 mutex_exit(&mp->mnt_unmounting); 296 if (nextp != NULL) { 297 KASSERT(mutex_owned(&mountlist_lock)); 298 *nextp = TAILQ_NEXT(mp, mnt_list); 299 } 300 return ENOENT; 301 } 302 ++mp->mnt_busynest; 303 KASSERT(mp->mnt_busynest != 0); 304 mutex_exit(&mp->mnt_unmounting); 305 if (nextp != NULL) { 306 mutex_exit(&mountlist_lock); 307 } 308 atomic_inc_uint(&mp->mnt_refcnt); 309 return 0; 310 } 311 312 /* 313 * Unbusy a busy filesystem. 314 * 315 * Every successful vfs_busy() call must be undone by a vfs_unbusy() call. 316 * 317 * => If keepref is true, preserve reference added by vfs_busy(). 318 * => If nextp != NULL, acquire mountlist_lock. 319 */ 320 void 321 vfs_unbusy(struct mount *mp, bool keepref, struct mount **nextp) 322 { 323 324 KASSERT(mp->mnt_refcnt > 0); 325 326 if (nextp != NULL) { 327 mutex_enter(&mountlist_lock); 328 } 329 mutex_enter(&mp->mnt_unmounting); 330 KASSERT(mp->mnt_busynest != 0); 331 mp->mnt_busynest--; 332 mutex_exit(&mp->mnt_unmounting); 333 if (!keepref) { 334 vfs_destroy(mp); 335 } 336 if (nextp != NULL) { 337 KASSERT(mutex_owned(&mountlist_lock)); 338 *nextp = TAILQ_NEXT(mp, mnt_list); 339 } 340 } 341 342 struct vnode_iterator { 343 struct vnode vi_vnode; 344 }; 345 346 void 347 vfs_vnode_iterator_init(struct mount *mp, struct vnode_iterator **vip) 348 { 349 struct vnode *vp; 350 351 vp = vnalloc(mp); 352 353 mutex_enter(&mntvnode_lock); 354 TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 355 vp->v_usecount = 1; 356 mutex_exit(&mntvnode_lock); 357 358 *vip = (struct vnode_iterator *)vp; 359 } 360 361 void 362 vfs_vnode_iterator_destroy(struct vnode_iterator *vi) 363 { 364 struct vnode *mvp = &vi->vi_vnode; 365 366 mutex_enter(&mntvnode_lock); 367 KASSERT(ISSET(mvp->v_iflag, VI_MARKER)); 368 if (mvp->v_usecount != 0) 369 TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvp, v_mntvnodes); 370 mutex_exit(&mntvnode_lock); 371 vnfree(mvp); 372 } 373 374 struct vnode * 375 vfs_vnode_iterator_next(struct vnode_iterator *vi, 376 bool (*f)(void *, struct vnode *), void *cl) 377 { 378 struct vnode *mvp = &vi->vi_vnode; 379 struct mount *mp = mvp->v_mount; 380 struct vnode *vp; 381 int error; 382 383 KASSERT(ISSET(mvp->v_iflag, VI_MARKER)); 384 385 do { 386 mutex_enter(&mntvnode_lock); 387 vp = TAILQ_NEXT(mvp, v_mntvnodes); 388 TAILQ_REMOVE(&mp->mnt_vnodelist, mvp, v_mntvnodes); 389 mvp->v_usecount = 0; 390 again: 391 if (vp == NULL) { 392 mutex_exit(&mntvnode_lock); 393 return NULL; 394 } 395 mutex_enter(vp->v_interlock); 396 if (ISSET(vp->v_iflag, VI_MARKER) || 397 (f && !ISSET(vp->v_iflag, VI_XLOCK) && !(*f)(cl, vp))) { 398 mutex_exit(vp->v_interlock); 399 vp = TAILQ_NEXT(vp, v_mntvnodes); 400 goto again; 401 } 402 403 TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vp, mvp, v_mntvnodes); 404 mvp->v_usecount = 1; 405 mutex_exit(&mntvnode_lock); 406 error = vget(vp, 0); 407 KASSERT(error == 0 || error == ENOENT); 408 } while (error != 0); 409 410 return vp; 411 } 412 413 /* 414 * Move a vnode from one mount queue to another. 415 */ 416 void 417 vfs_insmntque(vnode_t *vp, struct mount *mp) 418 { 419 struct mount *omp; 420 421 KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 || 422 vp->v_tag == VT_VFS); 423 424 mutex_enter(&mntvnode_lock); 425 /* 426 * Delete from old mount point vnode list, if on one. 427 */ 428 if ((omp = vp->v_mount) != NULL) 429 TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes); 430 /* 431 * Insert into list of vnodes for the new mount point, if 432 * available. The caller must take a reference on the mount 433 * structure and donate to the vnode. 434 */ 435 if ((vp->v_mount = mp) != NULL) 436 TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes); 437 mutex_exit(&mntvnode_lock); 438 439 if (omp != NULL) { 440 /* Release reference to old mount. */ 441 vfs_destroy(omp); 442 } 443 } 444 445 /* 446 * Remove any vnodes in the vnode table belonging to mount point mp. 447 * 448 * If FORCECLOSE is not specified, there should not be any active ones, 449 * return error if any are found (nb: this is a user error, not a 450 * system error). If FORCECLOSE is specified, detach any active vnodes 451 * that are found. 452 * 453 * If WRITECLOSE is set, only flush out regular file vnodes open for 454 * writing. 455 * 456 * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped. 457 */ 458 #ifdef DEBUG 459 int busyprt = 0; /* print out busy vnodes */ 460 struct ctldebug debug1 = { "busyprt", &busyprt }; 461 #endif 462 463 struct vflush_ctx { 464 const struct vnode *skipvp; 465 int flags; 466 }; 467 468 static bool 469 vflush_selector(void *cl, struct vnode *vp) 470 { 471 struct vflush_ctx *c = cl; 472 /* 473 * Skip over a selected vnode. 474 */ 475 if (vp == c->skipvp) 476 return false; 477 /* 478 * Skip over a vnodes marked VSYSTEM. 479 */ 480 if ((c->flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) 481 return false; 482 483 /* 484 * If WRITECLOSE is set, only flush out regular file 485 * vnodes open for writing. 486 */ 487 if ((c->flags & WRITECLOSE) && vp->v_type == VREG) { 488 if (vp->v_writecount == 0) 489 return false; 490 } 491 return true; 492 } 493 494 static vnode_t * 495 vflushnext(struct vnode_iterator *marker, void *ctx, int *when) 496 { 497 if (hardclock_ticks > *when) { 498 yield(); 499 *when = hardclock_ticks + hz / 10; 500 } 501 return vfs_vnode_iterator_next(marker, vflush_selector, ctx); 502 } 503 504 505 int 506 vflush(struct mount *mp, vnode_t *skipvp, int flags) 507 { 508 vnode_t *vp; 509 struct vnode_iterator *marker; 510 int busy = 0, when = 0; 511 struct vflush_ctx ctx; 512 513 /* First, flush out any vnode references from vrele_list. */ 514 vrele_flush(); 515 516 vfs_vnode_iterator_init(mp, &marker); 517 518 ctx.skipvp = skipvp; 519 ctx.flags = flags; 520 while ((vp = vflushnext(marker, &ctx, &when)) != NULL) { 521 /* 522 * First try to recycle the vnode. 523 */ 524 if (vrecycle(vp)) 525 continue; 526 /* 527 * If FORCECLOSE is set, forcibly close the vnode. 528 */ 529 if (flags & FORCECLOSE) { 530 vgone(vp); 531 continue; 532 } 533 #ifdef DEBUG 534 if (busyprt) 535 vprint("vflush: busy vnode", vp); 536 #endif 537 vrele(vp); 538 busy++; 539 } 540 vfs_vnode_iterator_destroy(marker); 541 if (busy) 542 return (EBUSY); 543 return (0); 544 } 545 546 /* 547 * Mount a file system. 548 */ 549 550 /* 551 * Scan all active processes to see if any of them have a current or root 552 * directory onto which the new filesystem has just been mounted. If so, 553 * replace them with the new mount point. 554 */ 555 static void 556 mount_checkdirs(vnode_t *olddp) 557 { 558 vnode_t *newdp, *rele1, *rele2; 559 struct cwdinfo *cwdi; 560 struct proc *p; 561 bool retry; 562 563 if (olddp->v_usecount == 1) { 564 return; 565 } 566 if (VFS_ROOT(olddp->v_mountedhere, &newdp)) 567 panic("mount: lost mount"); 568 569 do { 570 retry = false; 571 mutex_enter(proc_lock); 572 PROCLIST_FOREACH(p, &allproc) { 573 if ((cwdi = p->p_cwdi) == NULL) 574 continue; 575 /* 576 * Cannot change to the old directory any more, 577 * so even if we see a stale value it is not a 578 * problem. 579 */ 580 if (cwdi->cwdi_cdir != olddp && 581 cwdi->cwdi_rdir != olddp) 582 continue; 583 retry = true; 584 rele1 = NULL; 585 rele2 = NULL; 586 atomic_inc_uint(&cwdi->cwdi_refcnt); 587 mutex_exit(proc_lock); 588 rw_enter(&cwdi->cwdi_lock, RW_WRITER); 589 if (cwdi->cwdi_cdir == olddp) { 590 rele1 = cwdi->cwdi_cdir; 591 vref(newdp); 592 cwdi->cwdi_cdir = newdp; 593 } 594 if (cwdi->cwdi_rdir == olddp) { 595 rele2 = cwdi->cwdi_rdir; 596 vref(newdp); 597 cwdi->cwdi_rdir = newdp; 598 } 599 rw_exit(&cwdi->cwdi_lock); 600 cwdfree(cwdi); 601 if (rele1 != NULL) 602 vrele(rele1); 603 if (rele2 != NULL) 604 vrele(rele2); 605 mutex_enter(proc_lock); 606 break; 607 } 608 mutex_exit(proc_lock); 609 } while (retry); 610 611 if (rootvnode == olddp) { 612 vrele(rootvnode); 613 vref(newdp); 614 rootvnode = newdp; 615 } 616 vput(newdp); 617 } 618 619 int 620 mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops, 621 const char *path, int flags, void *data, size_t *data_len) 622 { 623 vnode_t *vp = *vpp; 624 struct mount *mp; 625 struct pathbuf *pb; 626 struct nameidata nd; 627 int error; 628 629 error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT, 630 KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data); 631 if (error) { 632 vfs_delref(vfsops); 633 return error; 634 } 635 636 /* Cannot make a non-dir a mount-point (from here anyway). */ 637 if (vp->v_type != VDIR) { 638 vfs_delref(vfsops); 639 return ENOTDIR; 640 } 641 642 if (flags & MNT_EXPORTED) { 643 vfs_delref(vfsops); 644 return EINVAL; 645 } 646 647 if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) { 648 vfs_delref(vfsops); 649 return ENOMEM; 650 } 651 652 mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred); 653 654 /* 655 * The underlying file system may refuse the mount for 656 * various reasons. Allow the user to force it to happen. 657 * 658 * Set the mount level flags. 659 */ 660 mp->mnt_flag = flags & (MNT_BASIC_FLAGS | MNT_FORCE | MNT_IGNORE); 661 662 mutex_enter(&mp->mnt_updating); 663 error = VFS_MOUNT(mp, path, data, data_len); 664 mp->mnt_flag &= ~MNT_OP_FLAGS; 665 666 if (error != 0) 667 goto err_unmounted; 668 669 /* 670 * Validate and prepare the mount point. 671 */ 672 error = pathbuf_copyin(path, &pb); 673 if (error != 0) { 674 goto err_mounted; 675 } 676 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb); 677 error = namei(&nd); 678 pathbuf_destroy(pb); 679 if (error != 0) { 680 goto err_mounted; 681 } 682 if (nd.ni_vp != vp) { 683 vput(nd.ni_vp); 684 error = EINVAL; 685 goto err_mounted; 686 } 687 if (vp->v_mountedhere != NULL) { 688 vput(nd.ni_vp); 689 error = EBUSY; 690 goto err_mounted; 691 } 692 error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0); 693 if (error != 0) { 694 vput(nd.ni_vp); 695 goto err_mounted; 696 } 697 698 /* 699 * Put the new filesystem on the mount list after root. 700 */ 701 cache_purge(vp); 702 mp->mnt_iflag &= ~IMNT_WANTRDWR; 703 704 mutex_enter(&mountlist_lock); 705 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 706 mutex_exit(&mountlist_lock); 707 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 708 error = vfs_allocate_syncvnode(mp); 709 if (error == 0) 710 vp->v_mountedhere = mp; 711 vput(nd.ni_vp); 712 if (error != 0) 713 goto err_onmountlist; 714 715 mount_checkdirs(vp); 716 mutex_exit(&mp->mnt_updating); 717 718 /* Hold an additional reference to the mount across VFS_START(). */ 719 vfs_unbusy(mp, true, NULL); 720 (void) VFS_STATVFS(mp, &mp->mnt_stat); 721 error = VFS_START(mp, 0); 722 if (error) { 723 vrele(vp); 724 } else if (flags & MNT_EXTATTR) { 725 error = VFS_EXTATTRCTL(vp->v_mountedhere, 726 EXTATTR_CMD_START, NULL, 0, NULL); 727 if (error) 728 printf("%s: failed to start extattr: error = %d\n", 729 vp->v_mountedhere->mnt_stat.f_mntonname, error); 730 } 731 /* Drop reference held for VFS_START(). */ 732 vfs_destroy(mp); 733 *vpp = NULL; 734 return error; 735 736 err_onmountlist: 737 mutex_enter(&mountlist_lock); 738 TAILQ_REMOVE(&mountlist, mp, mnt_list); 739 mp->mnt_iflag |= IMNT_GONE; 740 mutex_exit(&mountlist_lock); 741 742 err_mounted: 743 if (VFS_UNMOUNT(mp, MNT_FORCE) != 0) 744 panic("Unmounting fresh file system failed"); 745 746 err_unmounted: 747 vp->v_mountedhere = NULL; 748 mutex_exit(&mp->mnt_updating); 749 vfs_unbusy(mp, false, NULL); 750 vfs_destroy(mp); 751 752 return error; 753 } 754 755 /* 756 * Do the actual file system unmount. File system is assumed to have 757 * been locked by the caller. 758 * 759 * => Caller hold reference to the mount, explicitly for dounmount(). 760 */ 761 int 762 dounmount(struct mount *mp, int flags, struct lwp *l) 763 { 764 vnode_t *coveredvp; 765 int error, async, used_syncer; 766 767 #if NVERIEXEC > 0 768 error = veriexec_unmountchk(mp); 769 if (error) 770 return (error); 771 #endif /* NVERIEXEC > 0 */ 772 773 /* 774 * XXX Freeze syncer. Must do this before locking the 775 * mount point. See dounmount() for details. 776 */ 777 mutex_enter(&syncer_mutex); 778 779 /* 780 * Abort unmount attempt when the filesystem is in use 781 */ 782 mutex_enter(&mp->mnt_unmounting); 783 if (mp->mnt_busynest != 0) { 784 mutex_exit(&mp->mnt_unmounting); 785 mutex_exit(&syncer_mutex); 786 return EBUSY; 787 } 788 789 /* 790 * Abort unmount attempt when the filesystem is not mounted 791 */ 792 if ((mp->mnt_iflag & IMNT_GONE) != 0) { 793 mutex_exit(&mp->mnt_unmounting); 794 mutex_exit(&syncer_mutex); 795 return ENOENT; 796 } 797 798 used_syncer = (mp->mnt_syncer != NULL); 799 800 /* 801 * XXX Syncer must be frozen when we get here. This should really 802 * be done on a per-mountpoint basis, but the syncer doesn't work 803 * like that. 804 * 805 * The caller of dounmount() must acquire syncer_mutex because 806 * the syncer itself acquires locks in syncer_mutex -> vfs_busy 807 * order, and we must preserve that order to avoid deadlock. 808 * 809 * So, if the file system did not use the syncer, now is 810 * the time to release the syncer_mutex. 811 */ 812 if (used_syncer == 0) { 813 mutex_exit(&syncer_mutex); 814 } 815 mp->mnt_iflag |= IMNT_UNMOUNT; 816 mutex_enter(&mp->mnt_updating); 817 async = mp->mnt_flag & MNT_ASYNC; 818 mp->mnt_flag &= ~MNT_ASYNC; 819 cache_purgevfs(mp); /* remove cache entries for this file sys */ 820 if (mp->mnt_syncer != NULL) 821 vfs_deallocate_syncvnode(mp); 822 error = 0; 823 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 824 error = VFS_SYNC(mp, MNT_WAIT, l->l_cred); 825 } 826 if (error == 0 || (flags & MNT_FORCE)) { 827 error = VFS_UNMOUNT(mp, flags); 828 } 829 if (error) { 830 mp->mnt_iflag &= ~IMNT_UNMOUNT; 831 mutex_exit(&mp->mnt_unmounting); 832 if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0) 833 (void) vfs_allocate_syncvnode(mp); 834 mp->mnt_flag |= async; 835 mutex_exit(&mp->mnt_updating); 836 if (used_syncer) 837 mutex_exit(&syncer_mutex); 838 return (error); 839 } 840 mutex_exit(&mp->mnt_updating); 841 842 /* 843 * release mnt_umounting lock here, because other code calls 844 * vfs_busy() while holding the mountlist_lock. 845 * 846 * mark filesystem as gone to prevent further umounts 847 * after mnt_umounting lock is gone, this also prevents 848 * vfs_busy() from succeeding. 849 */ 850 mp->mnt_iflag |= IMNT_GONE; 851 mutex_exit(&mp->mnt_unmounting); 852 853 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { 854 vn_lock(coveredvp, LK_EXCLUSIVE | LK_RETRY); 855 coveredvp->v_mountedhere = NULL; 856 VOP_UNLOCK(coveredvp); 857 } 858 mutex_enter(&mountlist_lock); 859 TAILQ_REMOVE(&mountlist, mp, mnt_list); 860 mutex_exit(&mountlist_lock); 861 if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL) 862 panic("unmount: dangling vnode"); 863 if (used_syncer) 864 mutex_exit(&syncer_mutex); 865 vfs_hooks_unmount(mp); 866 867 vfs_destroy(mp); /* reference from mount() */ 868 if (coveredvp != NULLVP) { 869 vrele(coveredvp); 870 } 871 return (0); 872 } 873 874 /* 875 * Unmount all file systems. 876 * We traverse the list in reverse order under the assumption that doing so 877 * will avoid needing to worry about dependencies. 878 */ 879 bool 880 vfs_unmountall(struct lwp *l) 881 { 882 883 printf("unmounting file systems...\n"); 884 return vfs_unmountall1(l, true, true); 885 } 886 887 static void 888 vfs_unmount_print(struct mount *mp, const char *pfx) 889 { 890 891 aprint_verbose("%sunmounted %s on %s type %s\n", pfx, 892 mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname, 893 mp->mnt_stat.f_fstypename); 894 } 895 896 bool 897 vfs_unmount_forceone(struct lwp *l) 898 { 899 struct mount *mp, *nmp; 900 int error; 901 902 nmp = NULL; 903 904 TAILQ_FOREACH_REVERSE(mp, &mountlist, mntlist, mnt_list) { 905 if (nmp == NULL || mp->mnt_gen > nmp->mnt_gen) { 906 nmp = mp; 907 } 908 } 909 if (nmp == NULL) { 910 return false; 911 } 912 913 #ifdef DEBUG 914 printf("forcefully unmounting %s (%s)...\n", 915 nmp->mnt_stat.f_mntonname, nmp->mnt_stat.f_mntfromname); 916 #endif 917 atomic_inc_uint(&nmp->mnt_refcnt); 918 if ((error = dounmount(nmp, MNT_FORCE, l)) == 0) { 919 vfs_unmount_print(nmp, "forcefully "); 920 return true; 921 } else { 922 vfs_destroy(nmp); 923 } 924 925 #ifdef DEBUG 926 printf("forceful unmount of %s failed with error %d\n", 927 nmp->mnt_stat.f_mntonname, error); 928 #endif 929 930 return false; 931 } 932 933 bool 934 vfs_unmountall1(struct lwp *l, bool force, bool verbose) 935 { 936 struct mount *mp, *nmp; 937 bool any_error = false, progress = false; 938 int error; 939 940 TAILQ_FOREACH_REVERSE_SAFE(mp, &mountlist, mntlist, mnt_list, nmp) { 941 #ifdef DEBUG 942 printf("unmounting %p %s (%s)...\n", 943 (void *)mp, mp->mnt_stat.f_mntonname, 944 mp->mnt_stat.f_mntfromname); 945 #endif 946 atomic_inc_uint(&mp->mnt_refcnt); 947 if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0) { 948 vfs_unmount_print(mp, ""); 949 progress = true; 950 } else { 951 vfs_destroy(mp); 952 if (verbose) { 953 printf("unmount of %s failed with error %d\n", 954 mp->mnt_stat.f_mntonname, error); 955 } 956 any_error = true; 957 } 958 } 959 if (verbose) { 960 printf("unmounting done\n"); 961 } 962 if (any_error && verbose) { 963 printf("WARNING: some file systems would not unmount\n"); 964 } 965 return progress; 966 } 967 968 void 969 vfs_sync_all(struct lwp *l) 970 { 971 printf("syncing disks... "); 972 973 /* remove user processes from run queue */ 974 suspendsched(); 975 (void)spl0(); 976 977 /* avoid coming back this way again if we panic. */ 978 doing_shutdown = 1; 979 980 do_sys_sync(l); 981 982 /* Wait for sync to finish. */ 983 if (buf_syncwait() != 0) { 984 #if defined(DDB) && defined(DEBUG_HALT_BUSY) 985 Debugger(); 986 #endif 987 printf("giving up\n"); 988 return; 989 } else 990 printf("done\n"); 991 } 992 993 /* 994 * Sync and unmount file systems before shutting down. 995 */ 996 void 997 vfs_shutdown(void) 998 { 999 lwp_t *l = curlwp; 1000 1001 vfs_sync_all(l); 1002 1003 /* 1004 * If we have paniced - do not make the situation potentially 1005 * worse by unmounting the file systems. 1006 */ 1007 if (panicstr != NULL) { 1008 return; 1009 } 1010 1011 /* Unmount file systems. */ 1012 vfs_unmountall(l); 1013 } 1014 1015 /* 1016 * Print a list of supported file system types (used by vfs_mountroot) 1017 */ 1018 static void 1019 vfs_print_fstypes(void) 1020 { 1021 struct vfsops *v; 1022 int cnt = 0; 1023 1024 mutex_enter(&vfs_list_lock); 1025 LIST_FOREACH(v, &vfs_list, vfs_list) 1026 ++cnt; 1027 mutex_exit(&vfs_list_lock); 1028 1029 if (cnt == 0) { 1030 printf("WARNING: No file system modules have been loaded.\n"); 1031 return; 1032 } 1033 1034 printf("Supported file systems:"); 1035 mutex_enter(&vfs_list_lock); 1036 LIST_FOREACH(v, &vfs_list, vfs_list) { 1037 printf(" %s", v->vfs_name); 1038 } 1039 mutex_exit(&vfs_list_lock); 1040 printf("\n"); 1041 } 1042 1043 /* 1044 * Mount the root file system. If the operator didn't specify a 1045 * file system to use, try all possible file systems until one 1046 * succeeds. 1047 */ 1048 int 1049 vfs_mountroot(void) 1050 { 1051 struct vfsops *v; 1052 int error = ENODEV; 1053 1054 if (root_device == NULL) 1055 panic("vfs_mountroot: root device unknown"); 1056 1057 switch (device_class(root_device)) { 1058 case DV_IFNET: 1059 if (rootdev != NODEV) 1060 panic("vfs_mountroot: rootdev set for DV_IFNET " 1061 "(0x%llx -> %llu,%llu)", 1062 (unsigned long long)rootdev, 1063 (unsigned long long)major(rootdev), 1064 (unsigned long long)minor(rootdev)); 1065 break; 1066 1067 case DV_DISK: 1068 if (rootdev == NODEV) 1069 panic("vfs_mountroot: rootdev not set for DV_DISK"); 1070 if (bdevvp(rootdev, &rootvp)) 1071 panic("vfs_mountroot: can't get vnode for rootdev"); 1072 error = VOP_OPEN(rootvp, FREAD, FSCRED); 1073 if (error) { 1074 printf("vfs_mountroot: can't open root device\n"); 1075 return (error); 1076 } 1077 break; 1078 1079 case DV_VIRTUAL: 1080 break; 1081 1082 default: 1083 printf("%s: inappropriate for root file system\n", 1084 device_xname(root_device)); 1085 return (ENODEV); 1086 } 1087 1088 /* 1089 * If user specified a root fs type, use it. Make sure the 1090 * specified type exists and has a mount_root() 1091 */ 1092 if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) { 1093 v = vfs_getopsbyname(rootfstype); 1094 error = EFTYPE; 1095 if (v != NULL) { 1096 if (v->vfs_mountroot != NULL) { 1097 error = (v->vfs_mountroot)(); 1098 } 1099 v->vfs_refcount--; 1100 } 1101 goto done; 1102 } 1103 1104 /* 1105 * Try each file system currently configured into the kernel. 1106 */ 1107 mutex_enter(&vfs_list_lock); 1108 LIST_FOREACH(v, &vfs_list, vfs_list) { 1109 if (v->vfs_mountroot == NULL) 1110 continue; 1111 #ifdef DEBUG 1112 aprint_normal("mountroot: trying %s...\n", v->vfs_name); 1113 #endif 1114 v->vfs_refcount++; 1115 mutex_exit(&vfs_list_lock); 1116 error = (*v->vfs_mountroot)(); 1117 mutex_enter(&vfs_list_lock); 1118 v->vfs_refcount--; 1119 if (!error) { 1120 aprint_normal("root file system type: %s\n", 1121 v->vfs_name); 1122 break; 1123 } 1124 } 1125 mutex_exit(&vfs_list_lock); 1126 1127 if (v == NULL) { 1128 vfs_print_fstypes(); 1129 printf("no file system for %s", device_xname(root_device)); 1130 if (device_class(root_device) == DV_DISK) 1131 printf(" (dev 0x%llx)", (unsigned long long)rootdev); 1132 printf("\n"); 1133 error = EFTYPE; 1134 } 1135 1136 done: 1137 if (error && device_class(root_device) == DV_DISK) { 1138 VOP_CLOSE(rootvp, FREAD, FSCRED); 1139 vrele(rootvp); 1140 } 1141 if (error == 0) { 1142 struct mount *mp; 1143 extern struct cwdinfo cwdi0; 1144 1145 mp = TAILQ_FIRST(&mountlist); 1146 mp->mnt_flag |= MNT_ROOTFS; 1147 mp->mnt_op->vfs_refcount++; 1148 1149 /* 1150 * Get the vnode for '/'. Set cwdi0.cwdi_cdir to 1151 * reference it. 1152 */ 1153 error = VFS_ROOT(mp, &rootvnode); 1154 if (error) 1155 panic("cannot find root vnode, error=%d", error); 1156 cwdi0.cwdi_cdir = rootvnode; 1157 vref(cwdi0.cwdi_cdir); 1158 VOP_UNLOCK(rootvnode); 1159 cwdi0.cwdi_rdir = NULL; 1160 1161 /* 1162 * Now that root is mounted, we can fixup initproc's CWD 1163 * info. All other processes are kthreads, which merely 1164 * share proc0's CWD info. 1165 */ 1166 initproc->p_cwdi->cwdi_cdir = rootvnode; 1167 vref(initproc->p_cwdi->cwdi_cdir); 1168 initproc->p_cwdi->cwdi_rdir = NULL; 1169 /* 1170 * Enable loading of modules from the filesystem 1171 */ 1172 module_load_vfs_init(); 1173 1174 } 1175 return (error); 1176 } 1177 1178 /* 1179 * mount_specific_key_create -- 1180 * Create a key for subsystem mount-specific data. 1181 */ 1182 int 1183 mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor) 1184 { 1185 1186 return specificdata_key_create(mount_specificdata_domain, keyp, dtor); 1187 } 1188 1189 /* 1190 * mount_specific_key_delete -- 1191 * Delete a key for subsystem mount-specific data. 1192 */ 1193 void 1194 mount_specific_key_delete(specificdata_key_t key) 1195 { 1196 1197 specificdata_key_delete(mount_specificdata_domain, key); 1198 } 1199 1200 /* 1201 * mount_initspecific -- 1202 * Initialize a mount's specificdata container. 1203 */ 1204 void 1205 mount_initspecific(struct mount *mp) 1206 { 1207 int error __diagused; 1208 1209 error = specificdata_init(mount_specificdata_domain, 1210 &mp->mnt_specdataref); 1211 KASSERT(error == 0); 1212 } 1213 1214 /* 1215 * mount_finispecific -- 1216 * Finalize a mount's specificdata container. 1217 */ 1218 void 1219 mount_finispecific(struct mount *mp) 1220 { 1221 1222 specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref); 1223 } 1224 1225 /* 1226 * mount_getspecific -- 1227 * Return mount-specific data corresponding to the specified key. 1228 */ 1229 void * 1230 mount_getspecific(struct mount *mp, specificdata_key_t key) 1231 { 1232 1233 return specificdata_getspecific(mount_specificdata_domain, 1234 &mp->mnt_specdataref, key); 1235 } 1236 1237 /* 1238 * mount_setspecific -- 1239 * Set mount-specific data corresponding to the specified key. 1240 */ 1241 void 1242 mount_setspecific(struct mount *mp, specificdata_key_t key, void *data) 1243 { 1244 1245 specificdata_setspecific(mount_specificdata_domain, 1246 &mp->mnt_specdataref, key, data); 1247 } 1248 1249 /* 1250 * Check to see if a filesystem is mounted on a block device. 1251 */ 1252 int 1253 vfs_mountedon(vnode_t *vp) 1254 { 1255 vnode_t *vq; 1256 int error = 0; 1257 1258 if (vp->v_type != VBLK) 1259 return ENOTBLK; 1260 if (spec_node_getmountedfs(vp) != NULL) 1261 return EBUSY; 1262 if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, &vq) == 0) { 1263 if (spec_node_getmountedfs(vq) != NULL) 1264 error = EBUSY; 1265 vrele(vq); 1266 } 1267 1268 return error; 1269 } 1270 1271 /* 1272 * Check if a device pointed to by vp is mounted. 1273 * 1274 * Returns: 1275 * EINVAL if it's not a disk 1276 * EBUSY if it's a disk and mounted 1277 * 0 if it's a disk and not mounted 1278 */ 1279 int 1280 rawdev_mounted(vnode_t *vp, vnode_t **bvpp) 1281 { 1282 vnode_t *bvp; 1283 dev_t dev; 1284 int d_type; 1285 1286 bvp = NULL; 1287 d_type = D_OTHER; 1288 1289 if (iskmemvp(vp)) 1290 return EINVAL; 1291 1292 switch (vp->v_type) { 1293 case VCHR: { 1294 const struct cdevsw *cdev; 1295 1296 dev = vp->v_rdev; 1297 cdev = cdevsw_lookup(dev); 1298 if (cdev != NULL) { 1299 dev_t blkdev; 1300 1301 blkdev = devsw_chr2blk(dev); 1302 if (blkdev != NODEV) { 1303 if (vfinddev(blkdev, VBLK, &bvp) != 0) { 1304 d_type = (cdev->d_flag & D_TYPEMASK); 1305 /* XXX: what if bvp disappears? */ 1306 vrele(bvp); 1307 } 1308 } 1309 } 1310 1311 break; 1312 } 1313 1314 case VBLK: { 1315 const struct bdevsw *bdev; 1316 1317 dev = vp->v_rdev; 1318 bdev = bdevsw_lookup(dev); 1319 if (bdev != NULL) 1320 d_type = (bdev->d_flag & D_TYPEMASK); 1321 1322 bvp = vp; 1323 1324 break; 1325 } 1326 1327 default: 1328 break; 1329 } 1330 1331 if (d_type != D_DISK) 1332 return EINVAL; 1333 1334 if (bvpp != NULL) 1335 *bvpp = bvp; 1336 1337 /* 1338 * XXX: This is bogus. We should be failing the request 1339 * XXX: not only if this specific slice is mounted, but 1340 * XXX: if it's on a disk with any other mounted slice. 1341 */ 1342 if (vfs_mountedon(bvp)) 1343 return EBUSY; 1344 1345 return 0; 1346 } 1347 1348 /* 1349 * Make a 'unique' number from a mount type name. 1350 */ 1351 long 1352 makefstype(const char *type) 1353 { 1354 long rv; 1355 1356 for (rv = 0; *type; type++) { 1357 rv <<= 2; 1358 rv ^= *type; 1359 } 1360 return rv; 1361 } 1362 1363 void 1364 mountlist_append(struct mount *mp) 1365 { 1366 mutex_enter(&mountlist_lock); 1367 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 1368 mutex_exit(&mountlist_lock); 1369 } 1370