/*	$NetBSD: vfs_mount.c,v 1.4 2011/04/03 01:20:23 rmind Exp $	*/

/*-
 * Copyright (c) 1997-2011 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.4 2011/04/03 01:20:23 rmind Exp $");

#include <sys/param.h>
#include <sys/kernel.h>

#include <sys/atomic.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/device.h>
#include <sys/kauth.h>
#include <sys/kmem.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/syscallargs.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vnode.h>

#include <miscfs/genfs/genfs.h>
#include <miscfs/syncfs/syncfs.h>
#include <miscfs/specfs/specdev.h>

/* Root filesystem and device. */
vnode_t *		rootvnode;
struct device *		root_device;

/* Mounted filesystem list. */
struct mntlist		mountlist;
kmutex_t		mountlist_lock;

kmutex_t		mntvnode_lock;
kmutex_t		vfs_list_lock;

static specificdata_domain_t mount_specificdata_domain;
static kmutex_t		mntid_lock;

static kmutex_t		mountgen_lock;
static uint64_t		mountgen;

void
vfs_mount_sysinit(void)
{

        CIRCLEQ_INIT(&mountlist);
        mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
        mutex_init(&mntvnode_lock, MUTEX_DEFAULT, IPL_NONE);
        mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);

        mount_specificdata_domain = specificdata_domain_create();
        mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
        mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE);
        mountgen = 0;
}

struct mount *
vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp)
{
        struct mount *mp;
        int error;

        mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
        if (mp == NULL)
                return NULL;

        mp->mnt_op = vfsops;
        mp->mnt_refcnt = 1;
        TAILQ_INIT(&mp->mnt_vnodelist);
        rw_init(&mp->mnt_unmounting);
        mutex_init(&mp->mnt_renamelock, MUTEX_DEFAULT, IPL_NONE);
        mutex_init(&mp->mnt_updating, MUTEX_DEFAULT, IPL_NONE);
        error = vfs_busy(mp, NULL);
        KASSERT(error == 0);
        mp->mnt_vnodecovered = vp;
        mount_initspecific(mp);

        mutex_enter(&mountgen_lock);
        mp->mnt_gen = mountgen++;
        mutex_exit(&mountgen_lock);

        return mp;
}
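
/*
 * Illustrative sketch: a caller of vfs_mountalloc() gets back a mount that
 * is already busied (vfs_busy() has been called on it) and referenced, and
 * is responsible for releasing both if it later fails, mirroring what
 * mount_domount() does on its error paths.  do_setup() below is only a
 * placeholder for filesystem-specific setup:
 *
 *	struct mount *mp;
 *
 *	if ((mp = vfs_mountalloc(vfsops, vp)) == NULL)
 *		return ENOMEM;
 *	if ((error = do_setup(mp)) != 0) {
 *		vfs_unbusy(mp, false, NULL);	// drop the vfs_busy() hold
 *		vfs_destroy(mp);		// drop the allocation reference
 *		return error;
 *	}
 */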

/*
 * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and
 * initialize a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(const char *fstypename, const char *devname,
    struct mount **mpp)
{
        struct vfsops *vfsp = NULL;
        struct mount *mp;

        mutex_enter(&vfs_list_lock);
        LIST_FOREACH(vfsp, &vfs_list, vfs_list)
                if (!strncmp(vfsp->vfs_name, fstypename,
                    sizeof(mp->mnt_stat.f_fstypename)))
                        break;
        if (vfsp == NULL) {
                mutex_exit(&vfs_list_lock);
                return (ENODEV);
        }
        vfsp->vfs_refcount++;
        mutex_exit(&vfs_list_lock);

        if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL)
                return ENOMEM;
        mp->mnt_flag = MNT_RDONLY;
        (void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
            sizeof(mp->mnt_stat.f_fstypename));
        mp->mnt_stat.f_mntonname[0] = '/';
        mp->mnt_stat.f_mntonname[1] = '\0';
        mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
            '\0';
        (void)copystr(devname, mp->mnt_stat.f_mntfromname,
            sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
        *mpp = mp;
        return 0;
}

/*
 * vfs_getnewfsid: get a new unique fsid.
 */
void
vfs_getnewfsid(struct mount *mp)
{
        static u_short xxxfs_mntid;
        fsid_t tfsid;
        int mtype;

        mutex_enter(&mntid_lock);
        mtype = makefstype(mp->mnt_op->vfs_name);
        mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
        mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
        mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
        if (xxxfs_mntid == 0)
                ++xxxfs_mntid;
        tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
        tfsid.__fsid_val[1] = mtype;
        if (!CIRCLEQ_EMPTY(&mountlist)) {
                while (vfs_getvfs(&tfsid)) {
                        tfsid.__fsid_val[0]++;
                        xxxfs_mntid++;
                }
        }
        mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
        mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
        mutex_exit(&mntid_lock);
}
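
/*
 * Illustrative note on the fsid layout produced above: the filesystem type
 * number and a small per-boot serial are packed into a dev_t, roughly
 *
 *	f_fsidx.__fsid_val[0] == makedev(makefstype(name) & 0xff, serial);
 *	f_fsidx.__fsid_val[1] == makefstype(name);
 *
 * where "serial" (xxxfs_mntid) is bumped until vfs_getvfs() no longer finds
 * an existing mount using the candidate fsid.
 */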

/*
 * Lookup a mount point by filesystem identifier.
 *
 * XXX Needs to add a reference to the mount point.
 */
struct mount *
vfs_getvfs(fsid_t *fsid)
{
        struct mount *mp;

        mutex_enter(&mountlist_lock);
        CIRCLEQ_FOREACH(mp, &mountlist, mnt_list) {
                if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
                    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
                        mutex_exit(&mountlist_lock);
                        return (mp);
                }
        }
        mutex_exit(&mountlist_lock);
        return NULL;
}

/*
 * Drop a reference to a mount structure, freeing if the last reference.
 */
void
vfs_destroy(struct mount *mp)
{

        if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
                return;
        }

        /*
         * Nothing else has visibility of the mount: we can now
         * free the data structures.
         */
        KASSERT(mp->mnt_refcnt == 0);
        specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
        rw_destroy(&mp->mnt_unmounting);
        mutex_destroy(&mp->mnt_updating);
        mutex_destroy(&mp->mnt_renamelock);
        if (mp->mnt_op != NULL) {
                vfs_delref(mp->mnt_op);
        }
        kmem_free(mp, sizeof(*mp));
}

/*
 * Mark a mount point as busy, and gain a new reference to it.  Used to
 * prevent the file system from being unmounted during critical sections.
 *
 * => The caller must hold a pre-existing reference to the mount.
 * => Will fail if the file system is being unmounted, or is unmounted.
 */
int
vfs_busy(struct mount *mp, struct mount **nextp)
{

        KASSERT(mp->mnt_refcnt > 0);

        if (__predict_false(!rw_tryenter(&mp->mnt_unmounting, RW_READER))) {
                if (nextp != NULL) {
                        KASSERT(mutex_owned(&mountlist_lock));
                        *nextp = CIRCLEQ_NEXT(mp, mnt_list);
                }
                return EBUSY;
        }
        if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
                rw_exit(&mp->mnt_unmounting);
                if (nextp != NULL) {
                        KASSERT(mutex_owned(&mountlist_lock));
                        *nextp = CIRCLEQ_NEXT(mp, mnt_list);
                }
                return ENOENT;
        }
        if (nextp != NULL) {
                mutex_exit(&mountlist_lock);
        }
        atomic_inc_uint(&mp->mnt_refcnt);
        return 0;
}

/*
 * Unbusy a busy filesystem.
 *
 * => If keepref is true, preserve reference added by vfs_busy().
 * => If nextp != NULL, acquire mountlist_lock.
 */
void
vfs_unbusy(struct mount *mp, bool keepref, struct mount **nextp)
{

        KASSERT(mp->mnt_refcnt > 0);

        if (nextp != NULL) {
                mutex_enter(&mountlist_lock);
        }
        rw_exit(&mp->mnt_unmounting);
        if (!keepref) {
                vfs_destroy(mp);
        }
        if (nextp != NULL) {
                KASSERT(mutex_owned(&mountlist_lock));
                *nextp = CIRCLEQ_NEXT(mp, mnt_list);
        }
}
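
/*
 * Typical usage (illustrative sketch): walking the mount list with the
 * nextp protocol, so the iteration survives a mount being skipped or torn
 * down underneath it.  On a vfs_busy() failure the mountlist_lock is still
 * held and *nextp has already been advanced; on success the lock has been
 * dropped and vfs_unbusy() re-takes it while advancing *nextp:
 *
 *	struct mount *mp, *nmp;
 *
 *	mutex_enter(&mountlist_lock);
 *	for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist;
 *	     mp = nmp) {
 *		if (vfs_busy(mp, &nmp) != 0)
 *			continue;
 *		// ... operate on the busied mount without mountlist_lock ...
 *		vfs_unbusy(mp, false, &nmp);
 *	}
 *	mutex_exit(&mountlist_lock);
 */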

/*
 * Insert a marker vnode into a mount's vnode list, after the
 * specified vnode.  mntvnode_lock must be held.
 */
void
vmark(vnode_t *mvp, vnode_t *vp)
{
        struct mount *mp = mvp->v_mount;

        KASSERT(mutex_owned(&mntvnode_lock));
        KASSERT((mvp->v_iflag & VI_MARKER) != 0);
        KASSERT(vp->v_mount == mp);

        TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vp, mvp, v_mntvnodes);
}

/*
 * Remove a marker vnode from a mount's vnode list, and return
 * a pointer to the next vnode in the list.  mntvnode_lock must
 * be held.
 */
vnode_t *
vunmark(vnode_t *mvp)
{
        struct mount *mp = mvp->v_mount;
        vnode_t *vp;

        KASSERT(mutex_owned(&mntvnode_lock));
        KASSERT((mvp->v_iflag & VI_MARKER) != 0);

        vp = TAILQ_NEXT(mvp, v_mntvnodes);
        TAILQ_REMOVE(&mp->mnt_vnodelist, mvp, v_mntvnodes);

        KASSERT(vp == NULL || vp->v_mount == mp);

        return vp;
}

/*
 * Move a vnode from one mount queue to another.
 */
void
vfs_insmntque(vnode_t *vp, struct mount *mp)
{
        struct mount *omp;

        KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 ||
            vp->v_tag == VT_VFS);

        mutex_enter(&mntvnode_lock);
        /*
         * Delete from old mount point vnode list, if on one.
         */
        if ((omp = vp->v_mount) != NULL)
                TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vp, v_mntvnodes);
        /*
         * Insert into list of vnodes for the new mount point, if
         * available.  The caller must take a reference on the mount
         * structure and donate to the vnode.
         */
        if ((vp->v_mount = mp) != NULL)
                TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vp, v_mntvnodes);
        mutex_exit(&mntvnode_lock);

        if (omp != NULL) {
                /* Release reference to old mount. */
                vfs_destroy(omp);
        }
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If FORCECLOSE is not specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error).  If FORCECLOSE is specified, detach any active vnodes
 * that are found.
 *
 * If WRITECLOSE is set, only flush out regular file vnodes open for
 * writing.
 *
 * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

/*
 * vflushnext: yield the CPU every so often while walking the vnode list,
 * then continue from the marker vnode.
 */
static vnode_t *
vflushnext(vnode_t *mvp, int *when)
{

        if (hardclock_ticks > *when) {
                mutex_exit(&mntvnode_lock);
                yield();
                mutex_enter(&mntvnode_lock);
                *when = hardclock_ticks + hz / 10;
        }
        return vunmark(mvp);
}

int
vflush(struct mount *mp, vnode_t *skipvp, int flags)
{
        vnode_t *vp, *mvp;
        int busy = 0, when = 0;

        /* First, flush out any vnode references from vrele_list. */
        vrele_flush();

        /* Allocate a marker vnode. */
        mvp = vnalloc(mp);
        if (mvp == NULL) {
                return ENOMEM;
        }

        /*
         * NOTE: not using the TAILQ_FOREACH here since in this loop vgone()
         * and vclean() are called.
         */
        mutex_enter(&mntvnode_lock);
        for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp != NULL;
            vp = vflushnext(mvp, &when)) {
                vmark(mvp, vp);
                if (vp->v_mount != mp || vismarker(vp))
                        continue;
                /*
                 * Skip over a selected vnode.
                 */
                if (vp == skipvp)
                        continue;
                mutex_enter(&vp->v_interlock);
                /*
                 * Ignore clean but still referenced vnodes.
                 */
                if ((vp->v_iflag & VI_CLEAN) != 0) {
                        mutex_exit(&vp->v_interlock);
                        continue;
                }
                /*
                 * Skip over vnodes marked VV_SYSTEM.
                 */
                if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) {
                        mutex_exit(&vp->v_interlock);
                        continue;
                }
                /*
                 * If WRITECLOSE is set, only flush out regular file
                 * vnodes open for writing.
                 */
                if ((flags & WRITECLOSE) &&
                    (vp->v_writecount == 0 || vp->v_type != VREG)) {
                        mutex_exit(&vp->v_interlock);
                        continue;
                }
                /*
                 * With v_usecount == 0, all we need to do is clear
                 * out the vnode data structures and we are done.
                 */
                if (vp->v_usecount == 0) {
                        mutex_exit(&mntvnode_lock);
                        vremfree(vp);
                        vp->v_usecount = 1;
                        vclean(vp, DOCLOSE);
                        vrelel(vp, 0);
                        mutex_enter(&mntvnode_lock);
                        continue;
                }
                /*
                 * If FORCECLOSE is set, forcibly close the vnode.
                 * For block or character devices, revert to an
                 * anonymous device.  For all other files, just
                 * kill them.
                 */
                if (flags & FORCECLOSE) {
                        mutex_exit(&mntvnode_lock);
                        atomic_inc_uint(&vp->v_usecount);
                        if (vp->v_type != VBLK && vp->v_type != VCHR) {
                                vclean(vp, DOCLOSE);
                                vrelel(vp, 0);
                        } else {
                                vclean(vp, 0);
                                vp->v_op = spec_vnodeop_p; /* XXXSMP */
                                mutex_exit(&vp->v_interlock);
                                /*
                                 * The vnode isn't clean, but still resides
                                 * on the mount list.  Remove it.  XXX This
                                 * is a bit dodgy.
                                 */
                                vfs_insmntque(vp, NULL);
                                vrele(vp);
                        }
                        mutex_enter(&mntvnode_lock);
                        continue;
                }
#ifdef DEBUG
                if (busyprt)
                        vprint("vflush: busy vnode", vp);
#endif
                mutex_exit(&vp->v_interlock);
                busy++;
        }
        mutex_exit(&mntvnode_lock);
        vnfree(mvp);
        if (busy)
                return (EBUSY);
        return (0);
}

/*
 * Remove clean vnodes from a mountpoint's vnode list.
 */
void
vfs_scrubvnlist(struct mount *mp)
{
        vnode_t *vp, *nvp;

retry:
        mutex_enter(&mntvnode_lock);
        for (vp = TAILQ_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
                nvp = TAILQ_NEXT(vp, v_mntvnodes);
                mutex_enter(&vp->v_interlock);
                if ((vp->v_iflag & VI_CLEAN) != 0) {
                        TAILQ_REMOVE(&mp->mnt_vnodelist, vp, v_mntvnodes);
                        vp->v_mount = NULL;
                        mutex_exit(&mntvnode_lock);
                        mutex_exit(&vp->v_interlock);
                        vfs_destroy(mp);
                        goto retry;
                }
                mutex_exit(&vp->v_interlock);
        }
        mutex_exit(&mntvnode_lock);
}

/*
 * Mount a file system.
 */

/*
 * Scan all active processes to see if any of them have a current or root
 * directory onto which the new filesystem has just been mounted.  If so,
 * replace them with the new mount point.
 */
static void
mount_checkdirs(vnode_t *olddp)
{
        vnode_t *newdp, *rele1, *rele2;
        struct cwdinfo *cwdi;
        struct proc *p;
        bool retry;

        if (olddp->v_usecount == 1) {
                return;
        }
        if (VFS_ROOT(olddp->v_mountedhere, &newdp))
                panic("mount: lost mount");

        do {
                retry = false;
                mutex_enter(proc_lock);
                PROCLIST_FOREACH(p, &allproc) {
                        if ((cwdi = p->p_cwdi) == NULL)
                                continue;
                        /*
                         * Cannot change to the old directory any more,
                         * so even if we see a stale value it is not a
                         * problem.
                         */
                        if (cwdi->cwdi_cdir != olddp &&
                            cwdi->cwdi_rdir != olddp)
                                continue;
                        retry = true;
                        rele1 = NULL;
                        rele2 = NULL;
                        atomic_inc_uint(&cwdi->cwdi_refcnt);
                        mutex_exit(proc_lock);
                        rw_enter(&cwdi->cwdi_lock, RW_WRITER);
                        if (cwdi->cwdi_cdir == olddp) {
                                rele1 = cwdi->cwdi_cdir;
                                vref(newdp);
                                cwdi->cwdi_cdir = newdp;
                        }
                        if (cwdi->cwdi_rdir == olddp) {
                                rele2 = cwdi->cwdi_rdir;
                                vref(newdp);
                                cwdi->cwdi_rdir = newdp;
                        }
                        rw_exit(&cwdi->cwdi_lock);
                        cwdfree(cwdi);
                        if (rele1 != NULL)
                                vrele(rele1);
                        if (rele2 != NULL)
                                vrele(rele2);
                        mutex_enter(proc_lock);
                        break;
                }
                mutex_exit(proc_lock);
        } while (retry);

        if (rootvnode == olddp) {
                vrele(rootvnode);
                vref(newdp);
                rootvnode = newdp;
        }
        vput(newdp);
}

int
mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops,
    const char *path, int flags, void *data, size_t *data_len)
{
        vnode_t *vp = *vpp;
        struct mount *mp;
        struct vattr va;
        struct pathbuf *pb;
        struct nameidata nd;
        int error;

        error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
            KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
        if (error) {
                vfs_delref(vfsops);
                return error;
        }

        /* Cannot make a non-dir a mount-point (from here anyway). */
        if (vp->v_type != VDIR) {
                vfs_delref(vfsops);
                return ENOTDIR;
        }

        /*
         * If the user is not root, ensure that they own the directory
         * onto which we are attempting to mount.
         */
        if ((error = VOP_GETATTR(vp, &va, l->l_cred)) != 0 ||
            (va.va_uid != kauth_cred_geteuid(l->l_cred) &&
            (error = kauth_authorize_generic(l->l_cred,
            KAUTH_GENERIC_ISSUSER, NULL)) != 0)) {
                vfs_delref(vfsops);
                return error;
        }

        if (flags & MNT_EXPORTED) {
                vfs_delref(vfsops);
                return EINVAL;
        }

        if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) {
                vfs_delref(vfsops);
                return ENOMEM;
        }

        mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);

        /*
         * The underlying file system may refuse the mount for
         * various reasons.  Allow the user to force it to happen.
         *
         * Set the mount level flags.
         */
        mp->mnt_flag = flags &
            (MNT_FORCE | MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
             MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOCOREDUMP |
             MNT_NOATIME | MNT_NODEVMTIME | MNT_SYMPERM | MNT_SOFTDEP |
             MNT_LOG | MNT_IGNORE | MNT_RDONLY);

        mutex_enter(&mp->mnt_updating);
        error = VFS_MOUNT(mp, path, data, data_len);
        mp->mnt_flag &= ~MNT_OP_FLAGS;

        if (error != 0)
                goto err_unmounted;

        /*
         * Validate and prepare the mount point.
         */
        error = pathbuf_copyin(path, &pb);
        if (error != 0) {
                goto err_mounted;
        }
        NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
        error = namei(&nd);
        pathbuf_destroy(pb);
        if (error != 0) {
                goto err_mounted;
        }
        if (nd.ni_vp != vp) {
                vput(nd.ni_vp);
                error = EINVAL;
                goto err_mounted;
        }
        if (vp->v_mountedhere != NULL) {
                vput(nd.ni_vp);
                error = EBUSY;
                goto err_mounted;
        }
        error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0);
        if (error != 0) {
                vput(nd.ni_vp);
                goto err_mounted;
        }

        /*
         * Put the new filesystem on the mount list after root.
         */
        cache_purge(vp);
        mp->mnt_iflag &= ~IMNT_WANTRDWR;

        mutex_enter(&mountlist_lock);
        CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
        mutex_exit(&mountlist_lock);
        if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
                error = vfs_allocate_syncvnode(mp);
        if (error == 0)
                vp->v_mountedhere = mp;
        vput(nd.ni_vp);
        if (error != 0)
                goto err_onmountlist;

        mount_checkdirs(vp);
        mutex_exit(&mp->mnt_updating);

        /* Hold an additional reference to the mount across VFS_START(). */
        vfs_unbusy(mp, true, NULL);
        (void) VFS_STATVFS(mp, &mp->mnt_stat);
        error = VFS_START(mp, 0);
        if (error)
                vrele(vp);
        /* Drop reference held for VFS_START(). */
        vfs_destroy(mp);
        *vpp = NULL;
        return error;

err_onmountlist:
        mutex_enter(&mountlist_lock);
        CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
        mp->mnt_iflag |= IMNT_GONE;
        mutex_exit(&mountlist_lock);

err_mounted:
        if (VFS_UNMOUNT(mp, MNT_FORCE) != 0)
                panic("Unmounting fresh file system failed");

err_unmounted:
        vp->v_mountedhere = NULL;
        mutex_exit(&mp->mnt_updating);
        vfs_unbusy(mp, false, NULL);
        vfs_destroy(mp);

        return error;
}

/*
 * Do the actual file system unmount.  File system is assumed to have
 * been locked by the caller.
 *
 * => Caller holds a reference to the mount, explicitly for dounmount().
 */
int
dounmount(struct mount *mp, int flags, struct lwp *l)
{
        vnode_t *coveredvp;
        int error, async, used_syncer;

#if NVERIEXEC > 0
        error = veriexec_unmountchk(mp);
        if (error)
                return (error);
#endif /* NVERIEXEC > 0 */

        /*
         * XXX Freeze syncer.  Must do this before locking the
         * mount point.  See dounmount() for details.
         */
        mutex_enter(&syncer_mutex);
        rw_enter(&mp->mnt_unmounting, RW_WRITER);
        if ((mp->mnt_iflag & IMNT_GONE) != 0) {
                rw_exit(&mp->mnt_unmounting);
                mutex_exit(&syncer_mutex);
                return ENOENT;
        }

        used_syncer = (mp->mnt_syncer != NULL);

        /*
         * XXX Syncer must be frozen when we get here.  This should really
         * be done on a per-mountpoint basis, but the syncer doesn't work
         * like that.
         *
         * The caller of dounmount() must acquire syncer_mutex because
         * the syncer itself acquires locks in syncer_mutex -> vfs_busy
         * order, and we must preserve that order to avoid deadlock.
         *
         * So, if the file system did not use the syncer, now is
         * the time to release the syncer_mutex.
         */
        if (used_syncer == 0) {
                mutex_exit(&syncer_mutex);
        }
        mp->mnt_iflag |= IMNT_UNMOUNT;
        async = mp->mnt_flag & MNT_ASYNC;
        mp->mnt_flag &= ~MNT_ASYNC;
        cache_purgevfs(mp);	/* remove cache entries for this file sys */
        if (mp->mnt_syncer != NULL)
                vfs_deallocate_syncvnode(mp);
        error = 0;
        if ((mp->mnt_flag & MNT_RDONLY) == 0) {
                error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
        }
        vfs_scrubvnlist(mp);
        if (error == 0 || (flags & MNT_FORCE)) {
                error = VFS_UNMOUNT(mp, flags);
        }
        if (error) {
                if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
                        (void) vfs_allocate_syncvnode(mp);
                mp->mnt_iflag &= ~IMNT_UNMOUNT;
                mp->mnt_flag |= async;
                rw_exit(&mp->mnt_unmounting);
                if (used_syncer)
                        mutex_exit(&syncer_mutex);
                return (error);
        }
        vfs_scrubvnlist(mp);
        mutex_enter(&mountlist_lock);
        if ((coveredvp = mp->mnt_vnodecovered) != NULLVP)
                coveredvp->v_mountedhere = NULL;
        CIRCLEQ_REMOVE(&mountlist, mp, mnt_list);
        mp->mnt_iflag |= IMNT_GONE;
        mutex_exit(&mountlist_lock);
        if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
                panic("unmount: dangling vnode");
        if (used_syncer)
                mutex_exit(&syncer_mutex);
        vfs_hooks_unmount(mp);
        rw_exit(&mp->mnt_unmounting);
        vfs_destroy(mp);	/* reference from mount() */
        if (coveredvp != NULLVP) {
                vrele(coveredvp);
        }
        return (0);
}

/*
 * Unmount all file systems.
 * We traverse the list in reverse order under the assumption that doing so
 * will avoid needing to worry about dependencies.
 */
bool
vfs_unmountall(struct lwp *l)
{

        printf("unmounting file systems...");
        return vfs_unmountall1(l, true, true);
}

static void
vfs_unmount_print(struct mount *mp, const char *pfx)
{

        aprint_verbose("%sunmounted %s on %s type %s\n", pfx,
            mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname,
            mp->mnt_stat.f_fstypename);
}

/*
 * Forcibly unmount the most recently mounted file system.  Returns true
 * if an unmount was performed.
 */
bool
vfs_unmount_forceone(struct lwp *l)
{
        struct mount *mp, *nmp;
        int error;

        nmp = NULL;

        CIRCLEQ_FOREACH_REVERSE(mp, &mountlist, mnt_list) {
                if (nmp == NULL || mp->mnt_gen > nmp->mnt_gen) {
                        nmp = mp;
                }
        }
        if (nmp == NULL) {
                return false;
        }

#ifdef DEBUG
        printf("\nforcefully unmounting %s (%s)...",
            nmp->mnt_stat.f_mntonname, nmp->mnt_stat.f_mntfromname);
#endif
        atomic_inc_uint(&nmp->mnt_refcnt);
        if ((error = dounmount(nmp, MNT_FORCE, l)) == 0) {
                vfs_unmount_print(nmp, "forcefully ");
                return true;
        } else {
                vfs_destroy(nmp);
        }

#ifdef DEBUG
        printf("forceful unmount of %s failed with error %d\n",
            nmp->mnt_stat.f_mntonname, error);
#endif

        return false;
}

/*
 * Unmount all file systems, optionally by force.  Returns true if any
 * unmount succeeded.
 */
bool
vfs_unmountall1(struct lwp *l, bool force, bool verbose)
{
        struct mount *mp, *nmp;
        bool any_error = false, progress = false;
        int error;

        for (mp = CIRCLEQ_LAST(&mountlist);
             mp != (void *)&mountlist;
             mp = nmp) {
                nmp = CIRCLEQ_PREV(mp, mnt_list);
#ifdef DEBUG
                printf("\nunmounting %p %s (%s)...",
                    (void *)mp, mp->mnt_stat.f_mntonname,
                    mp->mnt_stat.f_mntfromname);
#endif
                atomic_inc_uint(&mp->mnt_refcnt);
                if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0) {
                        vfs_unmount_print(mp, "");
                        progress = true;
                } else {
                        vfs_destroy(mp);
                        if (verbose) {
                                printf("unmount of %s failed with error %d\n",
                                    mp->mnt_stat.f_mntonname, error);
                        }
                        any_error = true;
                }
        }
        if (verbose) {
                printf(" done\n");
        }
        if (any_error && verbose) {
                printf("WARNING: some file systems would not unmount\n");
        }
        return progress;
}

/*
 * Sync all file systems and wait for the writes to complete; used on the
 * way down to shutdown.
 */
void
vfs_sync_all(struct lwp *l)
{
        printf("syncing disks... ");

        /* remove user processes from run queue */
        suspendsched();
        (void)spl0();

        /* avoid coming back this way again if we panic. */
        doing_shutdown = 1;

        sys_sync(l, NULL, NULL);

        /* Wait for sync to finish. */
        if (buf_syncwait() != 0) {
#if defined(DDB) && defined(DEBUG_HALT_BUSY)
                Debugger();
#endif
                printf("giving up\n");
                return;
        } else
                printf("done\n");
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown(void)
{
        lwp_t *l = curlwp;

        vfs_sync_all(l);

        /*
         * If we have panicked - do not make the situation potentially
         * worse by unmounting the file systems.
         */
        if (panicstr != NULL) {
                return;
        }

        /* Unmount file systems. */
        vfs_unmountall(l);
}

/*
 * Print a list of supported file system types (used by vfs_mountroot)
 */
static void
vfs_print_fstypes(void)
{
        struct vfsops *v;
        int cnt = 0;

        mutex_enter(&vfs_list_lock);
        LIST_FOREACH(v, &vfs_list, vfs_list)
                ++cnt;
        mutex_exit(&vfs_list_lock);

        if (cnt == 0) {
                printf("WARNING: No file system modules have been loaded.\n");
                return;
        }

        printf("Supported file systems:");
        mutex_enter(&vfs_list_lock);
        LIST_FOREACH(v, &vfs_list, vfs_list) {
                printf(" %s", v->vfs_name);
        }
        mutex_exit(&vfs_list_lock);
        printf("\n");
}

/*
 * Mount the root file system.  If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
 */
int
vfs_mountroot(void)
{
        struct vfsops *v;
        int error = ENODEV;

        if (root_device == NULL)
                panic("vfs_mountroot: root device unknown");

        switch (device_class(root_device)) {
        case DV_IFNET:
                if (rootdev != NODEV)
                        panic("vfs_mountroot: rootdev set for DV_IFNET "
                            "(0x%llx -> %llu,%llu)",
                            (unsigned long long)rootdev,
                            (unsigned long long)major(rootdev),
                            (unsigned long long)minor(rootdev));
                break;

        case DV_DISK:
                if (rootdev == NODEV)
                        panic("vfs_mountroot: rootdev not set for DV_DISK");
                if (bdevvp(rootdev, &rootvp))
                        panic("vfs_mountroot: can't get vnode for rootdev");
                error = VOP_OPEN(rootvp, FREAD, FSCRED);
                if (error) {
                        printf("vfs_mountroot: can't open root device\n");
                        return (error);
                }
                break;

        case DV_VIRTUAL:
                break;

        default:
                printf("%s: inappropriate for root file system\n",
                    device_xname(root_device));
                return (ENODEV);
        }

        /*
         * If user specified a root fs type, use it.  Make sure the
         * specified type exists and has a vfs_mountroot() entry point.
         */
        if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) {
                v = vfs_getopsbyname(rootfstype);
                error = EFTYPE;
                if (v != NULL) {
                        if (v->vfs_mountroot != NULL) {
                                error = (v->vfs_mountroot)();
                        }
                        v->vfs_refcount--;
                }
                goto done;
        }

        /*
         * Try each file system currently configured into the kernel.
         */
        mutex_enter(&vfs_list_lock);
        LIST_FOREACH(v, &vfs_list, vfs_list) {
                if (v->vfs_mountroot == NULL)
                        continue;
#ifdef DEBUG
                aprint_normal("mountroot: trying %s...\n", v->vfs_name);
#endif
                v->vfs_refcount++;
                mutex_exit(&vfs_list_lock);
                error = (*v->vfs_mountroot)();
                mutex_enter(&vfs_list_lock);
                v->vfs_refcount--;
                if (!error) {
                        aprint_normal("root file system type: %s\n",
                            v->vfs_name);
                        break;
                }
        }
        mutex_exit(&vfs_list_lock);

        if (v == NULL) {
                vfs_print_fstypes();
                printf("no file system for %s", device_xname(root_device));
                if (device_class(root_device) == DV_DISK)
                        printf(" (dev 0x%llx)", (unsigned long long)rootdev);
                printf("\n");
                error = EFTYPE;
        }

done:
        if (error && device_class(root_device) == DV_DISK) {
                VOP_CLOSE(rootvp, FREAD, FSCRED);
                vrele(rootvp);
        }
        if (error == 0) {
                extern struct cwdinfo cwdi0;

                CIRCLEQ_FIRST(&mountlist)->mnt_flag |= MNT_ROOTFS;
                CIRCLEQ_FIRST(&mountlist)->mnt_op->vfs_refcount++;

                /*
                 * Get the vnode for '/'.  Set cwdi0.cwdi_cdir to
                 * reference it.
                 */
                error = VFS_ROOT(CIRCLEQ_FIRST(&mountlist), &rootvnode);
                if (error)
                        panic("cannot find root vnode, error=%d", error);
                cwdi0.cwdi_cdir = rootvnode;
                vref(cwdi0.cwdi_cdir);
                VOP_UNLOCK(rootvnode);
                cwdi0.cwdi_rdir = NULL;

                /*
                 * Now that root is mounted, we can fixup initproc's CWD
                 * info.  All other processes are kthreads, which merely
                 * share proc0's CWD info.
                 */
                initproc->p_cwdi->cwdi_cdir = rootvnode;
                vref(initproc->p_cwdi->cwdi_cdir);
                initproc->p_cwdi->cwdi_rdir = NULL;
                /*
                 * Enable loading of modules from the filesystem
                 */
                module_load_vfs_init();

        }
        return (error);
}

/*
 * mount_specific_key_create --
 *	Create a key for subsystem mount-specific data.
 */
int
mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
{

        return specificdata_key_create(mount_specificdata_domain, keyp, dtor);
}

/*
 * mount_specific_key_delete --
 *	Delete a key for subsystem mount-specific data.
 */
void
mount_specific_key_delete(specificdata_key_t key)
{

        specificdata_key_delete(mount_specificdata_domain, key);
}

/*
 * mount_initspecific --
 *	Initialize a mount's specificdata container.
 */
void
mount_initspecific(struct mount *mp)
{
        int error;

        error = specificdata_init(mount_specificdata_domain,
            &mp->mnt_specdataref);
        KASSERT(error == 0);
}

/*
 * mount_finispecific --
 *	Finalize a mount's specificdata container.
 */
void
mount_finispecific(struct mount *mp)
{

        specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
}

/*
 * mount_getspecific --
 *	Return mount-specific data corresponding to the specified key.
 */
void *
mount_getspecific(struct mount *mp, specificdata_key_t key)
{

        return specificdata_getspecific(mount_specificdata_domain,
            &mp->mnt_specdataref, key);
}

/*
 * mount_setspecific --
 *	Set mount-specific data corresponding to the specified key.
 */
void
mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
{

        specificdata_setspecific(mount_specificdata_domain,
            &mp->mnt_specdataref, key, data);
}
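
/*
 * Usage sketch (illustrative; example_key, example_dtor and the data are
 * placeholders): a subsystem attaches private per-mount data by creating a
 * key once and then using the get/set accessors, e.g.
 *
 *	static specificdata_key_t example_key;
 *
 *	// once, at subsystem initialization:
 *	error = mount_specific_key_create(&example_key, example_dtor);
 *
 *	// per mount:
 *	mount_setspecific(mp, example_key, data);
 *	data = mount_getspecific(mp, example_key);
 */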

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vnode_t *vp)
{
        vnode_t *vq;
        int error = 0;

        if (vp->v_type != VBLK)
                return ENOTBLK;
        if (vp->v_specmountpoint != NULL)
                return (EBUSY);
        mutex_enter(&device_lock);
        for (vq = specfs_hash[SPECHASH(vp->v_rdev)]; vq != NULL;
            vq = vq->v_specnext) {
                if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
                        continue;
                if (vq->v_specmountpoint != NULL) {
                        error = EBUSY;
                        break;
                }
        }
        mutex_exit(&device_lock);
        return (error);
}

/*
 * Check if a device pointed to by vp is mounted.
 *
 * Returns:
 *   EINVAL	if it's not a disk
 *   EBUSY	if it's a disk and mounted
 *   0		if it's a disk and not mounted
 */
int
rawdev_mounted(vnode_t *vp, vnode_t **bvpp)
{
        vnode_t *bvp;
        dev_t dev;
        int d_type;

        bvp = NULL;
        dev = vp->v_rdev;
        d_type = D_OTHER;

        if (iskmemvp(vp))
                return EINVAL;

        switch (vp->v_type) {
        case VCHR: {
                const struct cdevsw *cdev;

                cdev = cdevsw_lookup(dev);
                if (cdev != NULL) {
                        dev_t blkdev;

                        blkdev = devsw_chr2blk(dev);
                        if (blkdev != NODEV) {
                                if (vfinddev(blkdev, VBLK, &bvp) != 0) {
                                        d_type = (cdev->d_flag & D_TYPEMASK);
                                        /* XXX: what if bvp disappears? */
                                        vrele(bvp);
                                }
                        }
                }

                break;
        }

        case VBLK: {
                const struct bdevsw *bdev;

                bdev = bdevsw_lookup(dev);
                if (bdev != NULL)
                        d_type = (bdev->d_flag & D_TYPEMASK);

                bvp = vp;

                break;
        }

        default:
                break;
        }

        if (d_type != D_DISK)
                return EINVAL;

        if (bvpp != NULL)
                *bvpp = bvp;

        /*
         * XXX: This is bogus.  We should be failing the request
         * XXX: not only if this specific slice is mounted, but
         * XXX: if it's on a disk with any other mounted slice.
         */
        if (vfs_mountedon(bvp))
                return EBUSY;

        return 0;
}

/*
 * Make a 'unique' number from a mount type name.
 */
long
makefstype(const char *type)
{
        long rv;

        for (rv = 0; *type; type++) {
                rv <<= 2;
                rv ^= *type;
        }
        return rv;
}
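
/*
 * Worked example (illustrative): makefstype() folds each character of the
 * name into the accumulator with a shift and an XOR, so for "ffs":
 *
 *	rv = (0x000 << 2) ^ 'f'	= 0x066
 *	rv = (0x066 << 2) ^ 'f'	= 0x1fe
 *	rv = (0x1fe << 2) ^ 's'	= 0x78b
 *
 * giving makefstype("ffs") == 0x78b.  The result is only "unique" in a
 * best-effort sense; vfs_getnewfsid() above still probes with vfs_getvfs()
 * to avoid handing out a duplicate fsid.
 */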