xref: /netbsd-src/sys/kern/vfs_mount.c (revision e6c7e151de239c49d2e38720a061ed9d1fa99309)
1 /*	$NetBSD: vfs_mount.c,v 1.75 2020/02/23 22:14:03 ad Exp $	*/
2 
3 /*-
4  * Copyright (c) 1997-2020 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Copyright (c) 1989, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  * (c) UNIX System Laboratories, Inc.
37  * All or some portions of this file are derived from material licensed
38  * to the University of California by American Telephone and Telegraph
39  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40  * the permission of UNIX System Laboratories, Inc.
41  *
42  * Redistribution and use in source and binary forms, with or without
43  * modification, are permitted provided that the following conditions
44  * are met:
45  * 1. Redistributions of source code must retain the above copyright
46  *    notice, this list of conditions and the following disclaimer.
47  * 2. Redistributions in binary form must reproduce the above copyright
48  *    notice, this list of conditions and the following disclaimer in the
49  *    documentation and/or other materials provided with the distribution.
50  * 3. Neither the name of the University nor the names of its contributors
51  *    may be used to endorse or promote products derived from this software
52  *    without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  *
66  *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
67  */
68 
69 #include <sys/cdefs.h>
70 __KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.75 2020/02/23 22:14:03 ad Exp $");
71 
72 #include <sys/param.h>
73 #include <sys/kernel.h>
74 
75 #include <sys/atomic.h>
76 #include <sys/buf.h>
77 #include <sys/conf.h>
78 #include <sys/fcntl.h>
79 #include <sys/filedesc.h>
80 #include <sys/device.h>
81 #include <sys/kauth.h>
82 #include <sys/kmem.h>
83 #include <sys/module.h>
84 #include <sys/mount.h>
85 #include <sys/fstrans.h>
86 #include <sys/namei.h>
87 #include <sys/extattr.h>
88 #include <sys/syscallargs.h>
89 #include <sys/sysctl.h>
90 #include <sys/systm.h>
91 #include <sys/vfs_syscalls.h>
92 #include <sys/vnode_impl.h>
93 #include <sys/xcall.h>
94 
95 #include <miscfs/genfs/genfs.h>
96 #include <miscfs/specfs/specdev.h>
97 
/*
 * Entries on the global mount list are either real mounts (ME_MOUNT)
 * or iterator markers (ME_MARKER) threaded onto the same list so an
 * iteration can keep its place across list changes.
 */
enum mountlist_type {
	ME_MOUNT,
	ME_MARKER
};
struct mountlist_entry {
	TAILQ_ENTRY(mountlist_entry) me_list;	/* Mount list. */
	struct mount *me_mount;			/* Actual mount if ME_MOUNT,
						   current mount else. */
	enum mountlist_type me_type;		/* Mount or marker. */
};
/* Opaque iterator handle handed out to mountlist users; just a marker. */
struct mount_iterator {
	struct mountlist_entry mi_entry;
};
111 
static struct vnode *vfs_vnode_iterator_next1(struct vnode_iterator *,
    bool (*)(void *, struct vnode *), void *, bool);

/* Root filesystem. */
vnode_t *			rootvnode;

/* Mounted filesystem list, protected by mountlist_lock. */
static TAILQ_HEAD(mountlist, mountlist_entry) mountlist;
static kmutex_t			mountlist_lock __cacheline_aligned;
int vnode_offset_next_by_lru	/* XXX: ugly hack for pstat.c */
    = offsetof(vnode_impl_t, vi_lrulist.tqe_next);

/* Serializes access to the global list of file system types (vfs_list). */
kmutex_t			vfs_list_lock __cacheline_aligned;

/* Domain for per-mount specific data (see mount_initspecific()). */
static specificdata_domain_t	mount_specificdata_domain;
/* Protects xxxfs_mntid in vfs_getnewfsid(). */
static kmutex_t			mntid_lock;

/* Monotonic generation number handed to each new mount (vfs_mountalloc). */
static kmutex_t			mountgen_lock __cacheline_aligned;
static uint64_t			mountgen;
131 
132 void
133 vfs_mount_sysinit(void)
134 {
135 
136 	TAILQ_INIT(&mountlist);
137 	mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
138 	mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);
139 
140 	mount_specificdata_domain = specificdata_domain_create();
141 	mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
142 	mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE);
143 	mountgen = 0;
144 }
145 
/*
 * Allocate and minimally initialise a mount structure for file system
 * type "vfsops", covering vnode "vp" (NULL for the root mount).  The
 * returned mount carries one reference, donated to the caller, and a
 * freshly assigned generation number.
 */
struct mount *
vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp)
{
	struct mount *mp;
	int error __diagused;

	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
	mp->mnt_op = vfsops;
	mp->mnt_refcnt = 1;	/* reference donated to the caller */
	TAILQ_INIT(&mp->mnt_vnodelist);
	mp->mnt_renamelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_vnodelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_updating = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
	mp->mnt_vnodecovered = vp;
	mount_initspecific(mp);

	/* Attach fstrans state; asserted not to fail for a fresh mount. */
	error = fstrans_mount(mp);
	KASSERT(error == 0);

	/* Assign a unique, monotonically increasing generation number. */
	mutex_enter(&mountgen_lock);
	mp->mnt_gen = mountgen++;
	mutex_exit(&mountgen_lock);

	return mp;
}
171 
172 /*
173  * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and
174  * initialize a mount structure for it.
175  *
176  * Devname is usually updated by mount(8) after booting.
177  */
178 int
179 vfs_rootmountalloc(const char *fstypename, const char *devname,
180     struct mount **mpp)
181 {
182 	struct vfsops *vfsp = NULL;
183 	struct mount *mp;
184 	int error __diagused;
185 
186 	mutex_enter(&vfs_list_lock);
187 	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
188 		if (!strncmp(vfsp->vfs_name, fstypename,
189 		    sizeof(mp->mnt_stat.f_fstypename)))
190 			break;
191 	if (vfsp == NULL) {
192 		mutex_exit(&vfs_list_lock);
193 		return (ENODEV);
194 	}
195 	vfsp->vfs_refcount++;
196 	mutex_exit(&vfs_list_lock);
197 
198 	if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL)
199 		return ENOMEM;
200 	error = vfs_busy(mp);
201 	KASSERT(error == 0);
202 	mp->mnt_flag = MNT_RDONLY;
203 	(void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
204 	    sizeof(mp->mnt_stat.f_fstypename));
205 	mp->mnt_stat.f_mntonname[0] = '/';
206 	mp->mnt_stat.f_mntonname[1] = '\0';
207 	mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
208 	    '\0';
209 	(void)copystr(devname, mp->mnt_stat.f_mntfromname,
210 	    sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
211 	*mpp = mp;
212 	return 0;
213 }
214 
215 /*
216  * vfs_getnewfsid: get a new unique fsid.
217  */
218 void
219 vfs_getnewfsid(struct mount *mp)
220 {
221 	static u_short xxxfs_mntid;
222 	fsid_t tfsid;
223 	int mtype;
224 
225 	mutex_enter(&mntid_lock);
226 	mtype = makefstype(mp->mnt_op->vfs_name);
227 	mp->mnt_stat.f_fsidx.__fsid_val[0] = makedev(mtype, 0);
228 	mp->mnt_stat.f_fsidx.__fsid_val[1] = mtype;
229 	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
230 	if (xxxfs_mntid == 0)
231 		++xxxfs_mntid;
232 	tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
233 	tfsid.__fsid_val[1] = mtype;
234 	while (vfs_getvfs(&tfsid)) {
235 		tfsid.__fsid_val[0]++;
236 		xxxfs_mntid++;
237 	}
238 	mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
239 	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
240 	mutex_exit(&mntid_lock);
241 }
242 
243 /*
244  * Lookup a mount point by filesystem identifier.
245  *
246  * XXX Needs to add a reference to the mount point.
247  */
248 struct mount *
249 vfs_getvfs(fsid_t *fsid)
250 {
251 	mount_iterator_t *iter;
252 	struct mount *mp;
253 
254 	mountlist_iterator_init(&iter);
255 	while ((mp = mountlist_iterator_next(iter)) != NULL) {
256 		if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
257 		    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
258 			mountlist_iterator_destroy(iter);
259 			return mp;
260 		}
261 	}
262 	mountlist_iterator_destroy(iter);
263 	return NULL;
264 }
265 
266 /*
267  * Take a reference to a mount structure.
268  */
269 void
270 vfs_ref(struct mount *mp)
271 {
272 
273 	KASSERT(mp->mnt_refcnt > 0 || mutex_owned(&mountlist_lock));
274 
275 	atomic_inc_uint(&mp->mnt_refcnt);
276 }
277 
278 /*
279  * Drop a reference to a mount structure, freeing if the last reference.
280  */
281 void
282 vfs_rele(struct mount *mp)
283 {
284 
285 	if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
286 		return;
287 	}
288 
289 	/*
290 	 * Nothing else has visibility of the mount: we can now
291 	 * free the data structures.
292 	 */
293 	KASSERT(mp->mnt_refcnt == 0);
294 	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
295 	mutex_obj_free(mp->mnt_updating);
296 	mutex_obj_free(mp->mnt_renamelock);
297 	mutex_obj_free(mp->mnt_vnodelock);
298 	if (mp->mnt_op != NULL) {
299 		vfs_delref(mp->mnt_op);
300 	}
301 	fstrans_unmount(mp);
302 	/*
303 	 * Final free of mp gets done from fstrans_mount_dtor().
304 	 *
305 	 * Prevents this memory to be reused as a mount before
306 	 * fstrans releases all references to it.
307 	 */
308 }
309 
310 /*
311  * Mark a mount point as busy, and gain a new reference to it.  Used to
312  * prevent the file system from being unmounted during critical sections.
313  *
314  * vfs_busy can be called multiple times and by multiple threads
315  * and must be accompanied by the same number of vfs_unbusy calls.
316  *
317  * => The caller must hold a pre-existing reference to the mount.
318  * => Will fail if the file system is being unmounted, or is unmounted.
319  */
320 static inline int
321 _vfs_busy(struct mount *mp, bool wait)
322 {
323 
324 	KASSERT(mp->mnt_refcnt > 0);
325 
326 	if (wait) {
327 		fstrans_start(mp);
328 	} else {
329 		if (fstrans_start_nowait(mp))
330 			return EBUSY;
331 	}
332 	if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
333 		fstrans_done(mp);
334 		return ENOENT;
335 	}
336 	vfs_ref(mp);
337 	return 0;
338 }
339 
/* Blocking variant of _vfs_busy(): waits in fstrans_start(). */
int
vfs_busy(struct mount *mp)
{

	return _vfs_busy(mp, true);
}
346 
/* Non-blocking variant of _vfs_busy(): returns EBUSY instead of waiting. */
int
vfs_trybusy(struct mount *mp)
{

	return _vfs_busy(mp, false);
}
353 
354 /*
355  * Unbusy a busy filesystem.
356  *
357  * Every successful vfs_busy() call must be undone by a vfs_unbusy() call.
358  */
359 void
360 vfs_unbusy(struct mount *mp)
361 {
362 
363 	KASSERT(mp->mnt_refcnt > 0);
364 
365 	fstrans_done(mp);
366 	vfs_rele(mp);
367 }
368 
/*
 * A vnode iterator is a marker vnode threaded onto a mount's vnode
 * list; advancing the iterator moves the marker along the list.
 */
struct vnode_iterator {
	vnode_impl_t vi_vnode;
};
372 
/*
 * Begin iteration over all vnodes of mount "mp": allocate a marker
 * vnode and insert it at the head of the mount's vnode list.
 */
void
vfs_vnode_iterator_init(struct mount *mp, struct vnode_iterator **vnip)
{
	vnode_t *vp;
	vnode_impl_t *vip;

	vp = vnalloc_marker(mp);
	vip = VNODE_TO_VIMPL(vp);

	mutex_enter(mp->mnt_vnodelock);
	TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vip, vi_mntvnodes);
	/* Non-zero usecount flags the marker as being on the list. */
	vp->v_usecount = 1;
	mutex_exit(mp->mnt_vnodelock);

	*vnip = (struct vnode_iterator *)vip;
}
389 
/*
 * End iteration: unlink the marker from the mount's vnode list if it
 * is still on it (v_usecount != 0) and free the marker vnode.
 */
void
vfs_vnode_iterator_destroy(struct vnode_iterator *vni)
{
	vnode_impl_t *mvip = &vni->vi_vnode;
	vnode_t *mvp = VIMPL_TO_VNODE(mvip);
	kmutex_t *lock;

	KASSERT(vnis_marker(mvp));
	if (mvp->v_usecount != 0) {
		lock = mvp->v_mount->mnt_vnodelock;
		mutex_enter(lock);
		TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvip, vi_mntvnodes);
		mvp->v_usecount = 0;
		mutex_exit(lock);
	}
	vnfree_marker(mvp);
}
407 
/*
 * Advance the iterator: unlink the marker, walk forward to the next
 * live, non-marker vnode that satisfies the optional filter "f", park
 * the marker after it, and return the vnode with a reference from
 * vcache_vget().  Returns NULL at the end of the list.  "do_wait"
 * selects whether vdead_check() may wait for dying vnodes.
 */
static struct vnode *
vfs_vnode_iterator_next1(struct vnode_iterator *vni,
    bool (*f)(void *, struct vnode *), void *cl, bool do_wait)
{
	vnode_impl_t *mvip = &vni->vi_vnode;
	struct mount *mp = VIMPL_TO_VNODE(mvip)->v_mount;
	vnode_t *vp;
	vnode_impl_t *vip;
	kmutex_t *lock;
	int error;

	KASSERT(vnis_marker(VIMPL_TO_VNODE(mvip)));

	lock = mp->mnt_vnodelock;
	do {
		mutex_enter(lock);
		vip = TAILQ_NEXT(mvip, vi_mntvnodes);
		TAILQ_REMOVE(&mp->mnt_vnodelist, mvip, vi_mntvnodes);
		VIMPL_TO_VNODE(mvip)->v_usecount = 0;
again:
		if (vip == NULL) {
			/* End of the list; marker stays off the list. */
			mutex_exit(lock);
	       		return NULL;
		}
		vp = VIMPL_TO_VNODE(vip);
		KASSERT(vp != NULL);
		mutex_enter(vp->v_interlock);
		/* Skip other markers, dying vnodes and filter rejects. */
		if (vnis_marker(vp) ||
		    vdead_check(vp, (do_wait ? 0 : VDEAD_NOWAIT)) ||
		    (f && !(*f)(cl, vp))) {
			mutex_exit(vp->v_interlock);
			vip = TAILQ_NEXT(vip, vi_mntvnodes);
			goto again;
		}

		/* Park the marker after the candidate, then get a ref. */
		TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vip, mvip, vi_mntvnodes);
		VIMPL_TO_VNODE(mvip)->v_usecount = 1;
		mutex_exit(lock);
		error = vcache_vget(vp);
		KASSERT(error == 0 || error == ENOENT);
	} while (error != 0);

	return vp;
}
452 
/*
 * Public iterator step: like vfs_vnode_iterator_next1() but never
 * waits for vnodes that are being reclaimed.
 */
struct vnode *
vfs_vnode_iterator_next(struct vnode_iterator *vni,
    bool (*f)(void *, struct vnode *), void *cl)
{

	return vfs_vnode_iterator_next1(vni, f, cl, false);
}
460 
461 /*
462  * Move a vnode from one mount queue to another.
463  */
464 void
465 vfs_insmntque(vnode_t *vp, struct mount *mp)
466 {
467 	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
468 	struct mount *omp;
469 	kmutex_t *lock;
470 
471 	KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 ||
472 	    vp->v_tag == VT_VFS);
473 
474 	/*
475 	 * Delete from old mount point vnode list, if on one.
476 	 */
477 	if ((omp = vp->v_mount) != NULL) {
478 		lock = omp->mnt_vnodelock;
479 		mutex_enter(lock);
480 		TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vip, vi_mntvnodes);
481 		mutex_exit(lock);
482 	}
483 
484 	/*
485 	 * Insert into list of vnodes for the new mount point, if
486 	 * available.  The caller must take a reference on the mount
487 	 * structure and donate to the vnode.
488 	 */
489 	if ((vp->v_mount = mp) != NULL) {
490 		lock = mp->mnt_vnodelock;
491 		mutex_enter(lock);
492 		TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vip, vi_mntvnodes);
493 		mutex_exit(lock);
494 	}
495 
496 	if (omp != NULL) {
497 		/* Release reference to old mount. */
498 		vfs_rele(omp);
499 	}
500 }
501 
502 /*
503  * Remove any vnodes in the vnode table belonging to mount point mp.
504  *
505  * If FORCECLOSE is not specified, there should not be any active ones,
506  * return error if any are found (nb: this is a user error, not a
507  * system error). If FORCECLOSE is specified, detach any active vnodes
508  * that are found.
509  *
510  * If WRITECLOSE is set, only flush out regular file vnodes open for
511  * writing.
512  *
513  * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped.
514  */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };	/* debug sysctl hook */
#endif
519 
520 static vnode_t *
521 vflushnext(struct vnode_iterator *marker, int *when)
522 {
523 	if (hardclock_ticks > *when) {
524 		yield();
525 		*when = hardclock_ticks + hz / 10;
526 	}
527 	return vfs_vnode_iterator_next1(marker, NULL, NULL, true);
528 }
529 
530 /*
531  * Flush one vnode.  Referenced on entry, unreferenced on return.
532  */
533 static int
534 vflush_one(vnode_t *vp, vnode_t *skipvp, int flags)
535 {
536 	int error;
537 	struct vattr vattr;
538 
539 	if (vp == skipvp ||
540 	    ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM))) {
541 		vrele(vp);
542 		return 0;
543 	}
544 	/*
545 	 * If WRITECLOSE is set, only flush out regular file
546 	 * vnodes open for writing or open and unlinked.
547 	 */
548 	if ((flags & WRITECLOSE)) {
549 		if (vp->v_type != VREG) {
550 			vrele(vp);
551 			return 0;
552 		}
553 		error = vn_lock(vp, LK_EXCLUSIVE);
554 		if (error) {
555 			KASSERT(error == ENOENT);
556 			vrele(vp);
557 			return 0;
558 		}
559 		error = VOP_FSYNC(vp, curlwp->l_cred, FSYNC_WAIT, 0, 0);
560 		if (error == 0)
561 			error = VOP_GETATTR(vp, &vattr, curlwp->l_cred);
562 		VOP_UNLOCK(vp);
563 		if (error) {
564 			vrele(vp);
565 			return error;
566 		}
567 		if (vp->v_writecount == 0 && vattr.va_nlink > 0) {
568 			vrele(vp);
569 			return 0;
570 		}
571 	}
572 	/*
573 	 * First try to recycle the vnode.
574 	 */
575 	if (vrecycle(vp))
576 		return 0;
577 	/*
578 	 * If FORCECLOSE is set, forcibly close the vnode.
579 	 * For block or character devices, revert to an
580 	 * anonymous device.  For all other files, just
581 	 * kill them.
582 	 */
583 	if (flags & FORCECLOSE) {
584 		if (vp->v_usecount > 1 &&
585 		    (vp->v_type == VBLK || vp->v_type == VCHR))
586 			vcache_make_anon(vp);
587 		else
588 			vgone(vp);
589 		return 0;
590 	}
591 	vrele(vp);
592 	return EBUSY;
593 }
594 
/*
 * Flush vnodes from mount "mp" (see the block comment above for the
 * flag semantics).  Makes up to three passes, since a vnode that was
 * busy on one pass may have become flushable by the next.  Returns 0,
 * EBUSY if busy vnodes remain, or the first hard error encountered.
 */
int
vflush(struct mount *mp, vnode_t *skipvp, int flags)
{
	vnode_t *vp;
	struct vnode_iterator *marker;
	int busy, error, when, retries = 2;

	do {
		busy = error = when = 0;

		/*
		 * First, flush out any vnode references from the
		 * deferred vrele list.
		 */
		vrele_flush(mp);

		vfs_vnode_iterator_init(mp, &marker);

		while ((vp = vflushnext(marker, &when)) != NULL) {
			error = vflush_one(vp, skipvp, flags);
			if (error == EBUSY) {
				/* Count it and keep going; retry later. */
				error = 0;
				busy++;
#ifdef DEBUG
				if (busyprt && retries == 0)
					vprint("vflush: busy vnode", vp);
#endif
			} else if (error != 0) {
				break;
			}
		}

		vfs_vnode_iterator_destroy(marker);
	} while (error == 0 && busy > 0 && retries-- > 0);

	if (error)
		return error;
	if (busy)
		return EBUSY;
	return 0;
}
636 
637 /*
638  * Mount a file system.
639  */
640 
641 /*
642  * Scan all active processes to see if any of them have a current or root
643  * directory onto which the new filesystem has just been  mounted. If so,
644  * replace them with the new mount point.
645  */
646 static void
647 mount_checkdirs(vnode_t *olddp)
648 {
649 	vnode_t *newdp, *rele1, *rele2;
650 	struct cwdinfo *cwdi;
651 	struct proc *p;
652 	bool retry;
653 
654 	if (olddp->v_usecount == 1) {
655 		return;
656 	}
657 	if (VFS_ROOT(olddp->v_mountedhere, LK_EXCLUSIVE, &newdp))
658 		panic("mount: lost mount");
659 
660 	do {
661 		retry = false;
662 		mutex_enter(proc_lock);
663 		PROCLIST_FOREACH(p, &allproc) {
664 			if ((cwdi = p->p_cwdi) == NULL)
665 				continue;
666 			/*
667 			 * Cannot change to the old directory any more,
668 			 * so even if we see a stale value it is not a
669 			 * problem.
670 			 */
671 			if (cwdi->cwdi_cdir != olddp &&
672 			    cwdi->cwdi_rdir != olddp)
673 				continue;
674 			retry = true;
675 			rele1 = NULL;
676 			rele2 = NULL;
677 			atomic_inc_uint(&cwdi->cwdi_refcnt);
678 			mutex_exit(proc_lock);
679 			mutex_enter(&cwdi->cwdi_lock);
680 			if (cwdi->cwdi_cdir == olddp ||
681 			    cwdi->cwdi_rdir == olddp) {
682 			    	/* XXX belongs in vfs_cwd.c, but rump. */
683 			    	xc_barrier(0);
684 			    	if (cwdi->cwdi_cdir == olddp) {
685 					rele1 = cwdi->cwdi_cdir;
686 					vref(newdp);
687 					cwdi->cwdi_cdir = newdp;
688 				}
689 				if (cwdi->cwdi_rdir == olddp) {
690 					rele2 = cwdi->cwdi_rdir;
691 					vref(newdp);
692 					cwdi->cwdi_rdir = newdp;
693 				}
694 			}
695 			mutex_exit(&cwdi->cwdi_lock);
696 			cwdfree(cwdi);
697 			if (rele1 != NULL)
698 				vrele(rele1);
699 			if (rele2 != NULL)
700 				vrele(rele2);
701 			mutex_enter(proc_lock);
702 			break;
703 		}
704 		mutex_exit(proc_lock);
705 	} while (retry);
706 
707 	if (rootvnode == olddp) {
708 		vrele(rootvnode);
709 		vref(newdp);
710 		rootvnode = newdp;
711 	}
712 	vput(newdp);
713 }
714 
715 /*
716  * Start extended attributes
717  */
718 static int
719 start_extattr(struct mount *mp)
720 {
721 	int error;
722 
723 	error = VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, NULL, 0, NULL);
724 	if (error)
725 		printf("%s: failed to start extattr: error = %d\n",
726 		       mp->mnt_stat.f_mntonname, error);
727 
728 	return error;
729 }
730 
/*
 * Mount file system type "vfsops" on the directory vnode *vpp (passed
 * in locked, consumed).  "path" is the user-space mount point path;
 * "data"/"data_len" are the file system specific mount arguments.
 * On success *vpp is cleared and the mount is on the mount list.
 * Consumes the caller's reference on "vfsops" on failure.
 */
int
mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops,
    const char *path, int flags, void *data, size_t *data_len)
{
	vnode_t *vp = *vpp;
	struct mount *mp;
	struct pathbuf *pb;
	struct nameidata nd;
	int error;

	/* Is the caller allowed to mount at all? */
	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
	    KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
	if (error) {
		vfs_delref(vfsops);
		return error;
	}

	/* Cannot make a non-dir a mount-point (from here anyway). */
	if (vp->v_type != VDIR) {
		vfs_delref(vfsops);
		return ENOTDIR;
	}

	if (flags & MNT_EXPORTED) {
		vfs_delref(vfsops);
		return EINVAL;
	}

	if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) {
		vfs_delref(vfsops);
		return ENOMEM;
	}

	mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);

	/*
	 * The underlying file system may refuse the mount for
	 * various reasons.  Allow the user to force it to happen.
	 *
	 * Set the mount level flags.
	 */
	mp->mnt_flag = flags & (MNT_BASIC_FLAGS | MNT_FORCE | MNT_IGNORE);

	mutex_enter(mp->mnt_updating);
	error = VFS_MOUNT(mp, path, data, data_len);
	mp->mnt_flag &= ~MNT_OP_FLAGS;

	if (error != 0)
		goto err_unmounted;

	/*
	 * Validate and prepare the mount point.
	 */
	error = pathbuf_copyin(path, &pb);
	if (error != 0) {
		goto err_mounted;
	}
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
	error = namei(&nd);
	pathbuf_destroy(pb);
	if (error != 0) {
		goto err_mounted;
	}
	/* The path must still resolve to the vnode being covered. */
	if (nd.ni_vp != vp) {
		vput(nd.ni_vp);
		error = EINVAL;
		goto err_mounted;
	}
	if (vp->v_mountedhere != NULL) {
		vput(nd.ni_vp);
		error = EBUSY;
		goto err_mounted;
	}
	/* Flush dirty buffers of the covered vnode. */
	error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0);
	if (error != 0) {
		vput(nd.ni_vp);
		goto err_mounted;
	}

	/*
	 * Put the new filesystem on the mount list after root.
	 */
	cache_purge(vp);
	mp->mnt_iflag &= ~IMNT_WANTRDWR;

	mountlist_append(mp);
	if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
		vfs_syncer_add_to_worklist(mp);
	vp->v_mountedhere = mp;
	vput(nd.ni_vp);

	/* Redirect processes using the covered vnode as cwd/root. */
	mount_checkdirs(vp);
	mutex_exit(mp->mnt_updating);

	/* Hold an additional reference to the mount across VFS_START(). */
	vfs_ref(mp);
	(void) VFS_STATVFS(mp, &mp->mnt_stat);
	error = VFS_START(mp, 0);
	if (error) {
		vrele(vp);
	} else if (flags & MNT_EXTATTR) {
		if (start_extattr(mp) != 0)
			mp->mnt_flag &= ~MNT_EXTATTR;
	}
	/* Drop reference held for VFS_START(). */
	vfs_rele(mp);
	*vpp = NULL;
	return error;

err_mounted:
	/* File system mounted but never attached: force it back off. */
	if (VFS_UNMOUNT(mp, MNT_FORCE) != 0)
		panic("Unmounting fresh file system failed");

err_unmounted:
	vp->v_mountedhere = NULL;
	mutex_exit(mp->mnt_updating);
	vfs_rele(mp);

	return error;
}
851 
852 /*
853  * Do the actual file system unmount.  File system is assumed to have
854  * been locked by the caller.
855  *
856  * => Caller hold reference to the mount, explicitly for dounmount().
857  */
858 int
859 dounmount(struct mount *mp, int flags, struct lwp *l)
860 {
861 	vnode_t *coveredvp;
862 	int error, async, used_syncer, used_extattr;
863 	const bool was_suspended = fstrans_is_owner(mp);
864 
865 #if NVERIEXEC > 0
866 	error = veriexec_unmountchk(mp);
867 	if (error)
868 		return (error);
869 #endif /* NVERIEXEC > 0 */
870 
871 	if (!was_suspended) {
872 		error = vfs_suspend(mp, 0);
873 		if (error) {
874 			return error;
875 		}
876 	}
877 
878 	KASSERT((mp->mnt_iflag & IMNT_GONE) == 0);
879 
880 	used_syncer = (mp->mnt_iflag & IMNT_ONWORKLIST) != 0;
881 	used_extattr = mp->mnt_flag & MNT_EXTATTR;
882 
883 	mp->mnt_iflag |= IMNT_UNMOUNT;
884 	mutex_enter(mp->mnt_updating);
885 	async = mp->mnt_flag & MNT_ASYNC;
886 	mp->mnt_flag &= ~MNT_ASYNC;
887 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
888 	if (used_syncer)
889 		vfs_syncer_remove_from_worklist(mp);
890 	error = 0;
891 	if (((mp->mnt_flag & MNT_RDONLY) == 0) && ((flags & MNT_FORCE) == 0)) {
892 		error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
893 	}
894 	if (error == 0 || (flags & MNT_FORCE)) {
895 		error = VFS_UNMOUNT(mp, flags);
896 	}
897 	if (error) {
898 		mp->mnt_iflag &= ~IMNT_UNMOUNT;
899 		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
900 			vfs_syncer_add_to_worklist(mp);
901 		mp->mnt_flag |= async;
902 		mutex_exit(mp->mnt_updating);
903 		if (!was_suspended)
904 			vfs_resume(mp);
905 		if (used_extattr) {
906 			if (start_extattr(mp) != 0)
907 				mp->mnt_flag &= ~MNT_EXTATTR;
908 			else
909 				mp->mnt_flag |= MNT_EXTATTR;
910 		}
911 		return (error);
912 	}
913 	mutex_exit(mp->mnt_updating);
914 
915 	/*
916 	 * mark filesystem as gone to prevent further umounts
917 	 * after mnt_umounting lock is gone, this also prevents
918 	 * vfs_busy() from succeeding.
919 	 */
920 	mp->mnt_iflag |= IMNT_GONE;
921 	if (!was_suspended)
922 		vfs_resume(mp);
923 
924 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
925 		vn_lock(coveredvp, LK_EXCLUSIVE | LK_RETRY);
926 		coveredvp->v_mountedhere = NULL;
927 		VOP_UNLOCK(coveredvp);
928 	}
929 	mountlist_remove(mp);
930 	if (TAILQ_FIRST(&mp->mnt_vnodelist) != NULL)
931 		panic("unmount: dangling vnode");
932 	vfs_hooks_unmount(mp);
933 
934 	vfs_rele(mp);	/* reference from mount() */
935 	if (coveredvp != NULLVP) {
936 		vrele(coveredvp);
937 	}
938 	return (0);
939 }
940 
941 /*
942  * Unmount all file systems.
943  * We traverse the list in reverse order under the assumption that doing so
944  * will avoid needing to worry about dependencies.
945  */
946 bool
947 vfs_unmountall(struct lwp *l)
948 {
949 
950 	printf("unmounting file systems...\n");
951 	return vfs_unmountall1(l, true, true);
952 }
953 
/*
 * Report a successful unmount; "pfx" prefixes the message
 * (e.g. "forcefully ").
 */
static void
vfs_unmount_print(struct mount *mp, const char *pfx)
{

	aprint_verbose("%sunmounted %s on %s type %s\n", pfx,
	    mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname,
	    mp->mnt_stat.f_fstypename);
}
962 
963 /*
964  * Return the mount with the highest generation less than "gen".
965  */
966 static struct mount *
967 vfs_unmount_next(uint64_t gen)
968 {
969 	mount_iterator_t *iter;
970 	struct mount *mp, *nmp;
971 
972 	nmp = NULL;
973 
974 	mountlist_iterator_init(&iter);
975 	while ((mp = mountlist_iterator_next(iter)) != NULL) {
976 		if ((nmp == NULL || mp->mnt_gen > nmp->mnt_gen) &&
977 		    mp->mnt_gen < gen) {
978 			if (nmp != NULL)
979 				vfs_rele(nmp);
980 			nmp = mp;
981 			vfs_ref(nmp);
982 		}
983 	}
984 	mountlist_iterator_destroy(iter);
985 
986 	return nmp;
987 }
988 
/*
 * Forcibly unmount the most recently mounted file system (the one
 * with the highest generation number).  Returns true on success.
 */
bool
vfs_unmount_forceone(struct lwp *l)
{
	struct mount *mp;
	int error;

	mp = vfs_unmount_next(mountgen);
	if (mp == NULL) {
		return false;
	}

#ifdef DEBUG
	printf("forcefully unmounting %s (%s)...\n",
	    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
	/* dounmount() consumes our reference on success. */
	if ((error = dounmount(mp, MNT_FORCE, l)) == 0) {
		vfs_unmount_print(mp, "forcefully ");
		return true;
	} else {
		vfs_rele(mp);
	}

#ifdef DEBUG
	printf("forceful unmount of %s failed with error %d\n",
	    mp->mnt_stat.f_mntonname, error);
#endif

	return false;
}
1018 
/*
 * Unmount file systems one at a time in descending generation order
 * (newest first), optionally forcing each unmount and logging
 * failures.  Returns true if at least one file system was unmounted.
 */
bool
vfs_unmountall1(struct lwp *l, bool force, bool verbose)
{
	struct mount *mp;
	bool any_error = false, progress = false;
	uint64_t gen;
	int error;

	gen = mountgen;
	for (;;) {
		/* Next candidate: highest generation still below "gen". */
		mp = vfs_unmount_next(gen);
		if (mp == NULL)
			break;
		gen = mp->mnt_gen;

#ifdef DEBUG
		printf("unmounting %p %s (%s)...\n",
		    (void *)mp, mp->mnt_stat.f_mntonname,
		    mp->mnt_stat.f_mntfromname);
#endif
		if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0) {
			vfs_unmount_print(mp, "");
			progress = true;
		} else {
			/* Drop the reference from vfs_unmount_next(). */
			vfs_rele(mp);
			if (verbose) {
				printf("unmount of %s failed with error %d\n",
				    mp->mnt_stat.f_mntonname, error);
			}
			any_error = true;
		}
	}
	if (verbose) {
		printf("unmounting done\n");
	}
	if (any_error && verbose) {
		printf("WARNING: some file systems would not unmount\n");
	}
	return progress;
}
1059 
1060 void
1061 vfs_sync_all(struct lwp *l)
1062 {
1063 	printf("syncing disks... ");
1064 
1065 	/* remove user processes from run queue */
1066 	suspendsched();
1067 	(void)spl0();
1068 
1069 	/* avoid coming back this way again if we panic. */
1070 	doing_shutdown = 1;
1071 
1072 	do_sys_sync(l);
1073 
1074 	/* Wait for sync to finish. */
1075 	if (buf_syncwait() != 0) {
1076 #if defined(DDB) && defined(DEBUG_HALT_BUSY)
1077 		Debugger();
1078 #endif
1079 		printf("giving up\n");
1080 		return;
1081 	} else
1082 		printf("done\n");
1083 }
1084 
1085 /*
1086  * Sync and unmount file systems before shutting down.
1087  */
1088 void
1089 vfs_shutdown(void)
1090 {
1091 	lwp_t *l = curlwp;
1092 
1093 	vfs_sync_all(l);
1094 
1095 	/*
1096 	 * If we have paniced - do not make the situation potentially
1097 	 * worse by unmounting the file systems.
1098 	 */
1099 	if (panicstr != NULL) {
1100 		return;
1101 	}
1102 
1103 	/* Unmount file systems. */
1104 	vfs_unmountall(l);
1105 }
1106 
1107 /*
1108  * Print a list of supported file system types (used by vfs_mountroot)
1109  */
1110 static void
1111 vfs_print_fstypes(void)
1112 {
1113 	struct vfsops *v;
1114 	int cnt = 0;
1115 
1116 	mutex_enter(&vfs_list_lock);
1117 	LIST_FOREACH(v, &vfs_list, vfs_list)
1118 		++cnt;
1119 	mutex_exit(&vfs_list_lock);
1120 
1121 	if (cnt == 0) {
1122 		printf("WARNING: No file system modules have been loaded.\n");
1123 		return;
1124 	}
1125 
1126 	printf("Supported file systems:");
1127 	mutex_enter(&vfs_list_lock);
1128 	LIST_FOREACH(v, &vfs_list, vfs_list) {
1129 		printf(" %s", v->vfs_name);
1130 	}
1131 	mutex_exit(&vfs_list_lock);
1132 	printf("\n");
1133 }
1134 
1135 /*
1136  * Mount the root file system.  If the operator didn't specify a
1137  * file system to use, try all possible file systems until one
1138  * succeeds.
1139  */
1140 int
1141 vfs_mountroot(void)
1142 {
1143 	struct vfsops *v;
1144 	int error = ENODEV;
1145 
1146 	if (root_device == NULL)
1147 		panic("vfs_mountroot: root device unknown");
1148 
1149 	switch (device_class(root_device)) {
1150 	case DV_IFNET:
1151 		if (rootdev != NODEV)
1152 			panic("vfs_mountroot: rootdev set for DV_IFNET "
1153 			    "(0x%llx -> %llu,%llu)",
1154 			    (unsigned long long)rootdev,
1155 			    (unsigned long long)major(rootdev),
1156 			    (unsigned long long)minor(rootdev));
1157 		break;
1158 
1159 	case DV_DISK:
1160 		if (rootdev == NODEV)
1161 			panic("vfs_mountroot: rootdev not set for DV_DISK");
1162 	        if (bdevvp(rootdev, &rootvp))
1163 	                panic("vfs_mountroot: can't get vnode for rootdev");
1164 		error = VOP_OPEN(rootvp, FREAD, FSCRED);
1165 		if (error) {
1166 			printf("vfs_mountroot: can't open root device\n");
1167 			return (error);
1168 		}
1169 		break;
1170 
1171 	case DV_VIRTUAL:
1172 		break;
1173 
1174 	default:
1175 		printf("%s: inappropriate for root file system\n",
1176 		    device_xname(root_device));
1177 		return (ENODEV);
1178 	}
1179 
1180 	/*
1181 	 * If user specified a root fs type, use it.  Make sure the
1182 	 * specified type exists and has a mount_root()
1183 	 */
1184 	if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) {
1185 		v = vfs_getopsbyname(rootfstype);
1186 		error = EFTYPE;
1187 		if (v != NULL) {
1188 			if (v->vfs_mountroot != NULL) {
1189 				error = (v->vfs_mountroot)();
1190 			}
1191 			v->vfs_refcount--;
1192 		}
1193 		goto done;
1194 	}
1195 
1196 	/*
1197 	 * Try each file system currently configured into the kernel.
1198 	 */
1199 	mutex_enter(&vfs_list_lock);
1200 	LIST_FOREACH(v, &vfs_list, vfs_list) {
1201 		if (v->vfs_mountroot == NULL)
1202 			continue;
1203 #ifdef DEBUG
1204 		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
1205 #endif
1206 		v->vfs_refcount++;
1207 		mutex_exit(&vfs_list_lock);
1208 		error = (*v->vfs_mountroot)();
1209 		mutex_enter(&vfs_list_lock);
1210 		v->vfs_refcount--;
1211 		if (!error) {
1212 			aprint_normal("root file system type: %s\n",
1213 			    v->vfs_name);
1214 			break;
1215 		}
1216 	}
1217 	mutex_exit(&vfs_list_lock);
1218 
1219 	if (v == NULL) {
1220 		vfs_print_fstypes();
1221 		printf("no file system for %s", device_xname(root_device));
1222 		if (device_class(root_device) == DV_DISK)
1223 			printf(" (dev 0x%llx)", (unsigned long long)rootdev);
1224 		printf("\n");
1225 		error = EFTYPE;
1226 	}
1227 
1228 done:
1229 	if (error && device_class(root_device) == DV_DISK) {
1230 		VOP_CLOSE(rootvp, FREAD, FSCRED);
1231 		vrele(rootvp);
1232 	}
1233 	if (error == 0) {
1234 		mount_iterator_t *iter;
1235 		struct mount *mp;
1236 		extern struct cwdinfo cwdi0;
1237 
1238 		mountlist_iterator_init(&iter);
1239 		mp = mountlist_iterator_next(iter);
1240 		KASSERT(mp != NULL);
1241 		mountlist_iterator_destroy(iter);
1242 
1243 		mp->mnt_flag |= MNT_ROOTFS;
1244 		mp->mnt_op->vfs_refcount++;
1245 
1246 		/*
1247 		 * Get the vnode for '/'.  Set cwdi0.cwdi_cdir to
1248 		 * reference it.
1249 		 */
1250 		error = VFS_ROOT(mp, LK_SHARED, &rootvnode);
1251 		if (error)
1252 			panic("cannot find root vnode, error=%d", error);
1253 		cwdi0.cwdi_cdir = rootvnode;
1254 		vref(cwdi0.cwdi_cdir);
1255 		VOP_UNLOCK(rootvnode);
1256 		cwdi0.cwdi_rdir = NULL;
1257 
1258 		/*
1259 		 * Now that root is mounted, we can fixup initproc's CWD
1260 		 * info.  All other processes are kthreads, which merely
1261 		 * share proc0's CWD info.
1262 		 */
1263 		initproc->p_cwdi->cwdi_cdir = rootvnode;
1264 		vref(initproc->p_cwdi->cwdi_cdir);
1265 		initproc->p_cwdi->cwdi_rdir = NULL;
1266 		/*
1267 		 * Enable loading of modules from the filesystem
1268 		 */
1269 		module_load_vfs_init();
1270 
1271 	}
1272 	return (error);
1273 }
1274 
1275 /*
1276  * mount_specific_key_create --
1277  *	Create a key for subsystem mount-specific data.
1278  */
1279 int
1280 mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
1281 {
1282 
1283 	return specificdata_key_create(mount_specificdata_domain, keyp, dtor);
1284 }
1285 
1286 /*
1287  * mount_specific_key_delete --
1288  *	Delete a key for subsystem mount-specific data.
1289  */
1290 void
1291 mount_specific_key_delete(specificdata_key_t key)
1292 {
1293 
1294 	specificdata_key_delete(mount_specificdata_domain, key);
1295 }
1296 
1297 /*
1298  * mount_initspecific --
1299  *	Initialize a mount's specificdata container.
1300  */
1301 void
1302 mount_initspecific(struct mount *mp)
1303 {
1304 	int error __diagused;
1305 
1306 	error = specificdata_init(mount_specificdata_domain,
1307 				  &mp->mnt_specdataref);
1308 	KASSERT(error == 0);
1309 }
1310 
1311 /*
1312  * mount_finispecific --
1313  *	Finalize a mount's specificdata container.
1314  */
1315 void
1316 mount_finispecific(struct mount *mp)
1317 {
1318 
1319 	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
1320 }
1321 
1322 /*
1323  * mount_getspecific --
1324  *	Return mount-specific data corresponding to the specified key.
1325  */
1326 void *
1327 mount_getspecific(struct mount *mp, specificdata_key_t key)
1328 {
1329 
1330 	return specificdata_getspecific(mount_specificdata_domain,
1331 					 &mp->mnt_specdataref, key);
1332 }
1333 
1334 /*
1335  * mount_setspecific --
1336  *	Set mount-specific data corresponding to the specified key.
1337  */
1338 void
1339 mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
1340 {
1341 
1342 	specificdata_setspecific(mount_specificdata_domain,
1343 				 &mp->mnt_specdataref, key, data);
1344 }
1345 
1346 /*
1347  * Check to see if a filesystem is mounted on a block device.
1348  */
1349 int
1350 vfs_mountedon(vnode_t *vp)
1351 {
1352 	vnode_t *vq;
1353 	int error = 0;
1354 
1355 	if (vp->v_type != VBLK)
1356 		return ENOTBLK;
1357 	if (spec_node_getmountedfs(vp) != NULL)
1358 		return EBUSY;
1359 	if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, &vq) == 0) {
1360 		if (spec_node_getmountedfs(vq) != NULL)
1361 			error = EBUSY;
1362 		vrele(vq);
1363 	}
1364 
1365 	return error;
1366 }
1367 
1368 /*
1369  * Check if a device pointed to by vp is mounted.
1370  *
1371  * Returns:
1372  *   EINVAL	if it's not a disk
1373  *   EBUSY	if it's a disk and mounted
1374  *   0		if it's a disk and not mounted
1375  */
1376 int
1377 rawdev_mounted(vnode_t *vp, vnode_t **bvpp)
1378 {
1379 	vnode_t *bvp;
1380 	dev_t dev;
1381 	int d_type;
1382 
1383 	bvp = NULL;
1384 	d_type = D_OTHER;
1385 
1386 	if (iskmemvp(vp))
1387 		return EINVAL;
1388 
1389 	switch (vp->v_type) {
1390 	case VCHR: {
1391 		const struct cdevsw *cdev;
1392 
1393 		dev = vp->v_rdev;
1394 		cdev = cdevsw_lookup(dev);
1395 		if (cdev != NULL) {
1396 			dev_t blkdev;
1397 
1398 			blkdev = devsw_chr2blk(dev);
1399 			if (blkdev != NODEV) {
1400 				if (vfinddev(blkdev, VBLK, &bvp) != 0) {
1401 					d_type = (cdev->d_flag & D_TYPEMASK);
1402 					/* XXX: what if bvp disappears? */
1403 					vrele(bvp);
1404 				}
1405 			}
1406 		}
1407 
1408 		break;
1409 		}
1410 
1411 	case VBLK: {
1412 		const struct bdevsw *bdev;
1413 
1414 		dev = vp->v_rdev;
1415 		bdev = bdevsw_lookup(dev);
1416 		if (bdev != NULL)
1417 			d_type = (bdev->d_flag & D_TYPEMASK);
1418 
1419 		bvp = vp;
1420 
1421 		break;
1422 		}
1423 
1424 	default:
1425 		break;
1426 	}
1427 
1428 	if (d_type != D_DISK)
1429 		return EINVAL;
1430 
1431 	if (bvpp != NULL)
1432 		*bvpp = bvp;
1433 
1434 	/*
1435 	 * XXX: This is bogus. We should be failing the request
1436 	 * XXX: not only if this specific slice is mounted, but
1437 	 * XXX: if it's on a disk with any other mounted slice.
1438 	 */
1439 	if (vfs_mountedon(bvp))
1440 		return EBUSY;
1441 
1442 	return 0;
1443 }
1444 
1445 /*
1446  * Make a 'unique' number from a mount type name.
1447  */
1448 long
1449 makefstype(const char *type)
1450 {
1451 	long rv;
1452 
1453 	for (rv = 0; *type; type++) {
1454 		rv <<= 2;
1455 		rv ^= *type;
1456 	}
1457 	return rv;
1458 }
1459 
1460 static struct mountlist_entry *
1461 mountlist_alloc(enum mountlist_type type, struct mount *mp)
1462 {
1463 	struct mountlist_entry *me;
1464 
1465 	me = kmem_zalloc(sizeof(*me), KM_SLEEP);
1466 	me->me_mount = mp;
1467 	me->me_type = type;
1468 
1469 	return me;
1470 }
1471 
/*
 * Free an entry obtained from mountlist_alloc().
 */
static void
mountlist_free(struct mountlist_entry *me)
{

	kmem_free(me, sizeof(*me));
}
1478 
1479 void
1480 mountlist_iterator_init(mount_iterator_t **mip)
1481 {
1482 	struct mountlist_entry *me;
1483 
1484 	me = mountlist_alloc(ME_MARKER, NULL);
1485 	mutex_enter(&mountlist_lock);
1486 	TAILQ_INSERT_HEAD(&mountlist, me, me_list);
1487 	mutex_exit(&mountlist_lock);
1488 	*mip = (mount_iterator_t *)me;
1489 }
1490 
1491 void
1492 mountlist_iterator_destroy(mount_iterator_t *mi)
1493 {
1494 	struct mountlist_entry *marker = &mi->mi_entry;
1495 
1496 	if (marker->me_mount != NULL)
1497 		vfs_unbusy(marker->me_mount);
1498 
1499 	mutex_enter(&mountlist_lock);
1500 	TAILQ_REMOVE(&mountlist, marker, me_list);
1501 	mutex_exit(&mountlist_lock);
1502 
1503 	mountlist_free(marker);
1504 
1505 }
1506 
1507 /*
1508  * Return the next mount or NULL for this iterator.
1509  * Mark it busy on success.
1510  */
1511 static inline struct mount *
1512 _mountlist_iterator_next(mount_iterator_t *mi, bool wait)
1513 {
1514 	struct mountlist_entry *me, *marker = &mi->mi_entry;
1515 	struct mount *mp;
1516 	int error;
1517 
1518 	if (marker->me_mount != NULL) {
1519 		vfs_unbusy(marker->me_mount);
1520 		marker->me_mount = NULL;
1521 	}
1522 
1523 	mutex_enter(&mountlist_lock);
1524 	for (;;) {
1525 		KASSERT(marker->me_type == ME_MARKER);
1526 
1527 		me = TAILQ_NEXT(marker, me_list);
1528 		if (me == NULL) {
1529 			/* End of list: keep marker and return. */
1530 			mutex_exit(&mountlist_lock);
1531 			return NULL;
1532 		}
1533 		TAILQ_REMOVE(&mountlist, marker, me_list);
1534 		TAILQ_INSERT_AFTER(&mountlist, me, marker, me_list);
1535 
1536 		/* Skip other markers. */
1537 		if (me->me_type != ME_MOUNT)
1538 			continue;
1539 
1540 		/* Take an initial reference for vfs_busy() below. */
1541 		mp = me->me_mount;
1542 		KASSERT(mp != NULL);
1543 		vfs_ref(mp);
1544 		mutex_exit(&mountlist_lock);
1545 
1546 		/* Try to mark this mount busy and return on success. */
1547 		if (wait)
1548 			error = vfs_busy(mp);
1549 		else
1550 			error = vfs_trybusy(mp);
1551 		if (error == 0) {
1552 			vfs_rele(mp);
1553 			marker->me_mount = mp;
1554 			return mp;
1555 		}
1556 		vfs_rele(mp);
1557 		mutex_enter(&mountlist_lock);
1558 	}
1559 }
1560 
/*
 * Return the next mount for this iterator, waiting (vfs_busy)
 * until it can be marked busy.
 */
struct mount *
mountlist_iterator_next(mount_iterator_t *mi)
{

	return _mountlist_iterator_next(mi, true);
}
1567 
/*
 * Non-blocking variant of mountlist_iterator_next(): uses
 * vfs_trybusy() and skips mounts that cannot be busied right away.
 */
struct mount *
mountlist_iterator_trynext(mount_iterator_t *mi)
{

	return _mountlist_iterator_next(mi, false);
}
1574 
1575 /*
1576  * Attach new mount to the end of the mount list.
1577  */
1578 void
1579 mountlist_append(struct mount *mp)
1580 {
1581 	struct mountlist_entry *me;
1582 
1583 	me = mountlist_alloc(ME_MOUNT, mp);
1584 	mutex_enter(&mountlist_lock);
1585 	TAILQ_INSERT_TAIL(&mountlist, me, me_list);
1586 	mutex_exit(&mountlist_lock);
1587 }
1588 
1589 /*
1590  * Remove mount from mount list.
1591  */void
1592 mountlist_remove(struct mount *mp)
1593 {
1594 	struct mountlist_entry *me;
1595 
1596 	mutex_enter(&mountlist_lock);
1597 	TAILQ_FOREACH(me, &mountlist, me_list)
1598 		if (me->me_type == ME_MOUNT && me->me_mount == mp)
1599 			break;
1600 	KASSERT(me != NULL);
1601 	TAILQ_REMOVE(&mountlist, me, me_list);
1602 	mutex_exit(&mountlist_lock);
1603 	mountlist_free(me);
1604 }
1605 
1606 /*
1607  * Unlocked variant to traverse the mountlist.
1608  * To be used from DDB only.
1609  */
1610 struct mount *
1611 _mountlist_next(struct mount *mp)
1612 {
1613 	struct mountlist_entry *me;
1614 
1615 	if (mp == NULL) {
1616 		me = TAILQ_FIRST(&mountlist);
1617 	} else {
1618 		TAILQ_FOREACH(me, &mountlist, me_list)
1619 			if (me->me_type == ME_MOUNT && me->me_mount == mp)
1620 				break;
1621 		if (me != NULL)
1622 			me = TAILQ_NEXT(me, me_list);
1623 	}
1624 
1625 	while (me != NULL && me->me_type != ME_MOUNT)
1626 		me = TAILQ_NEXT(me, me_list);
1627 
1628 	return (me ? me->me_mount : NULL);
1629 }
1630