xref: /netbsd-src/sys/kern/vfs_mount.c (revision e94a5d02693120d4ad9d909e488894e9fcf0eb76)
1*e94a5d02Sriastradh /*	$NetBSD: vfs_mount.c,v 1.110 2024/12/07 02:27:38 riastradh Exp $	*/
2fbc8beaeSrmind 
3fbc8beaeSrmind /*-
4926b25e1Sad  * Copyright (c) 1997-2020 The NetBSD Foundation, Inc.
5fbc8beaeSrmind  * All rights reserved.
6fbc8beaeSrmind  *
7fbc8beaeSrmind  * This code is derived from software contributed to The NetBSD Foundation
8fbc8beaeSrmind  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9fbc8beaeSrmind  * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
10fbc8beaeSrmind  *
11fbc8beaeSrmind  * Redistribution and use in source and binary forms, with or without
12fbc8beaeSrmind  * modification, are permitted provided that the following conditions
13fbc8beaeSrmind  * are met:
14fbc8beaeSrmind  * 1. Redistributions of source code must retain the above copyright
15fbc8beaeSrmind  *    notice, this list of conditions and the following disclaimer.
16fbc8beaeSrmind  * 2. Redistributions in binary form must reproduce the above copyright
17fbc8beaeSrmind  *    notice, this list of conditions and the following disclaimer in the
18fbc8beaeSrmind  *    documentation and/or other materials provided with the distribution.
19fbc8beaeSrmind  *
20fbc8beaeSrmind  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21fbc8beaeSrmind  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22fbc8beaeSrmind  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23fbc8beaeSrmind  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24fbc8beaeSrmind  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25fbc8beaeSrmind  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26fbc8beaeSrmind  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27fbc8beaeSrmind  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28fbc8beaeSrmind  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29fbc8beaeSrmind  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30fbc8beaeSrmind  * POSSIBILITY OF SUCH DAMAGE.
31fbc8beaeSrmind  */
32fbc8beaeSrmind 
33fbc8beaeSrmind /*
34fbc8beaeSrmind  * Copyright (c) 1989, 1993
35fbc8beaeSrmind  *	The Regents of the University of California.  All rights reserved.
36fbc8beaeSrmind  * (c) UNIX System Laboratories, Inc.
37fbc8beaeSrmind  * All or some portions of this file are derived from material licensed
38fbc8beaeSrmind  * to the University of California by American Telephone and Telegraph
39fbc8beaeSrmind  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40fbc8beaeSrmind  * the permission of UNIX System Laboratories, Inc.
41fbc8beaeSrmind  *
42fbc8beaeSrmind  * Redistribution and use in source and binary forms, with or without
43fbc8beaeSrmind  * modification, are permitted provided that the following conditions
44fbc8beaeSrmind  * are met:
45fbc8beaeSrmind  * 1. Redistributions of source code must retain the above copyright
46fbc8beaeSrmind  *    notice, this list of conditions and the following disclaimer.
47fbc8beaeSrmind  * 2. Redistributions in binary form must reproduce the above copyright
48fbc8beaeSrmind  *    notice, this list of conditions and the following disclaimer in the
49fbc8beaeSrmind  *    documentation and/or other materials provided with the distribution.
50fbc8beaeSrmind  * 3. Neither the name of the University nor the names of its contributors
51fbc8beaeSrmind  *    may be used to endorse or promote products derived from this software
52fbc8beaeSrmind  *    without specific prior written permission.
53fbc8beaeSrmind  *
54fbc8beaeSrmind  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55fbc8beaeSrmind  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56fbc8beaeSrmind  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57fbc8beaeSrmind  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58fbc8beaeSrmind  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59fbc8beaeSrmind  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60fbc8beaeSrmind  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61fbc8beaeSrmind  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62fbc8beaeSrmind  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63fbc8beaeSrmind  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64fbc8beaeSrmind  * SUCH DAMAGE.
65fbc8beaeSrmind  *
66fbc8beaeSrmind  *	@(#)vfs_subr.c	8.13 (Berkeley) 4/18/94
67fbc8beaeSrmind  */
68fbc8beaeSrmind 
69fbc8beaeSrmind #include <sys/cdefs.h>
70*e94a5d02Sriastradh __KERNEL_RCSID(0, "$NetBSD: vfs_mount.c,v 1.110 2024/12/07 02:27:38 riastradh Exp $");
71a5288eefShannken 
72a5288eefShannken #include "veriexec.h"
73fbc8beaeSrmind 
74fbc8beaeSrmind #include <sys/param.h>
75fbc8beaeSrmind #include <sys/kernel.h>
76fbc8beaeSrmind 
77fbc8beaeSrmind #include <sys/atomic.h>
78fbc8beaeSrmind #include <sys/buf.h>
79fbc8beaeSrmind #include <sys/conf.h>
801cf06cb4Sriastradh #include <sys/device.h>
811cf06cb4Sriastradh #include <sys/extattr.h>
82fbc8beaeSrmind #include <sys/fcntl.h>
83fbc8beaeSrmind #include <sys/filedesc.h>
841cf06cb4Sriastradh #include <sys/fstrans.h>
85fbc8beaeSrmind #include <sys/kauth.h>
86fbc8beaeSrmind #include <sys/kmem.h>
87fbc8beaeSrmind #include <sys/module.h>
88fbc8beaeSrmind #include <sys/mount.h>
89fbc8beaeSrmind #include <sys/namei.h>
90*e94a5d02Sriastradh #include <sys/sdt.h>
91fbc8beaeSrmind #include <sys/syscallargs.h>
92fbc8beaeSrmind #include <sys/sysctl.h>
93fbc8beaeSrmind #include <sys/systm.h>
941cf06cb4Sriastradh #include <sys/verified_exec.h>
95ce817826Sdsl #include <sys/vfs_syscalls.h>
96175d720aShannken #include <sys/vnode_impl.h>
97fbc8beaeSrmind 
9885cb97f0Shannken #include <miscfs/deadfs/deadfs.h>
99fbc8beaeSrmind #include <miscfs/genfs/genfs.h>
100fbc8beaeSrmind #include <miscfs/specfs/specdev.h>
101fbc8beaeSrmind 
102dc06ff16Shannken #include <uvm/uvm_swap.h>
103dc06ff16Shannken 
/* Tells a real mount apart from a marker on the mount list. */
enum mountlist_type {
	ME_MOUNT,
	ME_MARKER
};

/* One element of the mount list. */
struct mountlist_entry {
	TAILQ_ENTRY(mountlist_entry) me_list;	/* Linkage on the mount list. */
	struct mount *me_mount;			/* The mount itself if ME_MOUNT,
						   the current mount otherwise. */
	enum mountlist_type me_type;		/* ME_MOUNT or ME_MARKER. */
};

/* Mount list iterator: wraps a single mount list entry. */
struct mount_iterator {
	struct mountlist_entry mi_entry;
};
1172f4fa4f9Shannken 
118cfa69dcfShannken static struct vnode *vfs_vnode_iterator_next1(struct vnode_iterator *,
119cfa69dcfShannken     bool (*)(void *, struct vnode *), void *, bool);
120cfa69dcfShannken 
121915977c5Spooka /* Root filesystem. */
122fbc8beaeSrmind vnode_t *			rootvnode;
123fbc8beaeSrmind 
124fbc8beaeSrmind /* Mounted filesystem list. */
125256581e1Shannken static TAILQ_HEAD(mountlist, mountlist_entry) mountlist;
1267d06f330Sad static kmutex_t			mountlist_lock __cacheline_aligned;
127256581e1Shannken int vnode_offset_next_by_lru	/* XXX: ugly hack for pstat.c */
128256581e1Shannken     = offsetof(vnode_impl_t, vi_lrulist.tqe_next);
129fbc8beaeSrmind 
1307d06f330Sad kmutex_t			vfs_list_lock __cacheline_aligned;
131fbc8beaeSrmind 
132fbc8beaeSrmind static specificdata_domain_t	mount_specificdata_domain;
133fbc8beaeSrmind static kmutex_t			mntid_lock;
134fbc8beaeSrmind 
1357d06f330Sad static kmutex_t			mountgen_lock __cacheline_aligned;
136fbc8beaeSrmind static uint64_t			mountgen;
137fbc8beaeSrmind 
138fbc8beaeSrmind void
139fbc8beaeSrmind vfs_mount_sysinit(void)
140fbc8beaeSrmind {
141fbc8beaeSrmind 
1420b725b63Schristos 	TAILQ_INIT(&mountlist);
143fbc8beaeSrmind 	mutex_init(&mountlist_lock, MUTEX_DEFAULT, IPL_NONE);
144fbc8beaeSrmind 	mutex_init(&vfs_list_lock, MUTEX_DEFAULT, IPL_NONE);
145fbc8beaeSrmind 
146fbc8beaeSrmind 	mount_specificdata_domain = specificdata_domain_create();
147fbc8beaeSrmind 	mutex_init(&mntid_lock, MUTEX_DEFAULT, IPL_NONE);
148fbc8beaeSrmind 	mutex_init(&mountgen_lock, MUTEX_DEFAULT, IPL_NONE);
149fbc8beaeSrmind 	mountgen = 0;
150fbc8beaeSrmind }
151fbc8beaeSrmind 
152fbc8beaeSrmind struct mount *
153fbc8beaeSrmind vfs_mountalloc(struct vfsops *vfsops, vnode_t *vp)
154fbc8beaeSrmind {
155fbc8beaeSrmind 	struct mount *mp;
156c18a56f1Shannken 	int error __diagused;
157fbc8beaeSrmind 
158fbc8beaeSrmind 	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
159fbc8beaeSrmind 	mp->mnt_op = vfsops;
160fbc8beaeSrmind 	mp->mnt_refcnt = 1;
161fbc8beaeSrmind 	TAILQ_INIT(&mp->mnt_vnodelist);
1627d06f330Sad 	mp->mnt_renamelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
1637d06f330Sad 	mp->mnt_vnodelock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
1647d06f330Sad 	mp->mnt_updating = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
165fbc8beaeSrmind 	mp->mnt_vnodecovered = vp;
166fbc8beaeSrmind 	mount_initspecific(mp);
167f421b366Shannken 
168c18a56f1Shannken 	error = fstrans_mount(mp);
169c18a56f1Shannken 	KASSERT(error == 0);
170fbc8beaeSrmind 
171fbc8beaeSrmind 	mutex_enter(&mountgen_lock);
172fbc8beaeSrmind 	mp->mnt_gen = mountgen++;
173fbc8beaeSrmind 	mutex_exit(&mountgen_lock);
174fbc8beaeSrmind 
175fbc8beaeSrmind 	return mp;
176fbc8beaeSrmind }
177fbc8beaeSrmind 
178fbc8beaeSrmind /*
179fbc8beaeSrmind  * vfs_rootmountalloc: lookup a filesystem type, and if found allocate and
180fbc8beaeSrmind  * initialize a mount structure for it.
181fbc8beaeSrmind  *
182fbc8beaeSrmind  * Devname is usually updated by mount(8) after booting.
183fbc8beaeSrmind  */
184fbc8beaeSrmind int
185fbc8beaeSrmind vfs_rootmountalloc(const char *fstypename, const char *devname,
186fbc8beaeSrmind     struct mount **mpp)
187fbc8beaeSrmind {
188fbc8beaeSrmind 	struct vfsops *vfsp = NULL;
189fbc8beaeSrmind 	struct mount *mp;
190eb8533a8Shannken 	int error __diagused;
191fbc8beaeSrmind 
192fbc8beaeSrmind 	mutex_enter(&vfs_list_lock);
193fbc8beaeSrmind 	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
194fbc8beaeSrmind 		if (!strncmp(vfsp->vfs_name, fstypename,
195fbc8beaeSrmind 		    sizeof(mp->mnt_stat.f_fstypename)))
196fbc8beaeSrmind 			break;
197fbc8beaeSrmind 	if (vfsp == NULL) {
198fbc8beaeSrmind 		mutex_exit(&vfs_list_lock);
199*e94a5d02Sriastradh 		return SET_ERROR(ENODEV);
200fbc8beaeSrmind 	}
201fbc8beaeSrmind 	vfsp->vfs_refcount++;
202fbc8beaeSrmind 	mutex_exit(&vfs_list_lock);
203fbc8beaeSrmind 
204fbc8beaeSrmind 	if ((mp = vfs_mountalloc(vfsp, NULL)) == NULL)
205*e94a5d02Sriastradh 		return SET_ERROR(ENOMEM);
206eb8533a8Shannken 	error = vfs_busy(mp);
207eb8533a8Shannken 	KASSERT(error == 0);
208fbc8beaeSrmind 	mp->mnt_flag = MNT_RDONLY;
209fbc8beaeSrmind 	(void)strlcpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name,
210fbc8beaeSrmind 	    sizeof(mp->mnt_stat.f_fstypename));
211fbc8beaeSrmind 	mp->mnt_stat.f_mntonname[0] = '/';
212fbc8beaeSrmind 	mp->mnt_stat.f_mntonname[1] = '\0';
213fbc8beaeSrmind 	mp->mnt_stat.f_mntfromname[sizeof(mp->mnt_stat.f_mntfromname) - 1] =
214fbc8beaeSrmind 	    '\0';
215fbc8beaeSrmind 	(void)copystr(devname, mp->mnt_stat.f_mntfromname,
216fbc8beaeSrmind 	    sizeof(mp->mnt_stat.f_mntfromname) - 1, 0);
217fbc8beaeSrmind 	*mpp = mp;
218fbc8beaeSrmind 	return 0;
219fbc8beaeSrmind }
220fbc8beaeSrmind 
221fbc8beaeSrmind /*
222fbc8beaeSrmind  * vfs_getnewfsid: get a new unique fsid.
223fbc8beaeSrmind  */
224fbc8beaeSrmind void
225fbc8beaeSrmind vfs_getnewfsid(struct mount *mp)
226fbc8beaeSrmind {
227fbc8beaeSrmind 	static u_short xxxfs_mntid;
2285c869fd7Shannken 	struct mountlist_entry *me;
229fbc8beaeSrmind 	fsid_t tfsid;
230fbc8beaeSrmind 	int mtype;
231fbc8beaeSrmind 
232fbc8beaeSrmind 	mutex_enter(&mntid_lock);
233fbc8beaeSrmind 	if (xxxfs_mntid == 0)
234fbc8beaeSrmind 		++xxxfs_mntid;
2355c869fd7Shannken 	mtype = makefstype(mp->mnt_op->vfs_name);
236fbc8beaeSrmind 	tfsid.__fsid_val[0] = makedev(mtype & 0xff, xxxfs_mntid);
237fbc8beaeSrmind 	tfsid.__fsid_val[1] = mtype;
2385c869fd7Shannken 	/* Always increment to not return the same fsid to parallel mounts. */
2395c869fd7Shannken 	xxxfs_mntid++;
2405c869fd7Shannken 
2415c869fd7Shannken 	/*
2425c869fd7Shannken 	 * Directly walk mountlist to prevent deadlock through
2435c869fd7Shannken 	 * mountlist_iterator_next() -> vfs_busy().
2445c869fd7Shannken 	 */
2455c869fd7Shannken 	mutex_enter(&mountlist_lock);
2465c869fd7Shannken 	for (me = TAILQ_FIRST(&mountlist); me != TAILQ_END(&mountlist); ) {
2475c869fd7Shannken 		if (me->me_type == ME_MOUNT &&
2485c869fd7Shannken 		    me->me_mount->mnt_stat.f_fsidx.__fsid_val[0] ==
2495c869fd7Shannken 		    tfsid.__fsid_val[0] &&
2505c869fd7Shannken 		    me->me_mount->mnt_stat.f_fsidx.__fsid_val[1] ==
2515c869fd7Shannken 		    tfsid.__fsid_val[1]) {
252fbc8beaeSrmind 			tfsid.__fsid_val[0]++;
253fbc8beaeSrmind 			xxxfs_mntid++;
2545c869fd7Shannken 			me = TAILQ_FIRST(&mountlist);
2555c869fd7Shannken 		} else {
2565c869fd7Shannken 			me = TAILQ_NEXT(me, me_list);
257fbc8beaeSrmind 		}
2585c869fd7Shannken 	}
2595c869fd7Shannken 	mutex_exit(&mountlist_lock);
2605c869fd7Shannken 
261fbc8beaeSrmind 	mp->mnt_stat.f_fsidx.__fsid_val[0] = tfsid.__fsid_val[0];
2625c869fd7Shannken 	mp->mnt_stat.f_fsidx.__fsid_val[1] = tfsid.__fsid_val[1];
263fbc8beaeSrmind 	mp->mnt_stat.f_fsid = mp->mnt_stat.f_fsidx.__fsid_val[0];
264fbc8beaeSrmind 	mutex_exit(&mntid_lock);
265fbc8beaeSrmind }
266fbc8beaeSrmind 
267fbc8beaeSrmind /*
268fbc8beaeSrmind  * Lookup a mount point by filesystem identifier.
269fbc8beaeSrmind  *
270fbc8beaeSrmind  * XXX Needs to add a reference to the mount point.
271fbc8beaeSrmind  */
272fbc8beaeSrmind struct mount *
273fbc8beaeSrmind vfs_getvfs(fsid_t *fsid)
274fbc8beaeSrmind {
275e08a8c41Shannken 	mount_iterator_t *iter;
276fbc8beaeSrmind 	struct mount *mp;
277fbc8beaeSrmind 
278e08a8c41Shannken 	mountlist_iterator_init(&iter);
279e08a8c41Shannken 	while ((mp = mountlist_iterator_next(iter)) != NULL) {
280fbc8beaeSrmind 		if (mp->mnt_stat.f_fsidx.__fsid_val[0] == fsid->__fsid_val[0] &&
281fbc8beaeSrmind 		    mp->mnt_stat.f_fsidx.__fsid_val[1] == fsid->__fsid_val[1]) {
282e08a8c41Shannken 			mountlist_iterator_destroy(iter);
283e08a8c41Shannken 			return mp;
284fbc8beaeSrmind 		}
285fbc8beaeSrmind 	}
286e08a8c41Shannken 	mountlist_iterator_destroy(iter);
287fbc8beaeSrmind 	return NULL;
288fbc8beaeSrmind }
289fbc8beaeSrmind 
290fbc8beaeSrmind /*
291ebb8f73bShannken  * Take a reference to a mount structure.
292ebb8f73bShannken  */
293ebb8f73bShannken void
294ebb8f73bShannken vfs_ref(struct mount *mp)
295ebb8f73bShannken {
296ebb8f73bShannken 
297ebb8f73bShannken 	KASSERT(mp->mnt_refcnt > 0 || mutex_owned(&mountlist_lock));
298ebb8f73bShannken 
299ebb8f73bShannken 	atomic_inc_uint(&mp->mnt_refcnt);
300ebb8f73bShannken }
301ebb8f73bShannken 
302ebb8f73bShannken /*
303fbc8beaeSrmind  * Drop a reference to a mount structure, freeing if the last reference.
304fbc8beaeSrmind  */
305fbc8beaeSrmind void
306ebb8f73bShannken vfs_rele(struct mount *mp)
307fbc8beaeSrmind {
308fbc8beaeSrmind 
309ef3476fbSriastradh 	membar_release();
310fbc8beaeSrmind 	if (__predict_true((int)atomic_dec_uint_nv(&mp->mnt_refcnt) > 0)) {
311fbc8beaeSrmind 		return;
312fbc8beaeSrmind 	}
313ef3476fbSriastradh 	membar_acquire();
314fbc8beaeSrmind 
315fbc8beaeSrmind 	/*
316fbc8beaeSrmind 	 * Nothing else has visibility of the mount: we can now
317fbc8beaeSrmind 	 * free the data structures.
318fbc8beaeSrmind 	 */
319fbc8beaeSrmind 	KASSERT(mp->mnt_refcnt == 0);
320fbc8beaeSrmind 	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
3217d06f330Sad 	mutex_obj_free(mp->mnt_updating);
3227d06f330Sad 	mutex_obj_free(mp->mnt_renamelock);
3237d06f330Sad 	mutex_obj_free(mp->mnt_vnodelock);
324fbc8beaeSrmind 	if (mp->mnt_op != NULL) {
325fbc8beaeSrmind 		vfs_delref(mp->mnt_op);
326fbc8beaeSrmind 	}
327583a153eShannken 	fstrans_unmount(mp);
328583a153eShannken 	/*
329583a153eShannken 	 * Final free of mp gets done from fstrans_mount_dtor().
330583a153eShannken 	 *
331583a153eShannken 	 * Prevents this memory to be reused as a mount before
332583a153eShannken 	 * fstrans releases all references to it.
333583a153eShannken 	 */
334fbc8beaeSrmind }
335fbc8beaeSrmind 
336fbc8beaeSrmind /*
337fbc8beaeSrmind  * Mark a mount point as busy, and gain a new reference to it.  Used to
338fbc8beaeSrmind  * prevent the file system from being unmounted during critical sections.
339fbc8beaeSrmind  *
340df5cf9d7Smlelstv  * vfs_busy can be called multiple times and by multiple threads
341df5cf9d7Smlelstv  * and must be accompanied by the same number of vfs_unbusy calls.
342df5cf9d7Smlelstv  *
343fbc8beaeSrmind  * => The caller must hold a pre-existing reference to the mount.
344fbc8beaeSrmind  * => Will fail if the file system is being unmounted, or is unmounted.
345fbc8beaeSrmind  */
346bd152b56Shannken static inline int
347bd152b56Shannken _vfs_busy(struct mount *mp, bool wait)
348fbc8beaeSrmind {
349fbc8beaeSrmind 
350fbc8beaeSrmind 	KASSERT(mp->mnt_refcnt > 0);
351fbc8beaeSrmind 
352bd152b56Shannken 	if (wait) {
353287643b0Shannken 		fstrans_start(mp);
3544f4cfe27Shannken 	} else {
355287643b0Shannken 		if (fstrans_start_nowait(mp))
356*e94a5d02Sriastradh 			return SET_ERROR(EBUSY);
357bd152b56Shannken 	}
358fbc8beaeSrmind 	if (__predict_false((mp->mnt_iflag & IMNT_GONE) != 0)) {
3594f4cfe27Shannken 		fstrans_done(mp);
360*e94a5d02Sriastradh 		return SET_ERROR(ENOENT);
361fbc8beaeSrmind 	}
362ebb8f73bShannken 	vfs_ref(mp);
363fbc8beaeSrmind 	return 0;
364fbc8beaeSrmind }
365fbc8beaeSrmind 
366bd152b56Shannken int
367bd152b56Shannken vfs_busy(struct mount *mp)
368bd152b56Shannken {
369bd152b56Shannken 
370bd152b56Shannken 	return _vfs_busy(mp, true);
371bd152b56Shannken }
372bd152b56Shannken 
373bd152b56Shannken int
374bd152b56Shannken vfs_trybusy(struct mount *mp)
375bd152b56Shannken {
376bd152b56Shannken 
377bd152b56Shannken 	return _vfs_busy(mp, false);
378bd152b56Shannken }
379bd152b56Shannken 
380fbc8beaeSrmind /*
381fbc8beaeSrmind  * Unbusy a busy filesystem.
382fbc8beaeSrmind  *
383df5cf9d7Smlelstv  * Every successful vfs_busy() call must be undone by a vfs_unbusy() call.
384fbc8beaeSrmind  */
385fbc8beaeSrmind void
38620bb034fShannken vfs_unbusy(struct mount *mp)
387fbc8beaeSrmind {
388fbc8beaeSrmind 
389fbc8beaeSrmind 	KASSERT(mp->mnt_refcnt > 0);
390fbc8beaeSrmind 
3914f4cfe27Shannken 	fstrans_done(mp);
392ebb8f73bShannken 	vfs_rele(mp);
393fbc8beaeSrmind }
394fbc8beaeSrmind 
395cfe3f2c3Shannken /*
396cfe3f2c3Shannken  * Change a file systems lower mount.
397cfe3f2c3Shannken  * Both the current and the new lower mount may be NULL.  The caller
398cfe3f2c3Shannken  * guarantees exclusive access to the mount and holds a pre-existing
399cfe3f2c3Shannken  * reference to the new lower mount.
400cfe3f2c3Shannken  */
401cfe3f2c3Shannken int
402cfe3f2c3Shannken vfs_set_lowermount(struct mount *mp, struct mount *lowermp)
403cfe3f2c3Shannken {
404cfe3f2c3Shannken 	struct mount *oldlowermp;
405cfe3f2c3Shannken 	int error;
406cfe3f2c3Shannken 
4073652e3dbShannken #ifdef DEBUG
4083652e3dbShannken 	/*
4093652e3dbShannken 	 * Limit the depth of file system stack so kernel sanitizers
4103652e3dbShannken 	 * may stress mount/unmount without exhausting the kernel stack.
4113652e3dbShannken 	 */
4123652e3dbShannken 	int depth;
4133652e3dbShannken 	struct mount *mp2;
4143652e3dbShannken 
4153652e3dbShannken 	for (depth = 0, mp2 = lowermp; mp2; depth++, mp2 = mp2->mnt_lower) {
4163652e3dbShannken 		if (depth == 23)
417*e94a5d02Sriastradh 			return SET_ERROR(EINVAL);
4183652e3dbShannken 	}
4193652e3dbShannken #endif
4203652e3dbShannken 
421cfe3f2c3Shannken 	if (lowermp) {
42285cb97f0Shannken 		if (lowermp == dead_rootmount)
423*e94a5d02Sriastradh 			return SET_ERROR(ENOENT);
424cfe3f2c3Shannken 		error = vfs_busy(lowermp);
425cfe3f2c3Shannken 		if (error)
426cfe3f2c3Shannken 			return error;
427cfe3f2c3Shannken 		vfs_ref(lowermp);
428cfe3f2c3Shannken 	}
429cfe3f2c3Shannken 
430cfe3f2c3Shannken 	oldlowermp = mp->mnt_lower;
431cfe3f2c3Shannken 	mp->mnt_lower = lowermp;
432cfe3f2c3Shannken 
433cfe3f2c3Shannken 	if (lowermp)
434cfe3f2c3Shannken 		vfs_unbusy(lowermp);
435cfe3f2c3Shannken 
436cfe3f2c3Shannken 	if (oldlowermp)
437cfe3f2c3Shannken 		vfs_rele(oldlowermp);
438cfe3f2c3Shannken 
439cfe3f2c3Shannken 	return 0;
440cfe3f2c3Shannken }
441cfe3f2c3Shannken 
44272439b7dShannken struct vnode_iterator {
443dcc198a3Shannken 	vnode_impl_t vi_vnode;
44472439b7dShannken };
44572439b7dShannken 
44672439b7dShannken void
447dcc198a3Shannken vfs_vnode_iterator_init(struct mount *mp, struct vnode_iterator **vnip)
44872439b7dShannken {
449dcc198a3Shannken 	vnode_t *vp;
450dcc198a3Shannken 	vnode_impl_t *vip;
45172439b7dShannken 
4524222e592Shannken 	vp = vnalloc_marker(mp);
453dcc198a3Shannken 	vip = VNODE_TO_VIMPL(vp);
45472439b7dShannken 
4557d06f330Sad 	mutex_enter(mp->mnt_vnodelock);
456dcc198a3Shannken 	TAILQ_INSERT_HEAD(&mp->mnt_vnodelist, vip, vi_mntvnodes);
45772439b7dShannken 	vp->v_usecount = 1;
4587d06f330Sad 	mutex_exit(mp->mnt_vnodelock);
45972439b7dShannken 
460dcc198a3Shannken 	*vnip = (struct vnode_iterator *)vip;
46172439b7dShannken }
46272439b7dShannken 
46372439b7dShannken void
464dcc198a3Shannken vfs_vnode_iterator_destroy(struct vnode_iterator *vni)
46572439b7dShannken {
466dcc198a3Shannken 	vnode_impl_t *mvip = &vni->vi_vnode;
467dcc198a3Shannken 	vnode_t *mvp = VIMPL_TO_VNODE(mvip);
4687d06f330Sad 	kmutex_t *lock;
46972439b7dShannken 
4704222e592Shannken 	KASSERT(vnis_marker(mvp));
47123bf8800Sad 	if (vrefcnt(mvp) != 0) {
4727d06f330Sad 		lock = mvp->v_mount->mnt_vnodelock;
4737d06f330Sad 		mutex_enter(lock);
474dcc198a3Shannken 		TAILQ_REMOVE(&mvp->v_mount->mnt_vnodelist, mvip, vi_mntvnodes);
475b0d9b99bShannken 		mvp->v_usecount = 0;
4767d06f330Sad 		mutex_exit(lock);
477b0d9b99bShannken 	}
4784222e592Shannken 	vnfree_marker(mvp);
47972439b7dShannken }
48072439b7dShannken 
481cfa69dcfShannken static struct vnode *
482cfa69dcfShannken vfs_vnode_iterator_next1(struct vnode_iterator *vni,
483cfa69dcfShannken     bool (*f)(void *, struct vnode *), void *cl, bool do_wait)
48472439b7dShannken {
485dcc198a3Shannken 	vnode_impl_t *mvip = &vni->vi_vnode;
486dcc198a3Shannken 	struct mount *mp = VIMPL_TO_VNODE(mvip)->v_mount;
487dcc198a3Shannken 	vnode_t *vp;
488dcc198a3Shannken 	vnode_impl_t *vip;
4897d06f330Sad 	kmutex_t *lock;
49072439b7dShannken 	int error;
49172439b7dShannken 
492dcc198a3Shannken 	KASSERT(vnis_marker(VIMPL_TO_VNODE(mvip)));
49372439b7dShannken 
4947d06f330Sad 	lock = mp->mnt_vnodelock;
49572439b7dShannken 	do {
4967d06f330Sad 		mutex_enter(lock);
497dcc198a3Shannken 		vip = TAILQ_NEXT(mvip, vi_mntvnodes);
498dcc198a3Shannken 		TAILQ_REMOVE(&mp->mnt_vnodelist, mvip, vi_mntvnodes);
499dcc198a3Shannken 		VIMPL_TO_VNODE(mvip)->v_usecount = 0;
50002cb0c6eSchristos again:
5011bb344adSmaxv 		if (vip == NULL) {
5027d06f330Sad 			mutex_exit(lock);
50302cb0c6eSchristos 			return NULL;
50472439b7dShannken 		}
5051bb344adSmaxv 		vp = VIMPL_TO_VNODE(vip);
5061bb344adSmaxv 		KASSERT(vp != NULL);
50772439b7dShannken 		mutex_enter(vp->v_interlock);
5084222e592Shannken 		if (vnis_marker(vp) ||
509cfa69dcfShannken 		    vdead_check(vp, (do_wait ? 0 : VDEAD_NOWAIT)) ||
510215f0391Shannken 		    (f && !(*f)(cl, vp))) {
51172439b7dShannken 			mutex_exit(vp->v_interlock);
512dcc198a3Shannken 			vip = TAILQ_NEXT(vip, vi_mntvnodes);
51302cb0c6eSchristos 			goto again;
51472439b7dShannken 		}
51572439b7dShannken 
5161cf06cb4Sriastradh 		TAILQ_INSERT_AFTER(&mp->mnt_vnodelist, vip, mvip,
5171cf06cb4Sriastradh 		    vi_mntvnodes);
518dcc198a3Shannken 		VIMPL_TO_VNODE(mvip)->v_usecount = 1;
5197d06f330Sad 		mutex_exit(lock);
520998709c4Shannken 		error = vcache_vget(vp);
52172439b7dShannken 		KASSERT(error == 0 || error == ENOENT);
52272439b7dShannken 	} while (error != 0);
52372439b7dShannken 
52402cb0c6eSchristos 	return vp;
52572439b7dShannken }
52672439b7dShannken 
527cfa69dcfShannken struct vnode *
528cfa69dcfShannken vfs_vnode_iterator_next(struct vnode_iterator *vni,
529cfa69dcfShannken     bool (*f)(void *, struct vnode *), void *cl)
530cfa69dcfShannken {
531cfa69dcfShannken 
532cfa69dcfShannken 	return vfs_vnode_iterator_next1(vni, f, cl, false);
533cfa69dcfShannken }
534cfa69dcfShannken 
535fbc8beaeSrmind /*
536fbc8beaeSrmind  * Move a vnode from one mount queue to another.
537fbc8beaeSrmind  */
538fbc8beaeSrmind void
539fbc8beaeSrmind vfs_insmntque(vnode_t *vp, struct mount *mp)
540fbc8beaeSrmind {
541dcc198a3Shannken 	vnode_impl_t *vip = VNODE_TO_VIMPL(vp);
542fbc8beaeSrmind 	struct mount *omp;
5437d06f330Sad 	kmutex_t *lock;
544fbc8beaeSrmind 
54563ac53c0Srmind 	KASSERT(mp == NULL || (mp->mnt_iflag & IMNT_UNMOUNT) == 0 ||
54663ac53c0Srmind 	    vp->v_tag == VT_VFS);
547fbc8beaeSrmind 
548fbc8beaeSrmind 	/*
549fbc8beaeSrmind 	 * Delete from old mount point vnode list, if on one.
550fbc8beaeSrmind 	 */
5517d06f330Sad 	if ((omp = vp->v_mount) != NULL) {
5527d06f330Sad 		lock = omp->mnt_vnodelock;
5537d06f330Sad 		mutex_enter(lock);
554dcc198a3Shannken 		TAILQ_REMOVE(&vp->v_mount->mnt_vnodelist, vip, vi_mntvnodes);
5557d06f330Sad 		mutex_exit(lock);
5567d06f330Sad 	}
5577d06f330Sad 
558fbc8beaeSrmind 	/*
559fbc8beaeSrmind 	 * Insert into list of vnodes for the new mount point, if
560fbc8beaeSrmind 	 * available.  The caller must take a reference on the mount
561fbc8beaeSrmind 	 * structure and donate to the vnode.
562fbc8beaeSrmind 	 */
5637d06f330Sad 	if ((vp->v_mount = mp) != NULL) {
5647d06f330Sad 		lock = mp->mnt_vnodelock;
5657d06f330Sad 		mutex_enter(lock);
566dcc198a3Shannken 		TAILQ_INSERT_TAIL(&mp->mnt_vnodelist, vip, vi_mntvnodes);
5677d06f330Sad 		mutex_exit(lock);
5687d06f330Sad 	}
569fbc8beaeSrmind 
570fbc8beaeSrmind 	if (omp != NULL) {
571fbc8beaeSrmind 		/* Release reference to old mount. */
572ebb8f73bShannken 		vfs_rele(omp);
573fbc8beaeSrmind 	}
574fbc8beaeSrmind }
575fbc8beaeSrmind 
576fbc8beaeSrmind /*
577fbc8beaeSrmind  * Remove any vnodes in the vnode table belonging to mount point mp.
578fbc8beaeSrmind  *
579fbc8beaeSrmind  * If FORCECLOSE is not specified, there should not be any active ones,
580fbc8beaeSrmind  * return error if any are found (nb: this is a user error, not a
581fbc8beaeSrmind  * system error). If FORCECLOSE is specified, detach any active vnodes
582fbc8beaeSrmind  * that are found.
583fbc8beaeSrmind  *
584fbc8beaeSrmind  * If WRITECLOSE is set, only flush out regular file vnodes open for
585fbc8beaeSrmind  * writing.
586fbc8beaeSrmind  *
587fbc8beaeSrmind  * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped.
588fbc8beaeSrmind  */
#ifdef DEBUG
/* When nonzero, vflush() prints the vnodes it finds busy on its last pass. */
int busyprt = 0;
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif
593fbc8beaeSrmind 
59402cb0c6eSchristos static vnode_t *
5958e09b56dShannken vflushnext(struct vnode_iterator *marker, int *when)
59602cb0c6eSchristos {
597983fd9ccSmaxv 	if (getticks() > *when) {
598fbc8beaeSrmind 		yield();
599983fd9ccSmaxv 		*when = getticks() + hz / 10;
600fbc8beaeSrmind 	}
601be5e920cSriastradh 	preempt_point();
6028e09b56dShannken 	return vfs_vnode_iterator_next1(marker, NULL, NULL, true);
603fbc8beaeSrmind }
604fbc8beaeSrmind 
6058e09b56dShannken /*
6068e09b56dShannken  * Flush one vnode.  Referenced on entry, unreferenced on return.
6078e09b56dShannken  */
6088e09b56dShannken static int
6098e09b56dShannken vflush_one(vnode_t *vp, vnode_t *skipvp, int flags)
6108e09b56dShannken {
6118e09b56dShannken 	int error;
6128e09b56dShannken 	struct vattr vattr;
6138e09b56dShannken 
6148e09b56dShannken 	if (vp == skipvp ||
6158e09b56dShannken 	    ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM))) {
6168e09b56dShannken 		vrele(vp);
6178e09b56dShannken 		return 0;
6188e09b56dShannken 	}
6198e09b56dShannken 	/*
6208e09b56dShannken 	 * If WRITECLOSE is set, only flush out regular file
6218e09b56dShannken 	 * vnodes open for writing or open and unlinked.
6228e09b56dShannken 	 */
6238e09b56dShannken 	if ((flags & WRITECLOSE)) {
6248e09b56dShannken 		if (vp->v_type != VREG) {
6258e09b56dShannken 			vrele(vp);
6268e09b56dShannken 			return 0;
6278e09b56dShannken 		}
6288e09b56dShannken 		error = vn_lock(vp, LK_EXCLUSIVE);
6298e09b56dShannken 		if (error) {
6308e09b56dShannken 			KASSERT(error == ENOENT);
6318e09b56dShannken 			vrele(vp);
6328e09b56dShannken 			return 0;
6338e09b56dShannken 		}
6348e09b56dShannken 		error = VOP_FSYNC(vp, curlwp->l_cred, FSYNC_WAIT, 0, 0);
6358e09b56dShannken 		if (error == 0)
6368e09b56dShannken 			error = VOP_GETATTR(vp, &vattr, curlwp->l_cred);
6378e09b56dShannken 		VOP_UNLOCK(vp);
6388e09b56dShannken 		if (error) {
6398e09b56dShannken 			vrele(vp);
6408e09b56dShannken 			return error;
6418e09b56dShannken 		}
6428e09b56dShannken 		if (vp->v_writecount == 0 && vattr.va_nlink > 0) {
6438e09b56dShannken 			vrele(vp);
6448e09b56dShannken 			return 0;
6458e09b56dShannken 		}
6468e09b56dShannken 	}
6478e09b56dShannken 	/*
6488e09b56dShannken 	 * First try to recycle the vnode.
6498e09b56dShannken 	 */
6508e09b56dShannken 	if (vrecycle(vp))
6518e09b56dShannken 		return 0;
6528e09b56dShannken 	/*
6538e09b56dShannken 	 * If FORCECLOSE is set, forcibly close the vnode.
65428650af9Shannken 	 * For block or character devices, revert to an
65528650af9Shannken 	 * anonymous device.  For all other files, just
65628650af9Shannken 	 * kill them.
6578e09b56dShannken 	 */
6588e09b56dShannken 	if (flags & FORCECLOSE) {
65923bf8800Sad 		if (vrefcnt(vp) > 1 &&
66028650af9Shannken 		    (vp->v_type == VBLK || vp->v_type == VCHR))
66128650af9Shannken 			vcache_make_anon(vp);
66228650af9Shannken 		else
6638e09b56dShannken 			vgone(vp);
6648e09b56dShannken 		return 0;
6658e09b56dShannken 	}
6668e09b56dShannken 	vrele(vp);
667*e94a5d02Sriastradh 	return SET_ERROR(EBUSY);
6688e09b56dShannken }
66902cb0c6eSchristos 
670fbc8beaeSrmind int
671fbc8beaeSrmind vflush(struct mount *mp, vnode_t *skipvp, int flags)
672fbc8beaeSrmind {
67372439b7dShannken 	vnode_t *vp;
67472439b7dShannken 	struct vnode_iterator *marker;
675748bb656Shannken 	int busy, error, when, retries = 2;
6768e09b56dShannken 
677748bb656Shannken 	do {
6788e09b56dShannken 		busy = error = when = 0;
679fbc8beaeSrmind 
680748bb656Shannken 		/*
681748bb656Shannken 		 * First, flush out any vnode references from the
682748bb656Shannken 		 * deferred vrele list.
683748bb656Shannken 		 */
684677cf1d8Shannken 		vrele_flush(mp);
685fbc8beaeSrmind 
68672439b7dShannken 		vfs_vnode_iterator_init(mp, &marker);
68702cb0c6eSchristos 
6888e09b56dShannken 		while ((vp = vflushnext(marker, &when)) != NULL) {
6898e09b56dShannken 			error = vflush_one(vp, skipvp, flags);
6908e09b56dShannken 			if (error == EBUSY) {
6918e09b56dShannken 				error = 0;
6928e09b56dShannken 				busy++;
693fbc8beaeSrmind #ifdef DEBUG
694748bb656Shannken 				if (busyprt && retries == 0)
695fbc8beaeSrmind 					vprint("vflush: busy vnode", vp);
696fbc8beaeSrmind #endif
6978e09b56dShannken 			} else if (error != 0) {
6988e09b56dShannken 				break;
699fbc8beaeSrmind 			}
7008e09b56dShannken 		}
701215f0391Shannken 
7028e09b56dShannken 		vfs_vnode_iterator_destroy(marker);
703748bb656Shannken 	} while (error == 0 && busy > 0 && retries-- > 0);
7048e09b56dShannken 
7058e09b56dShannken 	if (error)
7068e09b56dShannken 		return error;
7078e09b56dShannken 	if (busy)
708*e94a5d02Sriastradh 		return SET_ERROR(EBUSY);
709215f0391Shannken 	return 0;
710215f0391Shannken }
711fbc8beaeSrmind 
712fbc8beaeSrmind /*
713fbc8beaeSrmind  * Mount a file system.
714fbc8beaeSrmind  */
715fbc8beaeSrmind 
716fbc8beaeSrmind /*
717fbc8beaeSrmind  * Scan all active processes to see if any of them have a current or root
718fbc8beaeSrmind  * directory onto which the new filesystem has just been  mounted. If so,
719fbc8beaeSrmind  * replace them with the new mount point.
720fbc8beaeSrmind  */
721fbc8beaeSrmind static void
722fbc8beaeSrmind mount_checkdirs(vnode_t *olddp)
723fbc8beaeSrmind {
724fbc8beaeSrmind 	vnode_t *newdp, *rele1, *rele2;
725fbc8beaeSrmind 	struct cwdinfo *cwdi;
726fbc8beaeSrmind 	struct proc *p;
727fbc8beaeSrmind 	bool retry;
728fbc8beaeSrmind 
72923bf8800Sad 	if (vrefcnt(olddp) == 1) {
730fbc8beaeSrmind 		return;
731fbc8beaeSrmind 	}
732c2e9cb94Sad 	if (VFS_ROOT(olddp->v_mountedhere, LK_EXCLUSIVE, &newdp))
733fbc8beaeSrmind 		panic("mount: lost mount");
734fbc8beaeSrmind 
735fbc8beaeSrmind 	do {
736fbc8beaeSrmind 		retry = false;
7370eaaa024Sad 		mutex_enter(&proc_lock);
738fbc8beaeSrmind 		PROCLIST_FOREACH(p, &allproc) {
739fbc8beaeSrmind 			if ((cwdi = p->p_cwdi) == NULL)
740fbc8beaeSrmind 				continue;
741fbc8beaeSrmind 			/*
742fbc8beaeSrmind 			 * Cannot change to the old directory any more,
743fbc8beaeSrmind 			 * so even if we see a stale value it is not a
744fbc8beaeSrmind 			 * problem.
745fbc8beaeSrmind 			 */
746fbc8beaeSrmind 			if (cwdi->cwdi_cdir != olddp &&
747fbc8beaeSrmind 			    cwdi->cwdi_rdir != olddp)
748fbc8beaeSrmind 				continue;
749fbc8beaeSrmind 			retry = true;
750fbc8beaeSrmind 			rele1 = NULL;
751fbc8beaeSrmind 			rele2 = NULL;
752fbc8beaeSrmind 			atomic_inc_uint(&cwdi->cwdi_refcnt);
7530eaaa024Sad 			mutex_exit(&proc_lock);
754e88c11f4Sad 			rw_enter(&cwdi->cwdi_lock, RW_WRITER);
755fbc8beaeSrmind 			if (cwdi->cwdi_cdir == olddp) {
756fbc8beaeSrmind 				rele1 = cwdi->cwdi_cdir;
757fbc8beaeSrmind 				vref(newdp);
758fbc8beaeSrmind 				cwdi->cwdi_cdir = newdp;
759fbc8beaeSrmind 			}
760fbc8beaeSrmind 			if (cwdi->cwdi_rdir == olddp) {
761fbc8beaeSrmind 				rele2 = cwdi->cwdi_rdir;
762fbc8beaeSrmind 				vref(newdp);
763fbc8beaeSrmind 				cwdi->cwdi_rdir = newdp;
764fbc8beaeSrmind 			}
765e88c11f4Sad 			rw_exit(&cwdi->cwdi_lock);
766fbc8beaeSrmind 			cwdfree(cwdi);
767fbc8beaeSrmind 			if (rele1 != NULL)
768fbc8beaeSrmind 				vrele(rele1);
769fbc8beaeSrmind 			if (rele2 != NULL)
770fbc8beaeSrmind 				vrele(rele2);
7710eaaa024Sad 			mutex_enter(&proc_lock);
772fbc8beaeSrmind 			break;
773fbc8beaeSrmind 		}
7740eaaa024Sad 		mutex_exit(&proc_lock);
775fbc8beaeSrmind 	} while (retry);
776fbc8beaeSrmind 
777fbc8beaeSrmind 	if (rootvnode == olddp) {
778fbc8beaeSrmind 		vrele(rootvnode);
779fbc8beaeSrmind 		vref(newdp);
780fbc8beaeSrmind 		rootvnode = newdp;
781fbc8beaeSrmind 	}
782fbc8beaeSrmind 	vput(newdp);
783fbc8beaeSrmind }
784fbc8beaeSrmind 
7857ac57848Smanu /*
7867ac57848Smanu  * Start extended attributes
7877ac57848Smanu  */
7887ac57848Smanu static int
7897ac57848Smanu start_extattr(struct mount *mp)
7907ac57848Smanu {
7917ac57848Smanu 	int error;
7927ac57848Smanu 
7937ac57848Smanu 	error = VFS_EXTATTRCTL(mp, EXTATTR_CMD_START, NULL, 0, NULL);
7947ac57848Smanu 	if (error)
7957ac57848Smanu 		printf("%s: failed to start extattr: error = %d\n",
7967ac57848Smanu 		       mp->mnt_stat.f_mntonname, error);
7977ac57848Smanu 
7987ac57848Smanu 	return error;
7997ac57848Smanu }
8007ac57848Smanu 
/*
 * mount_domount --
 *	Mount a new file system of type vfsops on the vnode *vpp.
 *
 *	l		lwp on whose behalf the mount is done; l->l_cred is
 *			used for the kauth check, for the recorded owner uid
 *			and for vinvalbuf().
 *	vpp		vnode to mount on; it must be a directory.  *vpp is
 *			set to NULL once the mount has been put on the mount
 *			list (even if VFS_START() fails afterwards) and is
 *			left alone on every earlier failure.
 *	vfsops		file system operations.  On the failures that happen
 *			before a mount structure exists the reference is
 *			dropped with vfs_delref().
 *	path		mount point path; handed to VFS_MOUNT() and looked up
 *			again afterwards to verify it still names *vpp.
 *	flags		mount flags.  MNT_EXPORTED is rejected; only
 *			MNT_BASIC_FLAGS, MNT_FORCE and MNT_IGNORE are copied
 *			into mnt_flag.  MNT_EXTATTR requests start_extattr()
 *			after a successful VFS_START().
 *	data, data_len	passed through to VFS_MOUNT().
 *
 *	Returns 0 on success or an errno value.
 */
801fbc8beaeSrmind int
802fbc8beaeSrmind mount_domount(struct lwp *l, vnode_t **vpp, struct vfsops *vfsops,
803fbc8beaeSrmind     const char *path, int flags, void *data, size_t *data_len)
804fbc8beaeSrmind {
805fbc8beaeSrmind 	vnode_t *vp = *vpp;
806fbc8beaeSrmind 	struct mount *mp;
807fbc8beaeSrmind 	struct pathbuf *pb;
808fbc8beaeSrmind 	struct nameidata nd;
8090b5a6352Shannken 	int error, error2;
810fbc8beaeSrmind 
	/*
	 * Until the mount structure has been allocated, every failure
	 * path gives the vfsops reference back with vfs_delref().
	 */
811fbc8beaeSrmind 	error = kauth_authorize_system(l->l_cred, KAUTH_SYSTEM_MOUNT,
812fbc8beaeSrmind 	    KAUTH_REQ_SYSTEM_MOUNT_NEW, vp, KAUTH_ARG(flags), data);
813fbc8beaeSrmind 	if (error) {
814fbc8beaeSrmind 		vfs_delref(vfsops);
815fbc8beaeSrmind 		return error;
816fbc8beaeSrmind 	}
817fbc8beaeSrmind 
818fbc8beaeSrmind 	/* Cannot make a non-dir a mount-point (from here anyway). */
819fbc8beaeSrmind 	if (vp->v_type != VDIR) {
820fbc8beaeSrmind 		vfs_delref(vfsops);
821*e94a5d02Sriastradh 		return SET_ERROR(ENOTDIR);
822fbc8beaeSrmind 	}
823fbc8beaeSrmind 
	/* A fresh mount may not be requested with MNT_EXPORTED set. */
824fbc8beaeSrmind 	if (flags & MNT_EXPORTED) {
825fbc8beaeSrmind 		vfs_delref(vfsops);
826*e94a5d02Sriastradh 		return SET_ERROR(EINVAL);
827fbc8beaeSrmind 	}
828fbc8beaeSrmind 
	/*
	 * NOTE(review): from here on failures release mp with vfs_rele()
	 * and no longer call vfs_delref(); presumably vfs_mountalloc()
	 * hands the vfsops reference over to mp -- confirm.
	 */
829fbc8beaeSrmind 	if ((mp = vfs_mountalloc(vfsops, vp)) == NULL) {
830fbc8beaeSrmind 		vfs_delref(vfsops);
831*e94a5d02Sriastradh 		return SET_ERROR(ENOMEM);
832fbc8beaeSrmind 	}
833fbc8beaeSrmind 
	/* Record the effective uid of the mounting credentials as owner. */
834fbc8beaeSrmind 	mp->mnt_stat.f_owner = kauth_cred_geteuid(l->l_cred);
835fbc8beaeSrmind 
836fbc8beaeSrmind 	/*
837fbc8beaeSrmind 	 * The underlying file system may refuse the mount for
838fbc8beaeSrmind 	 * various reasons.  Allow the user to force it to happen.
839fbc8beaeSrmind 	 *
840fbc8beaeSrmind 	 * Set the mount level flags.
841fbc8beaeSrmind 	 */
842953ea7dbSchristos 	mp->mnt_flag = flags & (MNT_BASIC_FLAGS | MNT_FORCE | MNT_IGNORE);
843fbc8beaeSrmind 
	/* The MNT_OP_FLAGS bits are cleared again right after VFS_MOUNT(). */
844fbc8beaeSrmind 	error = VFS_MOUNT(mp, path, data, data_len);
845fbc8beaeSrmind 	mp->mnt_flag &= ~MNT_OP_FLAGS;
846fbc8beaeSrmind 
847ba6f7f8dShannken 	if (error != 0) {
848ba6f7f8dShannken 		vfs_rele(mp);
849ba6f7f8dShannken 		return error;
850ba6f7f8dShannken 	}
851ba6f7f8dShannken 
852ba6f7f8dShannken 	/* Suspend new file system before taking mnt_updating. */
	/*
	 * An interrupted suspend (EINTR/ERESTART) is simply retried.
	 * error2 keeps the final result: 0 means the file system is
	 * suspended and has to be resumed later, EOPNOTSUPP is the only
	 * other result the assertion allows.
	 */
853ba6f7f8dShannken 	do {
854ba6f7f8dShannken 		error2 = vfs_suspend(mp, 0);
855ba6f7f8dShannken 	} while (error2 == EINTR || error2 == ERESTART);
856ba6f7f8dShannken 	KASSERT(error2 == 0 || error2 == EOPNOTSUPP);
857ba6f7f8dShannken 	mutex_enter(mp->mnt_updating);
858fbc8beaeSrmind 
859fbc8beaeSrmind 	/*
860fbc8beaeSrmind 	 * Validate and prepare the mount point.
861fbc8beaeSrmind 	 */
	/*
	 * The path is looked up once more (leaf locked); the result must
	 * still be vp and vp must not be covered by another mount yet.
	 * Every failure from here to the mount list insertion goes to
	 * err_mounted, which undoes VFS_MOUNT().
	 */
862fbc8beaeSrmind 	error = pathbuf_copyin(path, &pb);
863fbc8beaeSrmind 	if (error != 0) {
864fbc8beaeSrmind 		goto err_mounted;
865fbc8beaeSrmind 	}
866fbc8beaeSrmind 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);
867fbc8beaeSrmind 	error = namei(&nd);
868fbc8beaeSrmind 	pathbuf_destroy(pb);
869fbc8beaeSrmind 	if (error != 0) {
870fbc8beaeSrmind 		goto err_mounted;
871fbc8beaeSrmind 	}
872fbc8beaeSrmind 	if (nd.ni_vp != vp) {
873fbc8beaeSrmind 		vput(nd.ni_vp);
874*e94a5d02Sriastradh 		error = SET_ERROR(EINVAL);
875fbc8beaeSrmind 		goto err_mounted;
876fbc8beaeSrmind 	}
877fbc8beaeSrmind 	if (vp->v_mountedhere != NULL) {
878fbc8beaeSrmind 		vput(nd.ni_vp);
879*e94a5d02Sriastradh 		error = SET_ERROR(EBUSY);
880fbc8beaeSrmind 		goto err_mounted;
881fbc8beaeSrmind 	}
882fbc8beaeSrmind 	error = vinvalbuf(vp, V_SAVE, l->l_cred, l, 0, 0);
883fbc8beaeSrmind 	if (error != 0) {
884fbc8beaeSrmind 		vput(nd.ni_vp);
885fbc8beaeSrmind 		goto err_mounted;
886fbc8beaeSrmind 	}
887fbc8beaeSrmind 
888fbc8beaeSrmind 	/*
889fbc8beaeSrmind 	 * Put the new filesystem on the mount list after root.
890fbc8beaeSrmind 	 */
891fbc8beaeSrmind 	cache_purge(vp);
892fbc8beaeSrmind 	mp->mnt_iflag &= ~IMNT_WANTRDWR;
893fbc8beaeSrmind 
8942f4fa4f9Shannken 	mountlist_append(mp);
	/* Only mounts that are neither read-only nor async go to the syncer. */
895fbc8beaeSrmind 	if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
896e10a32f7Shannken 		vfs_syncer_add_to_worklist(mp);
897fbc8beaeSrmind 	vp->v_mountedhere = mp;
898fbc8beaeSrmind 	vput(nd.ni_vp);
899fbc8beaeSrmind 
	/* Move cwd/root directories that still point at vp to the new root. */
900fbc8beaeSrmind 	mount_checkdirs(vp);
9017d06f330Sad 	mutex_exit(mp->mnt_updating);
	/* Resume only if the suspend above actually succeeded. */
902ba6f7f8dShannken 	if (error2 == 0)
903ba6f7f8dShannken 		vfs_resume(mp);
904fbc8beaeSrmind 
905fbc8beaeSrmind 	/* Hold an additional reference to the mount across VFS_START(). */
90620bb034fShannken 	vfs_ref(mp);
907fbc8beaeSrmind 	(void) VFS_STATVFS(mp, &mp->mnt_stat);
908fbc8beaeSrmind 	error = VFS_START(mp, 0);
	/*
	 * A VFS_START() failure drops a reference on vp and is returned to
	 * the caller; the mount is not taken off the mount list here.
	 * Extended attributes are started only after a successful
	 * VFS_START(); if that fails MNT_EXTATTR is cleared and the mount
	 * still succeeds.
	 */
909425e23f1Sgson 	if (error) {
910fbc8beaeSrmind 		vrele(vp);
911425e23f1Sgson 	} else if (flags & MNT_EXTATTR) {
9125d96c08aSchristos 		if (start_extattr(mp) != 0)
9135d96c08aSchristos 			mp->mnt_flag &= ~MNT_EXTATTR;
914425e23f1Sgson 	}
915fbc8beaeSrmind 	/* Drop reference held for VFS_START(). */
916ebb8f73bShannken 	vfs_rele(mp);
917fbc8beaeSrmind 	*vpp = NULL;
918fbc8beaeSrmind 	return error;
919fbc8beaeSrmind 
	/*
	 * Failure after a successful VFS_MOUNT(): force the unmount of the
	 * fresh file system, leave mnt_updating, resume if we suspended,
	 * clear the lower mount and release mp.  *vpp is not modified.
	 */
920fbc8beaeSrmind err_mounted:
921fbc8beaeSrmind 	if (VFS_UNMOUNT(mp, MNT_FORCE) != 0)
922fbc8beaeSrmind 		panic("Unmounting fresh file system failed");
923ba6f7f8dShannken 	mutex_exit(mp->mnt_updating);
9240b5a6352Shannken 	if (error2 == 0)
925d8a57107Shannken 		vfs_resume(mp);
926cfe3f2c3Shannken 	vfs_set_lowermount(mp, NULL);
927ebb8f73bShannken 	vfs_rele(mp);
928fbc8beaeSrmind 
929fbc8beaeSrmind 	return error;
930fbc8beaeSrmind }
931fbc8beaeSrmind 
932fbc8beaeSrmind /*
933fbc8beaeSrmind  * Do the actual file system unmount.  File system is assumed to have
934fbc8beaeSrmind  * been locked by the caller.
935fbc8beaeSrmind  *
936fbc8beaeSrmind  * => Caller holds a reference to the mount, explicitly for dounmount().
 *
 * mp	mount to take down.
 * flags	unmount flags; MNT_FORCE skips the preliminary VFS_SYNC() and
 *	is passed on to VFS_UNMOUNT().
 * l	lwp whose credentials are used for VFS_SYNC().
 *
 * Returns 0 on success.  On failure an errno value is returned and the
 * mount is put back into service: the IMNT_UNMOUNT mark is cleared, the
 * MNT_ASYNC bit is restored, the syncer work list is rejoined when the
 * mount is neither read-only nor async, and extended attributes are
 * restarted if they were enabled.
937fbc8beaeSrmind  */
938fbc8beaeSrmind int
939fbc8beaeSrmind dounmount(struct mount *mp, int flags, struct lwp *l)
940fbc8beaeSrmind {
941b7772f66Sriastradh 	struct vnode *coveredvp, *vp;
942b7772f66Sriastradh 	struct vnode_impl *vip;
9437ac57848Smanu 	int error, async, used_syncer, used_extattr;
	/*
	 * NOTE(review): presumably true when the calling lwp has already
	 * suspended mp; in that case this function neither suspends nor
	 * resumes the file system itself -- confirm against
	 * fstrans_is_owner().
	 */
94488bcb454Shannken 	const bool was_suspended = fstrans_is_owner(mp);
945fbc8beaeSrmind 
946fbc8beaeSrmind #if NVERIEXEC > 0
947fbc8beaeSrmind 	error = veriexec_unmountchk(mp);
948fbc8beaeSrmind 	if (error)
949fbc8beaeSrmind 		return (error);
950fbc8beaeSrmind #endif /* NVERIEXEC > 0 */
951fbc8beaeSrmind 
95288bcb454Shannken 	if (!was_suspended) {
953677cf1d8Shannken 		error = vfs_suspend(mp, 0);
954677cf1d8Shannken 		if (error) {
955677cf1d8Shannken 			return error;
956677cf1d8Shannken 		}
95788bcb454Shannken 	}
958677cf1d8Shannken 
95969174779Shannken 	KASSERT((mp->mnt_iflag & IMNT_GONE) == 0);
960fbc8beaeSrmind 
	/* Remember state that the failure path below has to restore. */
961e10a32f7Shannken 	used_syncer = (mp->mnt_iflag & IMNT_ONWORKLIST) != 0;
9627ac57848Smanu 	used_extattr = mp->mnt_flag & MNT_EXTATTR;
963fbc8beaeSrmind 
964fbc8beaeSrmind 	mp->mnt_iflag |= IMNT_UNMOUNT;
9657d06f330Sad 	mutex_enter(mp->mnt_updating);
966eb02f11dSbad 	/*
967eb02f11dSbad 	 * Temporarily clear the MNT_ASYNC flags so that bwrite() doesn't
968eb02f11dSbad 	 * convert the sync writes to delayed writes.
969eb02f11dSbad 	 */
970fbc8beaeSrmind 	async = mp->mnt_flag & MNT_ASYNC;
971fbc8beaeSrmind 	mp->mnt_flag &= ~MNT_ASYNC;
972fbc8beaeSrmind 	cache_purgevfs(mp);	/* remove cache entries for this file sys */
973e10a32f7Shannken 	if (used_syncer)
974e10a32f7Shannken 		vfs_syncer_remove_from_worklist(mp);
975fbc8beaeSrmind 	error = 0;
	/*
	 * A writable file system is synced first unless the unmount is
	 * forced; a sync failure aborts a non-forced unmount.
	 */
9763a71cbe9Smanu 	if (((mp->mnt_flag & MNT_RDONLY) == 0) && ((flags & MNT_FORCE) == 0)) {
977fbc8beaeSrmind 		error = VFS_SYNC(mp, MNT_WAIT, l->l_cred);
978fbc8beaeSrmind 	}
979fbc8beaeSrmind 	if (error == 0 || (flags & MNT_FORCE)) {
980fbc8beaeSrmind 		error = VFS_UNMOUNT(mp, flags);
981fbc8beaeSrmind 	}
	/* Failure: undo the preparations above and keep the mount alive. */
982fbc8beaeSrmind 	if (error) {
983df5cf9d7Smlelstv 		mp->mnt_iflag &= ~IMNT_UNMOUNT;
9842ed187fdSbad 		mp->mnt_flag |= async;
985fbc8beaeSrmind 		if ((mp->mnt_flag & (MNT_RDONLY | MNT_ASYNC)) == 0)
986e10a32f7Shannken 			vfs_syncer_add_to_worklist(mp);
9877d06f330Sad 		mutex_exit(mp->mnt_updating);
98888bcb454Shannken 		if (!was_suspended)
989677cf1d8Shannken 			vfs_resume(mp);
		/*
		 * Try to start extended attributes again; MNT_EXTATTR
		 * ends up reflecting whether that worked.
		 */
9907ac57848Smanu 		if (used_extattr) {
9917ac57848Smanu 			if (start_extattr(mp) != 0)
9927ac57848Smanu 				mp->mnt_flag &= ~MNT_EXTATTR;
9937ac57848Smanu 			else
9947ac57848Smanu 				mp->mnt_flag |= MNT_EXTATTR;
9957ac57848Smanu 		}
996fbc8beaeSrmind 		return (error);
997fbc8beaeSrmind 	}
9987d06f330Sad 	mutex_exit(mp->mnt_updating);
999220e532cSmlelstv 
1000220e532cSmlelstv 	/*
1001220e532cSmlelstv 	 * mark filesystem as gone to prevent further umounts
1002220e532cSmlelstv 	 * after mnt_umounting lock is gone, this also prevents
1003220e532cSmlelstv 	 * vfs_busy() from succeeding.
1004220e532cSmlelstv 	 */
1005220e532cSmlelstv 	mp->mnt_iflag |= IMNT_GONE;
	/* Detach from the covered vnode; its reference is dropped at the end. */
100610a8222cShannken 	if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) {
100710a8222cShannken 		coveredvp->v_mountedhere = NULL;
100810a8222cShannken 	}
100988bcb454Shannken 	if (!was_suspended)
1010677cf1d8Shannken 		vfs_resume(mp);
1011220e532cSmlelstv 
10122f4fa4f9Shannken 	mountlist_remove(mp);
1013b7772f66Sriastradh 
	/* Any vnode still on the mount's list at this point is fatal. */
1014b7772f66Sriastradh 	if ((vip = TAILQ_FIRST(&mp->mnt_vnodelist)) != NULL) {
1015b7772f66Sriastradh 		vp = VIMPL_TO_VNODE(vip);
1016ca4932dcShannken 		vprint("dangling", vp);
1017fbc8beaeSrmind 		panic("unmount: dangling vnode");
1018ca4932dcShannken 	}
1019fbc8beaeSrmind 	vfs_hooks_unmount(mp);
1020220e532cSmlelstv 
1021cfe3f2c3Shannken 	vfs_set_lowermount(mp, NULL);
1022ebb8f73bShannken 	vfs_rele(mp);	/* reference from mount() */
1023fbc8beaeSrmind 	if (coveredvp != NULLVP) {
1024fbc8beaeSrmind 		vrele(coveredvp);
1025fbc8beaeSrmind 	}
1026fbc8beaeSrmind 	return (0);
1027fbc8beaeSrmind }
1028fbc8beaeSrmind 
1029fbc8beaeSrmind /*
1030fbc8beaeSrmind  * Unmount all file systems.
1031fbc8beaeSrmind  * We traverse the list in reverse order under the assumption that doing so
1032fbc8beaeSrmind  * will avoid needing to worry about dependencies.
1033fbc8beaeSrmind  */
1034fbc8beaeSrmind bool
1035fbc8beaeSrmind vfs_unmountall(struct lwp *l)
1036fbc8beaeSrmind {
1037fbc8beaeSrmind 
10388c45e7bdSpooka 	printf("unmounting file systems...\n");
1039fbc8beaeSrmind 	return vfs_unmountall1(l, true, true);
1040fbc8beaeSrmind }
1041fbc8beaeSrmind 
1042fbc8beaeSrmind static void
1043fbc8beaeSrmind vfs_unmount_print(struct mount *mp, const char *pfx)
1044fbc8beaeSrmind {
1045fbc8beaeSrmind 
1046fbc8beaeSrmind 	aprint_verbose("%sunmounted %s on %s type %s\n", pfx,
1047fbc8beaeSrmind 	    mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname,
1048fbc8beaeSrmind 	    mp->mnt_stat.f_fstypename);
1049fbc8beaeSrmind }
1050fbc8beaeSrmind 
1051e08a8c41Shannken /*
1052e08a8c41Shannken  * Return the mount with the highest generation less than "gen".
1053e08a8c41Shannken  */
1054e08a8c41Shannken static struct mount *
1055e08a8c41Shannken vfs_unmount_next(uint64_t gen)
1056fbc8beaeSrmind {
1057e08a8c41Shannken 	mount_iterator_t *iter;
1058fbc8beaeSrmind 	struct mount *mp, *nmp;
1059fbc8beaeSrmind 
1060fbc8beaeSrmind 	nmp = NULL;
1061fbc8beaeSrmind 
1062e08a8c41Shannken 	mountlist_iterator_init(&iter);
1063e08a8c41Shannken 	while ((mp = mountlist_iterator_next(iter)) != NULL) {
1064e08a8c41Shannken 		if ((nmp == NULL || mp->mnt_gen > nmp->mnt_gen) &&
1065e08a8c41Shannken 		    mp->mnt_gen < gen) {
1066e08a8c41Shannken 			if (nmp != NULL)
1067ebb8f73bShannken 				vfs_rele(nmp);
1068fbc8beaeSrmind 			nmp = mp;
1069ebb8f73bShannken 			vfs_ref(nmp);
1070fbc8beaeSrmind 		}
1071fbc8beaeSrmind 	}
1072e08a8c41Shannken 	mountlist_iterator_destroy(iter);
1073e08a8c41Shannken 
1074e08a8c41Shannken 	return nmp;
1075e08a8c41Shannken }
1076e08a8c41Shannken 
1077e08a8c41Shannken bool
1078e08a8c41Shannken vfs_unmount_forceone(struct lwp *l)
1079e08a8c41Shannken {
1080e08a8c41Shannken 	struct mount *mp;
1081e08a8c41Shannken 	int error;
1082e08a8c41Shannken 
1083e08a8c41Shannken 	mp = vfs_unmount_next(mountgen);
1084e08a8c41Shannken 	if (mp == NULL) {
1085fbc8beaeSrmind 		return false;
1086fbc8beaeSrmind 	}
1087fbc8beaeSrmind 
1088fbc8beaeSrmind #ifdef DEBUG
10898c45e7bdSpooka 	printf("forcefully unmounting %s (%s)...\n",
1090e08a8c41Shannken 	    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
1091fbc8beaeSrmind #endif
1092e08a8c41Shannken 	if ((error = dounmount(mp, MNT_FORCE, l)) == 0) {
1093e08a8c41Shannken 		vfs_unmount_print(mp, "forcefully ");
1094fbc8beaeSrmind 		return true;
1095fbc8beaeSrmind 	} else {
1096ebb8f73bShannken 		vfs_rele(mp);
1097fbc8beaeSrmind 	}
1098fbc8beaeSrmind 
1099fbc8beaeSrmind #ifdef DEBUG
1100fbc8beaeSrmind 	printf("forceful unmount of %s failed with error %d\n",
1101e08a8c41Shannken 	    mp->mnt_stat.f_mntonname, error);
1102fbc8beaeSrmind #endif
1103fbc8beaeSrmind 
1104fbc8beaeSrmind 	return false;
1105fbc8beaeSrmind }
1106fbc8beaeSrmind 
1107fbc8beaeSrmind bool
1108fbc8beaeSrmind vfs_unmountall1(struct lwp *l, bool force, bool verbose)
1109fbc8beaeSrmind {
1110e08a8c41Shannken 	struct mount *mp;
1111dc06ff16Shannken 	mount_iterator_t *iter;
1112fbc8beaeSrmind 	bool any_error = false, progress = false;
1113e08a8c41Shannken 	uint64_t gen;
1114fbc8beaeSrmind 	int error;
1115fbc8beaeSrmind 
1116e08a8c41Shannken 	gen = mountgen;
1117e08a8c41Shannken 	for (;;) {
1118e08a8c41Shannken 		mp = vfs_unmount_next(gen);
1119e08a8c41Shannken 		if (mp == NULL)
1120e08a8c41Shannken 			break;
1121e08a8c41Shannken 		gen = mp->mnt_gen;
1122e08a8c41Shannken 
1123fbc8beaeSrmind #ifdef DEBUG
11248c45e7bdSpooka 		printf("unmounting %p %s (%s)...\n",
1125fbc8beaeSrmind 		    (void *)mp, mp->mnt_stat.f_mntonname,
1126fbc8beaeSrmind 		    mp->mnt_stat.f_mntfromname);
1127fbc8beaeSrmind #endif
1128fbc8beaeSrmind 		if ((error = dounmount(mp, force ? MNT_FORCE : 0, l)) == 0) {
1129fbc8beaeSrmind 			vfs_unmount_print(mp, "");
1130fbc8beaeSrmind 			progress = true;
1131fbc8beaeSrmind 		} else {
1132ebb8f73bShannken 			vfs_rele(mp);
1133fbc8beaeSrmind 			if (verbose) {
1134fbc8beaeSrmind 				printf("unmount of %s failed with error %d\n",
1135fbc8beaeSrmind 				    mp->mnt_stat.f_mntonname, error);
1136fbc8beaeSrmind 			}
1137fbc8beaeSrmind 			any_error = true;
1138fbc8beaeSrmind 		}
1139fbc8beaeSrmind 	}
1140fbc8beaeSrmind 	if (verbose) {
11418c45e7bdSpooka 		printf("unmounting done\n");
1142fbc8beaeSrmind 	}
1143fbc8beaeSrmind 	if (any_error && verbose) {
1144fbc8beaeSrmind 		printf("WARNING: some file systems would not unmount\n");
1145fbc8beaeSrmind 	}
1146dc06ff16Shannken 	/* If the mountlist is empty it is time to remove swap. */
1147dc06ff16Shannken 	mountlist_iterator_init(&iter);
1148dc06ff16Shannken 	if (mountlist_iterator_next(iter) == NULL) {
1149dc06ff16Shannken 		uvm_swap_shutdown(l);
1150dc06ff16Shannken 	}
1151dc06ff16Shannken 	mountlist_iterator_destroy(iter);
1152dc06ff16Shannken 
1153fbc8beaeSrmind 	return progress;
1154fbc8beaeSrmind }
1155fbc8beaeSrmind 
1156fbc8beaeSrmind void
1157fbc8beaeSrmind vfs_sync_all(struct lwp *l)
1158fbc8beaeSrmind {
1159fbc8beaeSrmind 	printf("syncing disks... ");
1160fbc8beaeSrmind 
1161fbc8beaeSrmind 	/* remove user processes from run queue */
1162fbc8beaeSrmind 	suspendsched();
1163fbc8beaeSrmind 	(void)spl0();
1164fbc8beaeSrmind 
1165fbc8beaeSrmind 	/* avoid coming back this way again if we panic. */
1166fbc8beaeSrmind 	doing_shutdown = 1;
1167fbc8beaeSrmind 
1168ce817826Sdsl 	do_sys_sync(l);
1169fbc8beaeSrmind 
1170fbc8beaeSrmind 	/* Wait for sync to finish. */
11714e73754fSad 	if (vfs_syncwait() != 0) {
1172fbc8beaeSrmind #if defined(DDB) && defined(DEBUG_HALT_BUSY)
1173fbc8beaeSrmind 		Debugger();
1174fbc8beaeSrmind #endif
1175fbc8beaeSrmind 		printf("giving up\n");
1176fbc8beaeSrmind 		return;
1177fbc8beaeSrmind 	} else
1178fbc8beaeSrmind 		printf("done\n");
1179fbc8beaeSrmind }
1180fbc8beaeSrmind 
1181fbc8beaeSrmind /*
1182fbc8beaeSrmind  * Sync and unmount file systems before shutting down.
1183fbc8beaeSrmind  */
1184fbc8beaeSrmind void
1185fbc8beaeSrmind vfs_shutdown(void)
1186fbc8beaeSrmind {
118761dad95dSrmind 	lwp_t *l = curlwp;
1188fbc8beaeSrmind 
118961dad95dSrmind 	vfs_sync_all(l);
119061dad95dSrmind 
119161dad95dSrmind 	/*
1192db54414cSandvar 	 * If we have panicked - do not make the situation potentially
119361dad95dSrmind 	 * worse by unmounting the file systems.
119461dad95dSrmind 	 */
119561dad95dSrmind 	if (panicstr != NULL) {
119661dad95dSrmind 		return;
119761dad95dSrmind 	}
119861dad95dSrmind 
119961dad95dSrmind 	/* Unmount file systems. */
120061dad95dSrmind 	vfs_unmountall(l);
1201fbc8beaeSrmind }
1202fbc8beaeSrmind 
1203fbc8beaeSrmind /*
1204fbc8beaeSrmind  * Print a list of supported file system types (used by vfs_mountroot)
1205fbc8beaeSrmind  */
1206fbc8beaeSrmind static void
1207fbc8beaeSrmind vfs_print_fstypes(void)
1208fbc8beaeSrmind {
1209fbc8beaeSrmind 	struct vfsops *v;
1210fbc8beaeSrmind 	int cnt = 0;
1211fbc8beaeSrmind 
1212fbc8beaeSrmind 	mutex_enter(&vfs_list_lock);
1213fbc8beaeSrmind 	LIST_FOREACH(v, &vfs_list, vfs_list)
1214fbc8beaeSrmind 		++cnt;
1215fbc8beaeSrmind 	mutex_exit(&vfs_list_lock);
1216fbc8beaeSrmind 
1217fbc8beaeSrmind 	if (cnt == 0) {
1218fbc8beaeSrmind 		printf("WARNING: No file system modules have been loaded.\n");
1219fbc8beaeSrmind 		return;
1220fbc8beaeSrmind 	}
1221fbc8beaeSrmind 
1222fbc8beaeSrmind 	printf("Supported file systems:");
1223fbc8beaeSrmind 	mutex_enter(&vfs_list_lock);
1224fbc8beaeSrmind 	LIST_FOREACH(v, &vfs_list, vfs_list) {
1225fbc8beaeSrmind 		printf(" %s", v->vfs_name);
1226fbc8beaeSrmind 	}
1227fbc8beaeSrmind 	mutex_exit(&vfs_list_lock);
1228fbc8beaeSrmind 	printf("\n");
1229fbc8beaeSrmind }
1230fbc8beaeSrmind 
1231fbc8beaeSrmind /*
1232fbc8beaeSrmind  * Mount the root file system.  If the operator didn't specify a
1233fbc8beaeSrmind  * file system to use, try all possible file systems until one
1234fbc8beaeSrmind  * succeeds.
1235fbc8beaeSrmind  */
1236fbc8beaeSrmind int
1237fbc8beaeSrmind vfs_mountroot(void)
1238fbc8beaeSrmind {
1239fbc8beaeSrmind 	struct vfsops *v;
1240fbc8beaeSrmind 	int error = ENODEV;
1241fbc8beaeSrmind 
1242fbc8beaeSrmind 	if (root_device == NULL)
1243fbc8beaeSrmind 		panic("vfs_mountroot: root device unknown");
1244fbc8beaeSrmind 
1245fbc8beaeSrmind 	switch (device_class(root_device)) {
1246fbc8beaeSrmind 	case DV_IFNET:
1247fbc8beaeSrmind 		if (rootdev != NODEV)
1248fbc8beaeSrmind 			panic("vfs_mountroot: rootdev set for DV_IFNET "
1249fbc8beaeSrmind 			    "(0x%llx -> %llu,%llu)",
1250fbc8beaeSrmind 			    (unsigned long long)rootdev,
1251fbc8beaeSrmind 			    (unsigned long long)major(rootdev),
1252fbc8beaeSrmind 			    (unsigned long long)minor(rootdev));
1253fbc8beaeSrmind 		break;
1254fbc8beaeSrmind 
1255fbc8beaeSrmind 	case DV_DISK:
1256fbc8beaeSrmind 		if (rootdev == NODEV)
1257fbc8beaeSrmind 			panic("vfs_mountroot: rootdev not set for DV_DISK");
1258fbc8beaeSrmind 	        if (bdevvp(rootdev, &rootvp))
1259fbc8beaeSrmind 	                panic("vfs_mountroot: can't get vnode for rootdev");
1260ce218897Shannken 		vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY);
1261fbc8beaeSrmind 		error = VOP_OPEN(rootvp, FREAD, FSCRED);
1262ce218897Shannken 		VOP_UNLOCK(rootvp);
1263fbc8beaeSrmind 		if (error) {
1264fbc8beaeSrmind 			printf("vfs_mountroot: can't open root device\n");
1265fbc8beaeSrmind 			return (error);
1266fbc8beaeSrmind 		}
1267fbc8beaeSrmind 		break;
1268fbc8beaeSrmind 
1269fbc8beaeSrmind 	case DV_VIRTUAL:
1270fbc8beaeSrmind 		break;
1271fbc8beaeSrmind 
1272fbc8beaeSrmind 	default:
1273fbc8beaeSrmind 		printf("%s: inappropriate for root file system\n",
1274fbc8beaeSrmind 		    device_xname(root_device));
1275*e94a5d02Sriastradh 		return SET_ERROR(ENODEV);
1276fbc8beaeSrmind 	}
1277fbc8beaeSrmind 
1278fbc8beaeSrmind 	/*
1279fbc8beaeSrmind 	 * If user specified a root fs type, use it.  Make sure the
1280fbc8beaeSrmind 	 * specified type exists and has a mount_root()
1281fbc8beaeSrmind 	 */
1282fbc8beaeSrmind 	if (strcmp(rootfstype, ROOT_FSTYPE_ANY) != 0) {
1283fbc8beaeSrmind 		v = vfs_getopsbyname(rootfstype);
1284*e94a5d02Sriastradh 		error = SET_ERROR(EFTYPE);
1285fbc8beaeSrmind 		if (v != NULL) {
1286fbc8beaeSrmind 			if (v->vfs_mountroot != NULL) {
1287fbc8beaeSrmind 				error = (v->vfs_mountroot)();
1288fbc8beaeSrmind 			}
1289fbc8beaeSrmind 			v->vfs_refcount--;
1290fbc8beaeSrmind 		}
1291fbc8beaeSrmind 		goto done;
1292fbc8beaeSrmind 	}
1293fbc8beaeSrmind 
1294fbc8beaeSrmind 	/*
1295fbc8beaeSrmind 	 * Try each file system currently configured into the kernel.
1296fbc8beaeSrmind 	 */
1297fbc8beaeSrmind 	mutex_enter(&vfs_list_lock);
1298fbc8beaeSrmind 	LIST_FOREACH(v, &vfs_list, vfs_list) {
1299fbc8beaeSrmind 		if (v->vfs_mountroot == NULL)
1300fbc8beaeSrmind 			continue;
1301fbc8beaeSrmind #ifdef DEBUG
1302fbc8beaeSrmind 		aprint_normal("mountroot: trying %s...\n", v->vfs_name);
1303fbc8beaeSrmind #endif
1304fbc8beaeSrmind 		v->vfs_refcount++;
1305fbc8beaeSrmind 		mutex_exit(&vfs_list_lock);
1306fbc8beaeSrmind 		error = (*v->vfs_mountroot)();
1307fbc8beaeSrmind 		mutex_enter(&vfs_list_lock);
1308fbc8beaeSrmind 		v->vfs_refcount--;
1309fbc8beaeSrmind 		if (!error) {
1310fbc8beaeSrmind 			aprint_normal("root file system type: %s\n",
1311fbc8beaeSrmind 			    v->vfs_name);
1312fbc8beaeSrmind 			break;
1313fbc8beaeSrmind 		}
1314fbc8beaeSrmind 	}
1315fbc8beaeSrmind 	mutex_exit(&vfs_list_lock);
1316fbc8beaeSrmind 
1317fbc8beaeSrmind 	if (v == NULL) {
1318fbc8beaeSrmind 		vfs_print_fstypes();
1319fbc8beaeSrmind 		printf("no file system for %s", device_xname(root_device));
1320fbc8beaeSrmind 		if (device_class(root_device) == DV_DISK)
1321fbc8beaeSrmind 			printf(" (dev 0x%llx)", (unsigned long long)rootdev);
1322fbc8beaeSrmind 		printf("\n");
1323*e94a5d02Sriastradh 		error = SET_ERROR(EFTYPE);
1324fbc8beaeSrmind 	}
1325fbc8beaeSrmind 
1326fbc8beaeSrmind done:
1327fbc8beaeSrmind 	if (error && device_class(root_device) == DV_DISK) {
132862e877acSriastradh 		vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY);
1329fbc8beaeSrmind 		VOP_CLOSE(rootvp, FREAD, FSCRED);
133062e877acSriastradh 		VOP_UNLOCK(rootvp);
1331fbc8beaeSrmind 		vrele(rootvp);
1332fbc8beaeSrmind 	}
1333fbc8beaeSrmind 	if (error == 0) {
1334e08a8c41Shannken 		mount_iterator_t *iter;
13350b725b63Schristos 		struct mount *mp;
1336fbc8beaeSrmind 
1337e08a8c41Shannken 		mountlist_iterator_init(&iter);
1338e08a8c41Shannken 		mp = mountlist_iterator_next(iter);
1339e08a8c41Shannken 		KASSERT(mp != NULL);
1340e08a8c41Shannken 		mountlist_iterator_destroy(iter);
1341e08a8c41Shannken 
13420b725b63Schristos 		mp->mnt_flag |= MNT_ROOTFS;
13430b725b63Schristos 		mp->mnt_op->vfs_refcount++;
1344fbc8beaeSrmind 
1345fbc8beaeSrmind 		/*
1346fbc8beaeSrmind 		 * Get the vnode for '/'.  Set cwdi0.cwdi_cdir to
1347b5adab0eSad 		 * reference it, and donate it the reference grabbed
1348b5adab0eSad 		 * with VFS_ROOT().
1349fbc8beaeSrmind 		 */
1350b5adab0eSad 		error = VFS_ROOT(mp, LK_NONE, &rootvnode);
1351fbc8beaeSrmind 		if (error)
1352fbc8beaeSrmind 			panic("cannot find root vnode, error=%d", error);
1353fbc8beaeSrmind 		cwdi0.cwdi_cdir = rootvnode;
1354fbc8beaeSrmind 		cwdi0.cwdi_rdir = NULL;
1355fbc8beaeSrmind 
1356fbc8beaeSrmind 		/*
1357fbc8beaeSrmind 		 * Now that root is mounted, we can fixup initproc's CWD
1358fbc8beaeSrmind 		 * info.  All other processes are kthreads, which merely
1359fbc8beaeSrmind 		 * share proc0's CWD info.
1360fbc8beaeSrmind 		 */
1361fbc8beaeSrmind 		initproc->p_cwdi->cwdi_cdir = rootvnode;
1362fbc8beaeSrmind 		vref(initproc->p_cwdi->cwdi_cdir);
1363fbc8beaeSrmind 		initproc->p_cwdi->cwdi_rdir = NULL;
1364fbc8beaeSrmind 		/*
1365fbc8beaeSrmind 		 * Enable loading of modules from the filesystem
1366fbc8beaeSrmind 		 */
1367fbc8beaeSrmind 		module_load_vfs_init();
1368fbc8beaeSrmind 
1369fbc8beaeSrmind 	}
1370fbc8beaeSrmind 	return (error);
1371fbc8beaeSrmind }
1372fbc8beaeSrmind 
1373fbc8beaeSrmind /*
1374fbc8beaeSrmind  * mount_specific_key_create --
1375fbc8beaeSrmind  *	Create a key for subsystem mount-specific data.
1376fbc8beaeSrmind  */
1377fbc8beaeSrmind int
1378fbc8beaeSrmind mount_specific_key_create(specificdata_key_t *keyp, specificdata_dtor_t dtor)
1379fbc8beaeSrmind {
1380fbc8beaeSrmind 
1381fbc8beaeSrmind 	return specificdata_key_create(mount_specificdata_domain, keyp, dtor);
1382fbc8beaeSrmind }
1383fbc8beaeSrmind 
1384fbc8beaeSrmind /*
1385fbc8beaeSrmind  * mount_specific_key_delete --
1386fbc8beaeSrmind  *	Delete a key for subsystem mount-specific data.
1387fbc8beaeSrmind  */
1388fbc8beaeSrmind void
1389fbc8beaeSrmind mount_specific_key_delete(specificdata_key_t key)
1390fbc8beaeSrmind {
1391fbc8beaeSrmind 
1392fbc8beaeSrmind 	specificdata_key_delete(mount_specificdata_domain, key);
1393fbc8beaeSrmind }
1394fbc8beaeSrmind 
1395fbc8beaeSrmind /*
1396fbc8beaeSrmind  * mount_initspecific --
1397fbc8beaeSrmind  *	Initialize a mount's specificdata container.
1398fbc8beaeSrmind  */
1399fbc8beaeSrmind void
1400fbc8beaeSrmind mount_initspecific(struct mount *mp)
1401fbc8beaeSrmind {
140233ce4263Smartin 	int error __diagused;
1403fbc8beaeSrmind 
1404fbc8beaeSrmind 	error = specificdata_init(mount_specificdata_domain,
1405fbc8beaeSrmind 	    &mp->mnt_specdataref);
1406fbc8beaeSrmind 	KASSERT(error == 0);
1407fbc8beaeSrmind }
1408fbc8beaeSrmind 
1409fbc8beaeSrmind /*
1410fbc8beaeSrmind  * mount_finispecific --
1411fbc8beaeSrmind  *	Finalize a mount's specificdata container.
1412fbc8beaeSrmind  */
1413fbc8beaeSrmind void
1414fbc8beaeSrmind mount_finispecific(struct mount *mp)
1415fbc8beaeSrmind {
1416fbc8beaeSrmind 
1417fbc8beaeSrmind 	specificdata_fini(mount_specificdata_domain, &mp->mnt_specdataref);
1418fbc8beaeSrmind }
1419fbc8beaeSrmind 
1420fbc8beaeSrmind /*
1421fbc8beaeSrmind  * mount_getspecific --
1422fbc8beaeSrmind  *	Return mount-specific data corresponding to the specified key.
1423fbc8beaeSrmind  */
1424fbc8beaeSrmind void *
1425fbc8beaeSrmind mount_getspecific(struct mount *mp, specificdata_key_t key)
1426fbc8beaeSrmind {
1427fbc8beaeSrmind 
1428fbc8beaeSrmind 	return specificdata_getspecific(mount_specificdata_domain,
1429fbc8beaeSrmind 	    &mp->mnt_specdataref, key);
1430fbc8beaeSrmind }
1431fbc8beaeSrmind 
1432fbc8beaeSrmind /*
1433fbc8beaeSrmind  * mount_setspecific --
1434fbc8beaeSrmind  *	Set mount-specific data corresponding to the specified key.
1435fbc8beaeSrmind  */
1436fbc8beaeSrmind void
1437fbc8beaeSrmind mount_setspecific(struct mount *mp, specificdata_key_t key, void *data)
1438fbc8beaeSrmind {
1439fbc8beaeSrmind 
1440fbc8beaeSrmind 	specificdata_setspecific(mount_specificdata_domain,
1441fbc8beaeSrmind 	    &mp->mnt_specdataref, key, data);
1442fbc8beaeSrmind }
1443fbc8beaeSrmind 
1444fbc8beaeSrmind /*
1445fbc8beaeSrmind  * Check to see if a filesystem is mounted on a block device.
1446fbc8beaeSrmind  */
1447fbc8beaeSrmind int
1448fbc8beaeSrmind vfs_mountedon(vnode_t *vp)
1449fbc8beaeSrmind {
1450fbc8beaeSrmind 	vnode_t *vq;
1451fbc8beaeSrmind 	int error = 0;
1452fbc8beaeSrmind 
1453fbc8beaeSrmind 	if (vp->v_type != VBLK)
1454*e94a5d02Sriastradh 		return SET_ERROR(ENOTBLK);
14553881f4f3Shannken 	if (spec_node_getmountedfs(vp) != NULL)
1456*e94a5d02Sriastradh 		return SET_ERROR(EBUSY);
1457a2155d69Sriastradh 	if (spec_node_lookup_by_dev(vp->v_type, vp->v_rdev, VDEAD_NOWAIT, &vq)
1458a2155d69Sriastradh 	    == 0) {
14593881f4f3Shannken 		if (spec_node_getmountedfs(vq) != NULL)
1460*e94a5d02Sriastradh 			error = SET_ERROR(EBUSY);
14619f9ac3cbShannken 		vrele(vq);
1462fbc8beaeSrmind 	}
14639f9ac3cbShannken 
14649f9ac3cbShannken 	return error;
1465fbc8beaeSrmind }
1466fbc8beaeSrmind 
1467fbc8beaeSrmind /*
1468fbc8beaeSrmind  * Check if a device pointed to by vp is mounted.
1469fbc8beaeSrmind  *
1470fbc8beaeSrmind  * Returns:
1471fbc8beaeSrmind  *   EINVAL	if it's not a disk
1472fbc8beaeSrmind  *   EBUSY	if it's a disk and mounted
1473fbc8beaeSrmind  *   0		if it's a disk and not mounted
1474fbc8beaeSrmind  */
1475fbc8beaeSrmind int
1476fbc8beaeSrmind rawdev_mounted(vnode_t *vp, vnode_t **bvpp)
1477fbc8beaeSrmind {
1478fbc8beaeSrmind 	vnode_t *bvp;
1479fbc8beaeSrmind 	dev_t dev;
1480fbc8beaeSrmind 	int d_type;
1481fbc8beaeSrmind 
1482fbc8beaeSrmind 	bvp = NULL;
1483fbc8beaeSrmind 	d_type = D_OTHER;
1484fbc8beaeSrmind 
1485fbc8beaeSrmind 	if (iskmemvp(vp))
1486*e94a5d02Sriastradh 		return SET_ERROR(EINVAL);
1487fbc8beaeSrmind 
1488fbc8beaeSrmind 	switch (vp->v_type) {
1489fbc8beaeSrmind 	case VCHR: {
1490fbc8beaeSrmind 		const struct cdevsw *cdev;
1491fbc8beaeSrmind 
1492c2085187Schristos 		dev = vp->v_rdev;
1493fbc8beaeSrmind 		cdev = cdevsw_lookup(dev);
1494fbc8beaeSrmind 		if (cdev != NULL) {
1495fbc8beaeSrmind 			dev_t blkdev;
1496fbc8beaeSrmind 
1497fbc8beaeSrmind 			blkdev = devsw_chr2blk(dev);
1498fbc8beaeSrmind 			if (blkdev != NODEV) {
1499fbc8beaeSrmind 				if (vfinddev(blkdev, VBLK, &bvp) != 0) {
1500fbc8beaeSrmind 					d_type = (cdev->d_flag & D_TYPEMASK);
1501fbc8beaeSrmind 					/* XXX: what if bvp disappears? */
1502fbc8beaeSrmind 					vrele(bvp);
1503fbc8beaeSrmind 				}
1504fbc8beaeSrmind 			}
1505fbc8beaeSrmind 		}
1506fbc8beaeSrmind 
1507fbc8beaeSrmind 		break;
1508fbc8beaeSrmind 	}
1509fbc8beaeSrmind 
1510fbc8beaeSrmind 	case VBLK: {
1511fbc8beaeSrmind 		const struct bdevsw *bdev;
1512fbc8beaeSrmind 
1513c2085187Schristos 		dev = vp->v_rdev;
1514fbc8beaeSrmind 		bdev = bdevsw_lookup(dev);
1515fbc8beaeSrmind 		if (bdev != NULL)
1516fbc8beaeSrmind 			d_type = (bdev->d_flag & D_TYPEMASK);
1517fbc8beaeSrmind 
1518fbc8beaeSrmind 		bvp = vp;
1519fbc8beaeSrmind 
1520fbc8beaeSrmind 		break;
1521fbc8beaeSrmind 	}
1522fbc8beaeSrmind 
1523fbc8beaeSrmind 	default:
1524fbc8beaeSrmind 		break;
1525fbc8beaeSrmind 	}
1526fbc8beaeSrmind 
1527fbc8beaeSrmind 	if (d_type != D_DISK)
1528*e94a5d02Sriastradh 		return SET_ERROR(EINVAL);
1529fbc8beaeSrmind 
1530fbc8beaeSrmind 	if (bvpp != NULL)
1531fbc8beaeSrmind 		*bvpp = bvp;
1532fbc8beaeSrmind 
1533fbc8beaeSrmind 	/*
1534fbc8beaeSrmind 	 * XXX: This is bogus. We should be failing the request
1535fbc8beaeSrmind 	 * XXX: not only if this specific slice is mounted, but
1536fbc8beaeSrmind 	 * XXX: if it's on a disk with any other mounted slice.
1537fbc8beaeSrmind 	 */
1538fbc8beaeSrmind 	if (vfs_mountedon(bvp))
1539*e94a5d02Sriastradh 		return SET_ERROR(EBUSY);
1540fbc8beaeSrmind 
1541fbc8beaeSrmind 	return 0;
1542fbc8beaeSrmind }
1543fbc8beaeSrmind 
/*
 * Make a 'unique' number from a mount type name.
 *
 * For every character of the NUL-terminated name the accumulator is
 * shifted left by two bits and the character is XORed in.  The
 * accumulation is done in unsigned arithmetic so that long names
 * simply wrap modulo the width of long; left-shifting a signed long
 * past its range (or once it became negative) is undefined behaviour.
 *
 * Returns 0 for the empty string.
 */
long
makefstype(const char *type)
{
	unsigned long rv = 0;

	for (; *type != '\0'; type++) {
		rv <<= 2;
		/*
		 * Widen through long first so a negative char value
		 * contributes the same bits as it did with a signed
		 * accumulator.
		 */
		rv ^= (unsigned long)(long)*type;
	}

	return (long)rv;
}
15580b725b63Schristos 
15592f4fa4f9Shannken static struct mountlist_entry *
15602f4fa4f9Shannken mountlist_alloc(enum mountlist_type type, struct mount *mp)
15612f4fa4f9Shannken {
15622f4fa4f9Shannken 	struct mountlist_entry *me;
15632f4fa4f9Shannken 
15642f4fa4f9Shannken 	me = kmem_zalloc(sizeof(*me), KM_SLEEP);
15652f4fa4f9Shannken 	me->me_mount = mp;
15662f4fa4f9Shannken 	me->me_type = type;
15672f4fa4f9Shannken 
15682f4fa4f9Shannken 	return me;
15692f4fa4f9Shannken }
15702f4fa4f9Shannken 
15712f4fa4f9Shannken static void
15722f4fa4f9Shannken mountlist_free(struct mountlist_entry *me)
15732f4fa4f9Shannken {
15742f4fa4f9Shannken 
15752f4fa4f9Shannken 	kmem_free(me, sizeof(*me));
15762f4fa4f9Shannken }
15772f4fa4f9Shannken 
15782f4fa4f9Shannken void
15792f4fa4f9Shannken mountlist_iterator_init(mount_iterator_t **mip)
15802f4fa4f9Shannken {
15812f4fa4f9Shannken 	struct mountlist_entry *me;
15822f4fa4f9Shannken 
15832f4fa4f9Shannken 	me = mountlist_alloc(ME_MARKER, NULL);
15842f4fa4f9Shannken 	mutex_enter(&mountlist_lock);
1585256581e1Shannken 	TAILQ_INSERT_HEAD(&mountlist, me, me_list);
15862f4fa4f9Shannken 	mutex_exit(&mountlist_lock);
15872f4fa4f9Shannken 	*mip = (mount_iterator_t *)me;
15882f4fa4f9Shannken }
15892f4fa4f9Shannken 
15902f4fa4f9Shannken void
15912f4fa4f9Shannken mountlist_iterator_destroy(mount_iterator_t *mi)
15922f4fa4f9Shannken {
15932f4fa4f9Shannken 	struct mountlist_entry *marker = &mi->mi_entry;
15942f4fa4f9Shannken 
15952f4fa4f9Shannken 	if (marker->me_mount != NULL)
159620bb034fShannken 		vfs_unbusy(marker->me_mount);
15972f4fa4f9Shannken 
15982f4fa4f9Shannken 	mutex_enter(&mountlist_lock);
1599256581e1Shannken 	TAILQ_REMOVE(&mountlist, marker, me_list);
16002f4fa4f9Shannken 	mutex_exit(&mountlist_lock);
16012f4fa4f9Shannken 
16022f4fa4f9Shannken 	mountlist_free(marker);
16032f4fa4f9Shannken 
16042f4fa4f9Shannken }
16052f4fa4f9Shannken 
16062f4fa4f9Shannken /*
16072f4fa4f9Shannken  * Return the next mount or NULL for this iterator.
16082f4fa4f9Shannken  * Mark it busy on success.
16092f4fa4f9Shannken  */
1610bd152b56Shannken static inline struct mount *
1611bd152b56Shannken _mountlist_iterator_next(mount_iterator_t *mi, bool wait)
16122f4fa4f9Shannken {
16132f4fa4f9Shannken 	struct mountlist_entry *me, *marker = &mi->mi_entry;
16142f4fa4f9Shannken 	struct mount *mp;
1615bd152b56Shannken 	int error;
16162f4fa4f9Shannken 
16172f4fa4f9Shannken 	if (marker->me_mount != NULL) {
161820bb034fShannken 		vfs_unbusy(marker->me_mount);
16192f4fa4f9Shannken 		marker->me_mount = NULL;
16202f4fa4f9Shannken 	}
16212f4fa4f9Shannken 
16222f4fa4f9Shannken 	mutex_enter(&mountlist_lock);
16232f4fa4f9Shannken 	for (;;) {
16242f4fa4f9Shannken 		KASSERT(marker->me_type == ME_MARKER);
16252f4fa4f9Shannken 
16262f4fa4f9Shannken 		me = TAILQ_NEXT(marker, me_list);
16272f4fa4f9Shannken 		if (me == NULL) {
16282f4fa4f9Shannken 			/* End of list: keep marker and return. */
16292f4fa4f9Shannken 			mutex_exit(&mountlist_lock);
16302f4fa4f9Shannken 			return NULL;
16312f4fa4f9Shannken 		}
1632256581e1Shannken 		TAILQ_REMOVE(&mountlist, marker, me_list);
1633256581e1Shannken 		TAILQ_INSERT_AFTER(&mountlist, me, marker, me_list);
16342f4fa4f9Shannken 
16352f4fa4f9Shannken 		/* Skip other markers. */
16362f4fa4f9Shannken 		if (me->me_type != ME_MOUNT)
16372f4fa4f9Shannken 			continue;
16382f4fa4f9Shannken 
16392f4fa4f9Shannken 		/* Take an initial reference for vfs_busy() below. */
16402f4fa4f9Shannken 		mp = me->me_mount;
16412f4fa4f9Shannken 		KASSERT(mp != NULL);
1642ebb8f73bShannken 		vfs_ref(mp);
16432f4fa4f9Shannken 		mutex_exit(&mountlist_lock);
16442f4fa4f9Shannken 
16452f4fa4f9Shannken 		/* Try to mark this mount busy and return on success. */
1646bd152b56Shannken 		if (wait)
1647bd152b56Shannken 			error = vfs_busy(mp);
1648bd152b56Shannken 		else
1649bd152b56Shannken 			error = vfs_trybusy(mp);
1650bd152b56Shannken 		if (error == 0) {
1651ebb8f73bShannken 			vfs_rele(mp);
16522f4fa4f9Shannken 			marker->me_mount = mp;
16532f4fa4f9Shannken 			return mp;
16542f4fa4f9Shannken 		}
1655ebb8f73bShannken 		vfs_rele(mp);
16562f4fa4f9Shannken 		mutex_enter(&mountlist_lock);
16572f4fa4f9Shannken 	}
16582f4fa4f9Shannken }
16592f4fa4f9Shannken 
1660bd152b56Shannken struct mount *
1661bd152b56Shannken mountlist_iterator_next(mount_iterator_t *mi)
1662bd152b56Shannken {
1663bd152b56Shannken 
1664bd152b56Shannken 	return _mountlist_iterator_next(mi, true);
1665bd152b56Shannken }
1666bd152b56Shannken 
1667bd152b56Shannken struct mount *
1668bd152b56Shannken mountlist_iterator_trynext(mount_iterator_t *mi)
1669bd152b56Shannken {
1670bd152b56Shannken 
1671bd152b56Shannken 	return _mountlist_iterator_next(mi, false);
1672bd152b56Shannken }
1673bd152b56Shannken 
16742f4fa4f9Shannken /*
16752f4fa4f9Shannken  * Attach new mount to the end of the mount list.
16762f4fa4f9Shannken  */
16770b725b63Schristos void
16780b725b63Schristos mountlist_append(struct mount *mp)
16790b725b63Schristos {
16802f4fa4f9Shannken 	struct mountlist_entry *me;
16812f4fa4f9Shannken 
16822f4fa4f9Shannken 	me = mountlist_alloc(ME_MOUNT, mp);
16830b725b63Schristos 	mutex_enter(&mountlist_lock);
1684256581e1Shannken 	TAILQ_INSERT_TAIL(&mountlist, me, me_list);
16850b725b63Schristos 	mutex_exit(&mountlist_lock);
16860b725b63Schristos }
16872f4fa4f9Shannken 
16882f4fa4f9Shannken /*
16892f4fa4f9Shannken  * Remove mount from mount list.
16901cf06cb4Sriastradh  */
16911cf06cb4Sriastradh void
16922f4fa4f9Shannken mountlist_remove(struct mount *mp)
16932f4fa4f9Shannken {
16942f4fa4f9Shannken 	struct mountlist_entry *me;
16952f4fa4f9Shannken 
16962f4fa4f9Shannken 	mutex_enter(&mountlist_lock);
1697256581e1Shannken 	TAILQ_FOREACH(me, &mountlist, me_list)
16982f4fa4f9Shannken 		if (me->me_type == ME_MOUNT && me->me_mount == mp)
16992f4fa4f9Shannken 			break;
17002f4fa4f9Shannken 	KASSERT(me != NULL);
1701256581e1Shannken 	TAILQ_REMOVE(&mountlist, me, me_list);
17022f4fa4f9Shannken 	mutex_exit(&mountlist_lock);
17032f4fa4f9Shannken 	mountlist_free(me);
17042f4fa4f9Shannken }
17052f4fa4f9Shannken 
17062f4fa4f9Shannken /*
17072f4fa4f9Shannken  * Unlocked variant to traverse the mountlist.
17082f4fa4f9Shannken  * To be used from DDB only.
17092f4fa4f9Shannken  */
17102f4fa4f9Shannken struct mount *
17112f4fa4f9Shannken _mountlist_next(struct mount *mp)
17122f4fa4f9Shannken {
17132f4fa4f9Shannken 	struct mountlist_entry *me;
17142f4fa4f9Shannken 
17152f4fa4f9Shannken 	if (mp == NULL) {
1716256581e1Shannken 		me = TAILQ_FIRST(&mountlist);
17172f4fa4f9Shannken 	} else {
1718256581e1Shannken 		TAILQ_FOREACH(me, &mountlist, me_list)
17192f4fa4f9Shannken 			if (me->me_type == ME_MOUNT && me->me_mount == mp)
17202f4fa4f9Shannken 				break;
17212f4fa4f9Shannken 		if (me != NULL)
17222f4fa4f9Shannken 			me = TAILQ_NEXT(me, me_list);
17232f4fa4f9Shannken 	}
17242f4fa4f9Shannken 
17252f4fa4f9Shannken 	while (me != NULL && me->me_type != ME_MOUNT)
17262f4fa4f9Shannken 		me = TAILQ_NEXT(me, me_list);
17272f4fa4f9Shannken 
17282f4fa4f9Shannken 	return (me ? me->me_mount : NULL);
17292f4fa4f9Shannken }
1730