xref: /freebsd-src/sys/contrib/openzfs/module/os/freebsd/spl/spl_vfs.c (revision ce4dcb97ca433b2a2f03fbae957dae0ff16f6f51)
1eda14cbcSMatt Macy /*
2eda14cbcSMatt Macy  * Copyright (c) 2006-2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3eda14cbcSMatt Macy  * All rights reserved.
4eda14cbcSMatt Macy  *
5eda14cbcSMatt Macy  * Redistribution and use in source and binary forms, with or without
6eda14cbcSMatt Macy  * modification, are permitted provided that the following conditions
7eda14cbcSMatt Macy  * are met:
8eda14cbcSMatt Macy  * 1. Redistributions of source code must retain the above copyright
9eda14cbcSMatt Macy  *    notice, this list of conditions and the following disclaimer.
10eda14cbcSMatt Macy  * 2. Redistributions in binary form must reproduce the above copyright
11eda14cbcSMatt Macy  *    notice, this list of conditions and the following disclaimer in the
12eda14cbcSMatt Macy  *    documentation and/or other materials provided with the distribution.
13eda14cbcSMatt Macy  *
14eda14cbcSMatt Macy  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15eda14cbcSMatt Macy  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16eda14cbcSMatt Macy  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17eda14cbcSMatt Macy  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18eda14cbcSMatt Macy  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19eda14cbcSMatt Macy  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20eda14cbcSMatt Macy  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21eda14cbcSMatt Macy  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22eda14cbcSMatt Macy  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23eda14cbcSMatt Macy  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24eda14cbcSMatt Macy  * SUCH DAMAGE.
25eda14cbcSMatt Macy  */
26eda14cbcSMatt Macy 
27eda14cbcSMatt Macy #include <sys/types.h>
28eda14cbcSMatt Macy #include <sys/param.h>
29eda14cbcSMatt Macy #include <sys/kernel.h>
30eda14cbcSMatt Macy #include <sys/systm.h>
31eda14cbcSMatt Macy #include <sys/malloc.h>
32eda14cbcSMatt Macy #include <sys/mount.h>
33eda14cbcSMatt Macy #include <sys/cred.h>
34eda14cbcSMatt Macy #include <sys/vfs.h>
35eda14cbcSMatt Macy #include <sys/priv.h>
36eda14cbcSMatt Macy #include <sys/libkern.h>
37eda14cbcSMatt Macy 
38eda14cbcSMatt Macy #include <sys/mutex.h>
39eda14cbcSMatt Macy #include <sys/vnode.h>
40eda14cbcSMatt Macy #include <sys/taskq.h>
41eda14cbcSMatt Macy 
42eda14cbcSMatt Macy #include <sys/ccompat.h>
43eda14cbcSMatt Macy 
44eda14cbcSMatt Macy MALLOC_DECLARE(M_MOUNT);
45eda14cbcSMatt Macy 
46eda14cbcSMatt Macy void
47eda14cbcSMatt Macy vfs_setmntopt(vfs_t *vfsp, const char *name, const char *arg,
48eda14cbcSMatt Macy     int flags __unused)
49eda14cbcSMatt Macy {
50eda14cbcSMatt Macy 	struct vfsopt *opt;
51eda14cbcSMatt Macy 	size_t namesize;
52eda14cbcSMatt Macy 	int locked;
53eda14cbcSMatt Macy 
54eda14cbcSMatt Macy 	if (!(locked = mtx_owned(MNT_MTX(vfsp))))
55eda14cbcSMatt Macy 		MNT_ILOCK(vfsp);
56eda14cbcSMatt Macy 
57eda14cbcSMatt Macy 	if (vfsp->mnt_opt == NULL) {
58eda14cbcSMatt Macy 		void *opts;
59eda14cbcSMatt Macy 
60eda14cbcSMatt Macy 		MNT_IUNLOCK(vfsp);
61eda14cbcSMatt Macy 		opts = malloc(sizeof (*vfsp->mnt_opt), M_MOUNT, M_WAITOK);
62eda14cbcSMatt Macy 		MNT_ILOCK(vfsp);
63eda14cbcSMatt Macy 		if (vfsp->mnt_opt == NULL) {
64eda14cbcSMatt Macy 			vfsp->mnt_opt = opts;
65eda14cbcSMatt Macy 			TAILQ_INIT(vfsp->mnt_opt);
66eda14cbcSMatt Macy 		} else {
67eda14cbcSMatt Macy 			free(opts, M_MOUNT);
68eda14cbcSMatt Macy 		}
69eda14cbcSMatt Macy 	}
70eda14cbcSMatt Macy 
71eda14cbcSMatt Macy 	MNT_IUNLOCK(vfsp);
72eda14cbcSMatt Macy 
73eda14cbcSMatt Macy 	opt = malloc(sizeof (*opt), M_MOUNT, M_WAITOK);
74eda14cbcSMatt Macy 	namesize = strlen(name) + 1;
75eda14cbcSMatt Macy 	opt->name = malloc(namesize, M_MOUNT, M_WAITOK);
76eda14cbcSMatt Macy 	strlcpy(opt->name, name, namesize);
77eda14cbcSMatt Macy 	opt->pos = -1;
78eda14cbcSMatt Macy 	opt->seen = 1;
79eda14cbcSMatt Macy 	if (arg == NULL) {
80eda14cbcSMatt Macy 		opt->value = NULL;
81eda14cbcSMatt Macy 		opt->len = 0;
82eda14cbcSMatt Macy 	} else {
83eda14cbcSMatt Macy 		opt->len = strlen(arg) + 1;
84eda14cbcSMatt Macy 		opt->value = malloc(opt->len, M_MOUNT, M_WAITOK);
85da5137abSMartin Matuska 		memcpy(opt->value, arg, opt->len);
86eda14cbcSMatt Macy 	}
87eda14cbcSMatt Macy 
88eda14cbcSMatt Macy 	MNT_ILOCK(vfsp);
89eda14cbcSMatt Macy 	TAILQ_INSERT_TAIL(vfsp->mnt_opt, opt, link);
90eda14cbcSMatt Macy 	if (!locked)
91eda14cbcSMatt Macy 		MNT_IUNLOCK(vfsp);
92eda14cbcSMatt Macy }
93eda14cbcSMatt Macy 
94eda14cbcSMatt Macy void
95eda14cbcSMatt Macy vfs_clearmntopt(vfs_t *vfsp, const char *name)
96eda14cbcSMatt Macy {
97eda14cbcSMatt Macy 	int locked;
98eda14cbcSMatt Macy 
99eda14cbcSMatt Macy 	if (!(locked = mtx_owned(MNT_MTX(vfsp))))
100eda14cbcSMatt Macy 		MNT_ILOCK(vfsp);
101eda14cbcSMatt Macy 	vfs_deleteopt(vfsp->mnt_opt, name);
102eda14cbcSMatt Macy 	if (!locked)
103eda14cbcSMatt Macy 		MNT_IUNLOCK(vfsp);
104eda14cbcSMatt Macy }
105eda14cbcSMatt Macy 
106eda14cbcSMatt Macy int
107eda14cbcSMatt Macy vfs_optionisset(const vfs_t *vfsp, const char *opt, char **argp)
108eda14cbcSMatt Macy {
109eda14cbcSMatt Macy 	struct vfsoptlist *opts = vfsp->mnt_optnew;
110eda14cbcSMatt Macy 	int error;
111eda14cbcSMatt Macy 
112eda14cbcSMatt Macy 	if (opts == NULL)
113eda14cbcSMatt Macy 		return (0);
114eda14cbcSMatt Macy 	error = vfs_getopt(opts, opt, (void **)argp, NULL);
115eda14cbcSMatt Macy 	return (error != 0 ? 0 : 1);
116eda14cbcSMatt Macy }
117eda14cbcSMatt Macy 
/*
 * Mount a snapshot pseudo-filesystem of type 'fstype' at 'fspath' on
 * top of the directory vnode '*vpp'.
 *
 * On entry '*vpp' must be exclusively locked (asserted below).  On
 * success, '*vpp' is replaced with the exclusively-locked root vnode
 * of the new mount and 0 is returned.  On failure, '*vpp' is NULL,
 * the covered vnode has been vput(), and an errno value is returned.
 *
 * 'fspec' becomes the "from" mount option; 'fsflags' is masked with
 * MNT_UPDATEMASK and forced read-only/nosuid/ignore; 'parent_vfsp' is
 * used only to clone mnt_exjail credentials when the kernel provides
 * vfs_exjail_clone().
 */
int
mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
    char *fspec, int fsflags, vfs_t *parent_vfsp)
{
	struct vfsconf *vfsp;
	struct mount *mp;
	vnode_t *vp, *mvp;
	int error;

	ASSERT_VOP_ELOCKED(*vpp, "mount_snapshot");

	/* Take over the caller's reference on the covered vnode. */
	vp = *vpp;
	*vpp = NULL;
	error = 0;

	/*
	 * Be ultra-paranoid about making sure the type and fspath
	 * variables will fit in our mp buffers, including the
	 * terminating NUL.
	 */
	if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN)
		error = ENAMETOOLONG;
	/* Look up the filesystem type (the "kld" variant may load it). */
	if (error == 0 && (vfsp = vfs_byname_kld(fstype, td, &error)) == NULL)
		error = ENODEV;
	if (error == 0 && vp->v_type != VDIR)
		error = ENOTDIR;
	/*
	 * We need vnode lock to protect v_mountedhere and vnode interlock
	 * to protect v_iflag.
	 */
	if (error == 0) {
		VI_LOCK(vp);
		if ((vp->v_iflag & VI_MOUNT) == 0 && vp->v_mountedhere == NULL)
			vp->v_iflag |= VI_MOUNT;
		else
			error = EBUSY;
		VI_UNLOCK(vp);
	}
	if (error != 0) {
		vput(vp);
		return (error);
	}
	/*
	 * Open a seqc write window on the covered vnode, then drop its
	 * lock for the duration of the potentially sleeping mount work.
	 */
	vn_seqc_write_begin(vp);
	VOP_UNLOCK(vp);

	/*
	 * Allocate and initialize the filesystem.
	 * We don't want regular user that triggered snapshot mount to be able
	 * to unmount it, so pass credentials of the parent mount.
	 */
	mp = vfs_mount_alloc(vp, vfsp, fspath, vp->v_mount->mnt_cred);

	/*
	 * Record the snapshot source as the "from" option.
	 * vfs_setmntopt() appends to mnt_opt, so stage the list there
	 * and then move it to mnt_optnew, which VFS_MOUNT() consumes.
	 */
	mp->mnt_optnew = NULL;
	vfs_setmntopt(mp, "from", fspec, 0);
	mp->mnt_optnew = mp->mnt_opt;
	mp->mnt_opt = NULL;

	/*
	 * Set the mount level flags.
	 */
	mp->mnt_flag = fsflags & MNT_UPDATEMASK;
	/*
	 * Snapshots are always read-only.
	 */
	mp->mnt_flag |= MNT_RDONLY;
	/*
	 * We don't want snapshots to allow access to vulnerable setuid
	 * programs, so we turn off setuid when mounting snapshots.
	 */
	mp->mnt_flag |= MNT_NOSUID;
	/*
	 * We don't want snapshots to be visible in regular
	 * mount(8) and df(1) output.
	 */
	mp->mnt_flag |= MNT_IGNORE;

	error = VFS_MOUNT(mp);
	if (error != 0) {
		/*
		 * Clear VI_MOUNT and decrement the use count "atomically",
		 * under the vnode lock.  This is not strictly required,
		 * but makes it easier to reason about the life-cycle and
		 * ownership of the covered vnode.
		 */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		VI_LOCK(vp);
		vp->v_iflag &= ~VI_MOUNT;
		VI_UNLOCK(vp);
		vn_seqc_write_end(vp);
		vput(vp);
		vfs_unbusy(mp);
		vfs_freeopts(mp->mnt_optnew);
		mp->mnt_vnodecovered = NULL;
		vfs_mount_destroy(mp);
		return (error);
	}

	/* Make the options VFS_MOUNT() processed the active set. */
	if (mp->mnt_opt != NULL)
		vfs_freeopts(mp->mnt_opt);
	mp->mnt_opt = mp->mnt_optnew;
	/* Prime mnt_stat; the result is deliberately ignored. */
	(void) VFS_STATFS(mp, &mp->mnt_stat);

#ifdef VFS_SUPPORTS_EXJAIL_CLONE
	/*
	 * Clone the mnt_exjail credentials of the parent, as required.
	 */
	vfs_exjail_clone(parent_vfsp, mp);
#endif

	/*
	 * Prevent external consumers of mount options from reading
	 * mnt_optnew.
	 */
	mp->mnt_optnew = NULL;

	/* Re-lock the covered vnode to publish the mount point. */
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
#ifdef FREEBSD_NAMECACHE
	cache_purge(vp);
#endif
	VI_LOCK(vp);
	vp->v_iflag &= ~VI_MOUNT;
#ifdef VIRF_MOUNTPOINT
	vn_irflag_set_locked(vp, VIRF_MOUNTPOINT);
#endif
	vp->v_mountedhere = mp;
	VI_UNLOCK(vp);
	/* Put the new filesystem on the mount list. */
	mtx_lock(&mountlist_mtx);
	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
	mtx_unlock(&mountlist_mtx);
	vfs_event_signal(NULL, VQ_MOUNT, 0);
	/* Hand the (locked) root of the new mount back to the caller. */
	if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp))
		panic("mount: lost mount");
	vn_seqc_write_end(vp);
	VOP_UNLOCK(vp);
	vfs_op_exit(mp);
	vfs_unbusy(mp);
	*vpp = mvp;
	return (0);
}
258eda14cbcSMatt Macy 
259eda14cbcSMatt Macy /*
260eda14cbcSMatt Macy  * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
261eda14cbcSMatt Macy  * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
262eda14cbcSMatt Macy  * the file system as a result of releasing the vnode. Note, file systems
263eda14cbcSMatt Macy  * already have to handle the race where the vnode is incremented before the
264eda14cbcSMatt Macy  * inactive routine is called and does its locking.
265eda14cbcSMatt Macy  *
266eda14cbcSMatt Macy  * Warning: Excessive use of this routine can lead to performance problems.
267eda14cbcSMatt Macy  * This is because taskqs throttle back allocation if too many are created.
268eda14cbcSMatt Macy  */
269eda14cbcSMatt Macy void
270eda14cbcSMatt Macy vn_rele_async(vnode_t *vp, taskq_t *taskq)
271eda14cbcSMatt Macy {
27216038816SMartin Matuska 	VERIFY3U(vp->v_usecount, >, 0);
273*ce4dcb97SMartin Matuska 	if (refcount_release_if_not_last(&vp->v_usecount))
274eda14cbcSMatt Macy 		return;
27516038816SMartin Matuska 	VERIFY3U(taskq_dispatch((taskq_t *)taskq,
27616038816SMartin Matuska 	    (task_func_t *)vrele, vp, TQ_SLEEP), !=, 0);
277eda14cbcSMatt Macy }
278