xref: /onnv-gate/usr/src/uts/common/fs/zfs/zfs_vfsops.c (revision 789:b348f31ed315)
1*789Sahrens /*
2*789Sahrens  * CDDL HEADER START
3*789Sahrens  *
4*789Sahrens  * The contents of this file are subject to the terms of the
5*789Sahrens  * Common Development and Distribution License, Version 1.0 only
6*789Sahrens  * (the "License").  You may not use this file except in compliance
7*789Sahrens  * with the License.
8*789Sahrens  *
9*789Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*789Sahrens  * or http://www.opensolaris.org/os/licensing.
11*789Sahrens  * See the License for the specific language governing permissions
12*789Sahrens  * and limitations under the License.
13*789Sahrens  *
14*789Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
15*789Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*789Sahrens  * If applicable, add the following below this CDDL HEADER, with the
17*789Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
18*789Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
19*789Sahrens  *
20*789Sahrens  * CDDL HEADER END
21*789Sahrens  */
22*789Sahrens /*
23*789Sahrens  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*789Sahrens  * Use is subject to license terms.
25*789Sahrens  */
26*789Sahrens 
27*789Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*789Sahrens 
29*789Sahrens #include <sys/types.h>
30*789Sahrens #include <sys/param.h>
31*789Sahrens #include <sys/systm.h>
32*789Sahrens #include <sys/sysmacros.h>
33*789Sahrens #include <sys/kmem.h>
34*789Sahrens #include <sys/pathname.h>
35*789Sahrens #include <sys/acl.h>
36*789Sahrens #include <sys/vnode.h>
37*789Sahrens #include <sys/vfs.h>
38*789Sahrens #include <sys/mntent.h>
39*789Sahrens #include <sys/mount.h>
40*789Sahrens #include <sys/cmn_err.h>
41*789Sahrens #include "fs/fs_subr.h"
42*789Sahrens #include <sys/zfs_znode.h>
43*789Sahrens #include <sys/zil.h>
44*789Sahrens #include <sys/fs/zfs.h>
45*789Sahrens #include <sys/dmu.h>
46*789Sahrens #include <sys/dsl_prop.h>
47*789Sahrens #include <sys/spa.h>
48*789Sahrens #include <sys/zap.h>
49*789Sahrens #include <sys/varargs.h>
50*789Sahrens #include <sys/policy.h>
51*789Sahrens #include <sys/atomic.h>
52*789Sahrens #include <sys/mkdev.h>
53*789Sahrens #include <sys/modctl.h>
54*789Sahrens #include <sys/zfs_ioctl.h>
55*789Sahrens #include <sys/zfs_ctldir.h>
56*789Sahrens 
/* Filesystem type index; zfs_mount() copies it into each vfs_t's vfs_fstype. */
int zfsfstype;
/* ZFS vfs operations vector; NULL here, set up outside this part of the file. */
vfsops_t *zfs_vfsops = NULL;
/*
 * Major/minor pair from which zfs_mount() builds a unique dev_t for each
 * mount.  zfs_mount() updates both while holding zfs_dev_mtx.
 */
static major_t	zfs_major;
static minor_t zfs_minor;
static kmutex_t	zfs_dev_mtx;
62*789Sahrens 
/*
 * Forward declarations for the VFS entry points named in the operation
 * templates below, plus zfs_objset_close(), which zfs_umount() calls.
 */
static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr);
static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr);
static int zfs_root(vfs_t *vfsp, vnode_t **vpp);
static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp);
static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp);
static void zfs_freevfs(vfs_t *vfsp);
static void zfs_objset_close(zfsvfs_t *zfsvfs);
70*789Sahrens 
/*
 * Operation table pairing each VFSNAME_* operation name with the ZFS
 * function that implements it.  The list is terminated by a NULL, NULL pair.
 * NOTE(review): presumably consumed when zfs_vfsops is built -- confirm
 * against the init code, which is not in this part of the file.
 */
static const fs_operation_def_t zfs_vfsops_template[] = {
	VFSNAME_MOUNT, zfs_mount,
	VFSNAME_UNMOUNT, zfs_umount,
	VFSNAME_ROOT, zfs_root,
	VFSNAME_STATVFS, zfs_statvfs,
	VFSNAME_SYNC, (fs_generic_func_p) zfs_sync,
	VFSNAME_VGET, zfs_vget,
	VFSNAME_FREEVFS, (fs_generic_func_p) zfs_freevfs,
	NULL, NULL
};
81*789Sahrens 
/*
 * Reduced operation table that supplies only the freevfs operation.
 * NOTE(review): the name suggests it is installed on a filesystem whose
 * other operations should fail with EIO (e.g. after a forced unmount) --
 * confirm against the code that uses it.
 */
static const fs_operation_def_t zfs_vfsops_eio_template[] = {
	VFSNAME_FREEVFS, (fs_generic_func_p) zfs_freevfs,
	NULL, NULL
};
86*789Sahrens 
/*
 * We need to keep a count of active fs's.
 * This is necessary to prevent our module
 * from being unloaded after a umount -f.
 * zfs_mount() atomically increments the count on every successful mount.
 */
static uint32_t	zfs_active_fs_count = 0;
93*789Sahrens 
/*
 * NULL-terminated option-name lists referenced from the mntopts[] entries
 * below: the "noatime" entry names "atime", and the "atime" entry names
 * "noatime" (presumably the options each one cancels -- confirm against
 * mntopt_t).
 */
static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };
static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };

/*
 * Mount options described by ZFS: xattr (flagged MO_NODISPLAY|MO_DEFAULT),
 * noatime (flagged MO_DEFAULT) and atime (no flags).
 */
static mntopt_t mntopts[] = {
	{ MNTOPT_XATTR, NULL, NULL, MO_NODISPLAY|MO_DEFAULT, NULL },
	{ MNTOPT_NOATIME, noatime_cancel, NULL, MO_DEFAULT, NULL },
	{ MNTOPT_ATIME, atime_cancel, NULL, 0, NULL }
};

/* Option table wrapper: entry count followed by the mntopts[] array. */
static mntopts_t zfs_mntopts = {
	sizeof (mntopts) / sizeof (mntopt_t),
	mntopts
};
107*789Sahrens 
108*789Sahrens /*ARGSUSED*/
109*789Sahrens int
110*789Sahrens zfs_sync(vfs_t *vfsp, short flag, cred_t *cr)
111*789Sahrens {
112*789Sahrens 	/*
113*789Sahrens 	 * Data integrity is job one.  We don't want a compromised kernel
114*789Sahrens 	 * writing to the storage pool, so we never sync during panic.
115*789Sahrens 	 */
116*789Sahrens 	if (panicstr)
117*789Sahrens 		return (0);
118*789Sahrens 
119*789Sahrens 	/*
120*789Sahrens 	 * SYNC_ATTR is used by fsflush() to force old filesystems like UFS
121*789Sahrens 	 * to sync metadata, which they would otherwise cache indefinitely.
122*789Sahrens 	 * Semantically, the only requirement is that the sync be initiated.
123*789Sahrens 	 * The DMU syncs out txgs frequently, so there's nothing to do.
124*789Sahrens 	 */
125*789Sahrens 	if (flag & SYNC_ATTR)
126*789Sahrens 		return (0);
127*789Sahrens 
128*789Sahrens 	if (vfsp != NULL) {
129*789Sahrens 		/*
130*789Sahrens 		 * Sync a specific filesystem.
131*789Sahrens 		 */
132*789Sahrens 		zfsvfs_t *zfsvfs = vfsp->vfs_data;
133*789Sahrens 
134*789Sahrens 		ZFS_ENTER(zfsvfs);
135*789Sahrens 		if (zfsvfs->z_log != NULL)
136*789Sahrens 			zil_commit(zfsvfs->z_log, UINT64_MAX, FSYNC);
137*789Sahrens 		else
138*789Sahrens 			txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
139*789Sahrens 		ZFS_EXIT(zfsvfs);
140*789Sahrens 	} else {
141*789Sahrens 		/*
142*789Sahrens 		 * Sync all ZFS filesystems.  This is what happens when you
143*789Sahrens 		 * run sync(1M).  Unlike other filesystems, ZFS honors the
144*789Sahrens 		 * request by waiting for all pools to commit all dirty data.
145*789Sahrens 		 */
146*789Sahrens 		spa_sync_allpools();
147*789Sahrens 	}
148*789Sahrens 
149*789Sahrens 	return (0);
150*789Sahrens }
151*789Sahrens 
152*789Sahrens static void
153*789Sahrens atime_changed_cb(void *arg, uint64_t newval)
154*789Sahrens {
155*789Sahrens 	zfsvfs_t *zfsvfs = arg;
156*789Sahrens 
157*789Sahrens 	if (newval == TRUE) {
158*789Sahrens 		zfsvfs->z_atime = TRUE;
159*789Sahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
160*789Sahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
161*789Sahrens 	} else {
162*789Sahrens 		zfsvfs->z_atime = FALSE;
163*789Sahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
164*789Sahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
165*789Sahrens 	}
166*789Sahrens }
167*789Sahrens 
168*789Sahrens static void
169*789Sahrens blksz_changed_cb(void *arg, uint64_t newval)
170*789Sahrens {
171*789Sahrens 	zfsvfs_t *zfsvfs = arg;
172*789Sahrens 
173*789Sahrens 	if (newval < SPA_MINBLOCKSIZE ||
174*789Sahrens 	    newval > SPA_MAXBLOCKSIZE || !ISP2(newval))
175*789Sahrens 		newval = SPA_MAXBLOCKSIZE;
176*789Sahrens 
177*789Sahrens 	zfsvfs->z_max_blksz = newval;
178*789Sahrens 	zfsvfs->z_vfs->vfs_bsize = newval;
179*789Sahrens }
180*789Sahrens 
181*789Sahrens static void
182*789Sahrens readonly_changed_cb(void *arg, uint64_t newval)
183*789Sahrens {
184*789Sahrens 	zfsvfs_t *zfsvfs = arg;
185*789Sahrens 
186*789Sahrens 	if (newval) {
187*789Sahrens 		/* XXX locking on vfs_flag? */
188*789Sahrens 		zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
189*789Sahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
190*789Sahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
191*789Sahrens 		(void) zfs_delete_thread_target(zfsvfs, 0);
192*789Sahrens 	} else {
193*789Sahrens 		/* XXX locking on vfs_flag? */
194*789Sahrens 		zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
195*789Sahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
196*789Sahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
197*789Sahrens 		(void) zfs_delete_thread_target(zfsvfs, 1);
198*789Sahrens 	}
199*789Sahrens }
200*789Sahrens 
201*789Sahrens static void
202*789Sahrens devices_changed_cb(void *arg, uint64_t newval)
203*789Sahrens {
204*789Sahrens 	zfsvfs_t *zfsvfs = arg;
205*789Sahrens 
206*789Sahrens 	if (newval == FALSE) {
207*789Sahrens 		zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES;
208*789Sahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES);
209*789Sahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0);
210*789Sahrens 	} else {
211*789Sahrens 		zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES;
212*789Sahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES);
213*789Sahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0);
214*789Sahrens 	}
215*789Sahrens }
216*789Sahrens 
217*789Sahrens static void
218*789Sahrens setuid_changed_cb(void *arg, uint64_t newval)
219*789Sahrens {
220*789Sahrens 	zfsvfs_t *zfsvfs = arg;
221*789Sahrens 
222*789Sahrens 	if (newval == FALSE) {
223*789Sahrens 		zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
224*789Sahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
225*789Sahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
226*789Sahrens 	} else {
227*789Sahrens 		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
228*789Sahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
229*789Sahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
230*789Sahrens 	}
231*789Sahrens }
232*789Sahrens 
233*789Sahrens static void
234*789Sahrens exec_changed_cb(void *arg, uint64_t newval)
235*789Sahrens {
236*789Sahrens 	zfsvfs_t *zfsvfs = arg;
237*789Sahrens 
238*789Sahrens 	if (newval == FALSE) {
239*789Sahrens 		zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
240*789Sahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
241*789Sahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
242*789Sahrens 	} else {
243*789Sahrens 		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
244*789Sahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
245*789Sahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
246*789Sahrens 	}
247*789Sahrens }
248*789Sahrens 
249*789Sahrens static void
250*789Sahrens snapdir_changed_cb(void *arg, uint64_t newval)
251*789Sahrens {
252*789Sahrens 	zfsvfs_t *zfsvfs = arg;
253*789Sahrens 
254*789Sahrens 	zfsvfs->z_show_ctldir = newval;
255*789Sahrens }
256*789Sahrens 
257*789Sahrens static void
258*789Sahrens acl_mode_changed_cb(void *arg, uint64_t newval)
259*789Sahrens {
260*789Sahrens 	zfsvfs_t *zfsvfs = arg;
261*789Sahrens 
262*789Sahrens 	zfsvfs->z_acl_mode = newval;
263*789Sahrens }
264*789Sahrens 
265*789Sahrens static void
266*789Sahrens acl_inherit_changed_cb(void *arg, uint64_t newval)
267*789Sahrens {
268*789Sahrens 	zfsvfs_t *zfsvfs = arg;
269*789Sahrens 
270*789Sahrens 	zfsvfs->z_acl_inherit = newval;
271*789Sahrens }
272*789Sahrens 
273*789Sahrens /*ARGSUSED*/
274*789Sahrens static int
275*789Sahrens zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
276*789Sahrens {
277*789Sahrens 	zfsvfs_t	*zfsvfs = NULL;
278*789Sahrens 	znode_t		*zp = NULL;
279*789Sahrens 	vnode_t		*vp = NULL;
280*789Sahrens 	objset_t	*os = NULL;
281*789Sahrens 	struct dsl_dataset *ds;
282*789Sahrens 	char		*osname;
283*789Sahrens 	uint64_t	readonly, recordsize;
284*789Sahrens 	pathname_t	spn;
285*789Sahrens 	dev_t		mount_dev;
286*789Sahrens 	major_t		new_major;
287*789Sahrens 	int		mode;
288*789Sahrens 	int		error = 0;
289*789Sahrens 	uio_seg_t	fromspace = (uap->flags & MS_SYSSPACE) ?
290*789Sahrens 				UIO_SYSSPACE : UIO_USERSPACE;
291*789Sahrens 	int		canwrite;
292*789Sahrens 
293*789Sahrens 	if (mvp->v_type != VDIR)
294*789Sahrens 		return (ENOTDIR);
295*789Sahrens 
296*789Sahrens 	mutex_enter(&mvp->v_lock);
297*789Sahrens 	if ((uap->flags & MS_REMOUNT) == 0 &&
298*789Sahrens 	    (uap->flags & MS_OVERLAY) == 0 &&
299*789Sahrens 	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
300*789Sahrens 		mutex_exit(&mvp->v_lock);
301*789Sahrens 		return (EBUSY);
302*789Sahrens 	}
303*789Sahrens 	mutex_exit(&mvp->v_lock);
304*789Sahrens 
305*789Sahrens 	/*
306*789Sahrens 	 * ZFS does not support passing unparsed data in via MS_DATA.
307*789Sahrens 	 * Users should use the MS_OPTIONSTR interface; this means
308*789Sahrens 	 * that all option parsing is already done and the options struct
309*789Sahrens 	 * can be interrogated.
310*789Sahrens 	 */
311*789Sahrens 	if ((uap->flags & MS_DATA) && uap->datalen > 0)
312*789Sahrens 		return (EINVAL);
313*789Sahrens 
314*789Sahrens 	/*
315*789Sahrens 	 * When doing a remount, we simply refresh our temporary properties
316*789Sahrens 	 * according to those options set in the current VFS options.
317*789Sahrens 	 */
318*789Sahrens 	if (uap->flags & MS_REMOUNT) {
319*789Sahrens 		zfsvfs = vfsp->vfs_data;
320*789Sahrens 
321*789Sahrens 		if (vfs_optionisset(vfsp, MNTOPT_RO, NULL))
322*789Sahrens 			readonly_changed_cb(zfsvfs, B_TRUE);
323*789Sahrens 		else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
324*789Sahrens 			if (dmu_objset_is_snapshot(zfsvfs->z_os))
325*789Sahrens 				return (EROFS);
326*789Sahrens 			readonly_changed_cb(zfsvfs, B_FALSE);
327*789Sahrens 		}
328*789Sahrens 
329*789Sahrens 		if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
330*789Sahrens 			devices_changed_cb(zfsvfs, B_FALSE);
331*789Sahrens 			setuid_changed_cb(zfsvfs, B_FALSE);
332*789Sahrens 		} else {
333*789Sahrens 			if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
334*789Sahrens 				devices_changed_cb(zfsvfs, B_FALSE);
335*789Sahrens 			else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL))
336*789Sahrens 				devices_changed_cb(zfsvfs, B_TRUE);
337*789Sahrens 
338*789Sahrens 			if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
339*789Sahrens 				setuid_changed_cb(zfsvfs, B_FALSE);
340*789Sahrens 			else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL))
341*789Sahrens 				setuid_changed_cb(zfsvfs, B_TRUE);
342*789Sahrens 		}
343*789Sahrens 
344*789Sahrens 		if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL))
345*789Sahrens 			exec_changed_cb(zfsvfs, B_FALSE);
346*789Sahrens 		else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL))
347*789Sahrens 			exec_changed_cb(zfsvfs, B_TRUE);
348*789Sahrens 
349*789Sahrens 		return (0);
350*789Sahrens 	}
351*789Sahrens 
352*789Sahrens 	/*
353*789Sahrens 	 * Get the objset name (the "special" mount argument).
354*789Sahrens 	 */
355*789Sahrens 	if (error = pn_get(uap->spec, fromspace, &spn))
356*789Sahrens 		return (error);
357*789Sahrens 
358*789Sahrens 	osname = spn.pn_path;
359*789Sahrens 
360*789Sahrens 	if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
361*789Sahrens 		goto out;
362*789Sahrens 
363*789Sahrens 	/*
364*789Sahrens 	 * Refuse to mount a filesystem if we are in a local zone and the
365*789Sahrens 	 * dataset is not visible.
366*789Sahrens 	 */
367*789Sahrens 	if (!INGLOBALZONE(curproc) &&
368*789Sahrens 	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
369*789Sahrens 		error = EPERM;
370*789Sahrens 		goto out;
371*789Sahrens 	}
372*789Sahrens 
373*789Sahrens 	/*
374*789Sahrens 	 * Initialize the zfs-specific filesystem structure.
375*789Sahrens 	 * Should probably make this a kmem cache, shuffle fields,
376*789Sahrens 	 * and just bzero upto z_hold_mtx[].
377*789Sahrens 	 */
378*789Sahrens 	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
379*789Sahrens 	zfsvfs->z_vfs = vfsp;
380*789Sahrens 	zfsvfs->z_parent = zfsvfs;
381*789Sahrens 	zfsvfs->z_assign = TXG_NOWAIT;
382*789Sahrens 	zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
383*789Sahrens 	zfsvfs->z_show_ctldir = VISIBLE;
384*789Sahrens 
385*789Sahrens 	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
386*789Sahrens 	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
387*789Sahrens 	    offsetof(znode_t, z_link_node));
388*789Sahrens 	rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL);
389*789Sahrens 
390*789Sahrens 	/*
391*789Sahrens 	 * Initialize the generic filesystem structure.
392*789Sahrens 	 */
393*789Sahrens 	vfsp->vfs_bcount = 0;
394*789Sahrens 	vfsp->vfs_data = NULL;
395*789Sahrens 
396*789Sahrens 	/*
397*789Sahrens 	 * Create a unique device for the mount.
398*789Sahrens 	 */
399*789Sahrens 	do {
400*789Sahrens 		ASSERT3U(zfs_minor, <=, MAXMIN32);
401*789Sahrens 		int start = zfs_minor;
402*789Sahrens 		do {
403*789Sahrens 			mutex_enter(&zfs_dev_mtx);
404*789Sahrens 			zfs_minor++;
405*789Sahrens 			if (zfs_minor > MAXMIN32)
406*789Sahrens 				zfs_minor = 0;
407*789Sahrens 			mount_dev = makedevice(zfs_major, zfs_minor);
408*789Sahrens 			mutex_exit(&zfs_dev_mtx);
409*789Sahrens 		} while (vfs_devismounted(mount_dev) && zfs_minor != start);
410*789Sahrens 		if (zfs_minor == start) {
411*789Sahrens 			/*
412*789Sahrens 			 * We are using all ~262,000 minor numbers
413*789Sahrens 			 * for the current major number.  Create a
414*789Sahrens 			 * new major number.
415*789Sahrens 			 */
416*789Sahrens 			if ((new_major = getudev()) == (major_t)-1) {
417*789Sahrens 				cmn_err(CE_WARN,
418*789Sahrens 				    "zfs_mount: Can't get unique"
419*789Sahrens 				    " major device number.");
420*789Sahrens 				goto out;
421*789Sahrens 			}
422*789Sahrens 			mutex_enter(&zfs_dev_mtx);
423*789Sahrens 			zfs_major = new_major;
424*789Sahrens 			zfs_minor = 0;
425*789Sahrens 			mutex_exit(&zfs_dev_mtx);
426*789Sahrens 		} else {
427*789Sahrens 			break;
428*789Sahrens 		}
429*789Sahrens 		/* CONSTANTCONDITION */
430*789Sahrens 	} while (1);
431*789Sahrens 
432*789Sahrens 	ASSERT(vfs_devismounted(mount_dev) == 0);
433*789Sahrens 
434*789Sahrens 	if (dsl_prop_get_integer(osname, "recordsize", &recordsize, NULL) != 0)
435*789Sahrens 		recordsize = SPA_MAXBLOCKSIZE;
436*789Sahrens 
437*789Sahrens 	vfsp->vfs_dev = mount_dev;
438*789Sahrens 	vfsp->vfs_fstype = zfsfstype;
439*789Sahrens 	vfsp->vfs_bsize = recordsize;
440*789Sahrens 	vfsp->vfs_flag |= VFS_NOTRUNC;
441*789Sahrens 	vfsp->vfs_data = zfsvfs;
442*789Sahrens 
443*789Sahrens 	error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL);
444*789Sahrens 	if (error)
445*789Sahrens 		goto out;
446*789Sahrens 
447*789Sahrens 	if (readonly)
448*789Sahrens 		mode = DS_MODE_PRIMARY | DS_MODE_READONLY;
449*789Sahrens 	else
450*789Sahrens 		mode = DS_MODE_PRIMARY;
451*789Sahrens 
452*789Sahrens 	error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
453*789Sahrens 	if (error == EROFS) {
454*789Sahrens 		mode = DS_MODE_PRIMARY | DS_MODE_READONLY;
455*789Sahrens 		error = dmu_objset_open(osname, DMU_OST_ZFS, mode,
456*789Sahrens 		    &zfsvfs->z_os);
457*789Sahrens 	}
458*789Sahrens 	os = zfsvfs->z_os;
459*789Sahrens 
460*789Sahrens 	if (error)
461*789Sahrens 		goto out;
462*789Sahrens 
463*789Sahrens 	if (error = zfs_init_fs(zfsvfs, &zp, cr))
464*789Sahrens 		goto out;
465*789Sahrens 
466*789Sahrens 	if (dmu_objset_is_snapshot(os)) {
467*789Sahrens 		ASSERT(mode & DS_MODE_READONLY);
468*789Sahrens 		atime_changed_cb(zfsvfs, B_FALSE);
469*789Sahrens 		readonly_changed_cb(zfsvfs, B_TRUE);
470*789Sahrens 		zfsvfs->z_issnap = B_TRUE;
471*789Sahrens 	} else {
472*789Sahrens 		int do_readonly = FALSE, readonly;
473*789Sahrens 		int do_setuid = FALSE, setuid;
474*789Sahrens 		int do_exec = FALSE, exec;
475*789Sahrens 		int do_devices = FALSE, devices;
476*789Sahrens 
477*789Sahrens 		/*
478*789Sahrens 		 * Start a delete thread running.
479*789Sahrens 		 */
480*789Sahrens 		(void) zfs_delete_thread_target(zfsvfs, 1);
481*789Sahrens 
482*789Sahrens 		/*
483*789Sahrens 		 * Parse and replay the intent log.
484*789Sahrens 		 */
485*789Sahrens 		zil_replay(os, zfsvfs, &zfsvfs->z_assign, zfs_replay_vector,
486*789Sahrens 		    (void (*)(void *))zfs_delete_wait_empty);
487*789Sahrens 
488*789Sahrens 		if (!zil_disable)
489*789Sahrens 			zfsvfs->z_log = zil_open(os, zfs_get_data);
490*789Sahrens 
491*789Sahrens 		/*
492*789Sahrens 		 * The act of registering our callbacks will destroy any mount
493*789Sahrens 		 * options we may have.  In order to enable temporary overrides
494*789Sahrens 		 * of mount options, we stash away the current values and
495*789Sahrens 		 * restore them after we register the callbacks.
496*789Sahrens 		 */
497*789Sahrens 		if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
498*789Sahrens 			readonly = B_TRUE;
499*789Sahrens 			do_readonly = B_TRUE;
500*789Sahrens 		} else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
501*789Sahrens 			readonly = B_FALSE;
502*789Sahrens 			do_readonly = B_TRUE;
503*789Sahrens 		}
504*789Sahrens 		if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
505*789Sahrens 			devices = B_FALSE;
506*789Sahrens 			setuid = B_FALSE;
507*789Sahrens 			do_devices = B_TRUE;
508*789Sahrens 			do_setuid = B_TRUE;
509*789Sahrens 		} else {
510*789Sahrens 			if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) {
511*789Sahrens 				devices = B_FALSE;
512*789Sahrens 				do_devices = B_TRUE;
513*789Sahrens 			} else if (vfs_optionisset(vfsp,
514*789Sahrens 			    MNTOPT_DEVICES, NULL)) {
515*789Sahrens 				devices = B_TRUE;
516*789Sahrens 				do_devices = B_TRUE;
517*789Sahrens 			}
518*789Sahrens 
519*789Sahrens 			if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
520*789Sahrens 				setuid = B_FALSE;
521*789Sahrens 				do_setuid = B_TRUE;
522*789Sahrens 			} else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
523*789Sahrens 				setuid = B_TRUE;
524*789Sahrens 				do_setuid = B_TRUE;
525*789Sahrens 			}
526*789Sahrens 		}
527*789Sahrens 		if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
528*789Sahrens 			exec = B_FALSE;
529*789Sahrens 			do_exec = B_TRUE;
530*789Sahrens 		} else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
531*789Sahrens 			exec = B_TRUE;
532*789Sahrens 			do_exec = B_TRUE;
533*789Sahrens 		}
534*789Sahrens 
535*789Sahrens 		/*
536*789Sahrens 		 * Register property callbacks.
537*789Sahrens 		 */
538*789Sahrens 		ds = dmu_objset_ds(os);
539*789Sahrens 		VERIFY(dsl_prop_register(ds, "atime", atime_changed_cb,
540*789Sahrens 		    zfsvfs) == 0);
541*789Sahrens 
542*789Sahrens 		VERIFY(dsl_prop_register(ds, "recordsize", blksz_changed_cb,
543*789Sahrens 		    zfsvfs) == 0);
544*789Sahrens 
545*789Sahrens 		VERIFY(dsl_prop_register(ds, "readonly", readonly_changed_cb,
546*789Sahrens 		    zfsvfs) == 0);
547*789Sahrens 
548*789Sahrens 		VERIFY(dsl_prop_register(ds, "devices", devices_changed_cb,
549*789Sahrens 		    zfsvfs) == 0);
550*789Sahrens 
551*789Sahrens 		VERIFY(dsl_prop_register(ds, "setuid", setuid_changed_cb,
552*789Sahrens 		    zfsvfs) == 0);
553*789Sahrens 
554*789Sahrens 		VERIFY(dsl_prop_register(ds, "exec", exec_changed_cb,
555*789Sahrens 		    zfsvfs) == 0);
556*789Sahrens 
557*789Sahrens 		VERIFY(dsl_prop_register(ds, "snapdir", snapdir_changed_cb,
558*789Sahrens 		    zfsvfs) == 0);
559*789Sahrens 
560*789Sahrens 		VERIFY(dsl_prop_register(ds, "aclmode", acl_mode_changed_cb,
561*789Sahrens 		    zfsvfs) == 0);
562*789Sahrens 
563*789Sahrens 		VERIFY(dsl_prop_register(ds, "aclinherit",
564*789Sahrens 		    acl_inherit_changed_cb, zfsvfs) == 0);
565*789Sahrens 
566*789Sahrens 
567*789Sahrens 		/*
568*789Sahrens 		 * Invoke our callbacks to restore temporary mount options.
569*789Sahrens 		 */
570*789Sahrens 		if (do_readonly)
571*789Sahrens 			readonly_changed_cb(zfsvfs, readonly);
572*789Sahrens 		if (do_setuid)
573*789Sahrens 			setuid_changed_cb(zfsvfs, setuid);
574*789Sahrens 		if (do_exec)
575*789Sahrens 			exec_changed_cb(zfsvfs, exec);
576*789Sahrens 		if (do_devices)
577*789Sahrens 			devices_changed_cb(zfsvfs, devices);
578*789Sahrens 	}
579*789Sahrens 
580*789Sahrens 	vp = ZTOV(zp);
581*789Sahrens 	if (!zfsvfs->z_issnap)
582*789Sahrens 		zfsctl_create(zfsvfs);
583*789Sahrens out:
584*789Sahrens 	if (error) {
585*789Sahrens 		if (zp)
586*789Sahrens 			VN_RELE(vp);
587*789Sahrens 
588*789Sahrens 		if (zfsvfs) {
589*789Sahrens 			if (os)
590*789Sahrens 				dmu_objset_close(os);
591*789Sahrens 			kmem_free(zfsvfs, sizeof (zfsvfs_t));
592*789Sahrens 		}
593*789Sahrens 	} else {
594*789Sahrens 		atomic_add_32(&zfs_active_fs_count, 1);
595*789Sahrens 		VN_RELE(vp);
596*789Sahrens 	}
597*789Sahrens 
598*789Sahrens 	pn_free(&spn);
599*789Sahrens 	return (error);
600*789Sahrens }
601*789Sahrens 
602*789Sahrens static int
603*789Sahrens zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp)
604*789Sahrens {
605*789Sahrens 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
606*789Sahrens 	dmu_objset_stats_t dstats;
607*789Sahrens 	dev32_t d32;
608*789Sahrens 
609*789Sahrens 	ZFS_ENTER(zfsvfs);
610*789Sahrens 
611*789Sahrens 	dmu_objset_stats(zfsvfs->z_os, &dstats);
612*789Sahrens 
613*789Sahrens 	/*
614*789Sahrens 	 * The underlying storage pool actually uses multiple block sizes.
615*789Sahrens 	 * We report the fragsize as the smallest block size we support,
616*789Sahrens 	 * and we report our blocksize as the filesystem's maximum blocksize.
617*789Sahrens 	 */
618*789Sahrens 	statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT;
619*789Sahrens 	statp->f_bsize = zfsvfs->z_max_blksz;
620*789Sahrens 
621*789Sahrens 	/*
622*789Sahrens 	 * The following report "total" blocks of various kinds in the
623*789Sahrens 	 * file system, but reported in terms of f_frsize - the
624*789Sahrens 	 * "fragment" size.
625*789Sahrens 	 */
626*789Sahrens 
627*789Sahrens 	statp->f_blocks =
628*789Sahrens 	    (dstats.dds_space_refd + dstats.dds_available) >> SPA_MINBLOCKSHIFT;
629*789Sahrens 	statp->f_bfree = dstats.dds_available >> SPA_MINBLOCKSHIFT;
630*789Sahrens 	statp->f_bavail = statp->f_bfree; /* no root reservation */
631*789Sahrens 
632*789Sahrens 	/*
633*789Sahrens 	 * statvfs() should really be called statufs(), because it assumes
634*789Sahrens 	 * static metadata.  ZFS doesn't preallocate files, so the best
635*789Sahrens 	 * we can do is report the max that could possibly fit in f_files,
636*789Sahrens 	 * and that minus the number actually used in f_ffree.
637*789Sahrens 	 * For f_ffree, report the smaller of the number of object available
638*789Sahrens 	 * and the number of blocks (each object will take at least a block).
639*789Sahrens 	 */
640*789Sahrens 	statp->f_ffree = MIN(dstats.dds_objects_avail, statp->f_bfree);
641*789Sahrens 	statp->f_favail = statp->f_ffree;	/* no "root reservation" */
642*789Sahrens 	statp->f_files = statp->f_ffree + dstats.dds_objects_used;
643*789Sahrens 
644*789Sahrens 	(void) cmpldev(&d32, vfsp->vfs_dev);
645*789Sahrens 	statp->f_fsid = d32;
646*789Sahrens 
647*789Sahrens 	/*
648*789Sahrens 	 * We're a zfs filesystem.
649*789Sahrens 	 */
650*789Sahrens 	(void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
651*789Sahrens 
652*789Sahrens 	statp->f_flag = 0;
653*789Sahrens 
654*789Sahrens 	statp->f_namemax = ZFS_MAXNAMELEN;
655*789Sahrens 
656*789Sahrens 	/*
657*789Sahrens 	 * We have all of 32 characters to stuff a string here.
658*789Sahrens 	 * Is there anything useful we could/should provide?
659*789Sahrens 	 */
660*789Sahrens 	bzero(statp->f_fstr, sizeof (statp->f_fstr));
661*789Sahrens 
662*789Sahrens 	ZFS_EXIT(zfsvfs);
663*789Sahrens 	return (0);
664*789Sahrens }
665*789Sahrens 
666*789Sahrens static int
667*789Sahrens zfs_root(vfs_t *vfsp, vnode_t **vpp)
668*789Sahrens {
669*789Sahrens 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
670*789Sahrens 	znode_t *rootzp;
671*789Sahrens 	int error;
672*789Sahrens 
673*789Sahrens 	ZFS_ENTER(zfsvfs);
674*789Sahrens 
675*789Sahrens 	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
676*789Sahrens 	if (error == 0)
677*789Sahrens 		*vpp = ZTOV(rootzp);
678*789Sahrens 
679*789Sahrens 	ZFS_EXIT(zfsvfs);
680*789Sahrens 	return (error);
681*789Sahrens }
682*789Sahrens 
683*789Sahrens /*ARGSUSED*/
684*789Sahrens static int
685*789Sahrens zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr)
686*789Sahrens {
687*789Sahrens 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
688*789Sahrens 	int ret;
689*789Sahrens 
690*789Sahrens 	if ((ret = secpolicy_fs_unmount(cr, vfsp)) != 0)
691*789Sahrens 		return (ret);
692*789Sahrens 
693*789Sahrens 	/*
694*789Sahrens 	 * Unmount any snapshots mounted under .zfs before unmounting the
695*789Sahrens 	 * dataset itself.
696*789Sahrens 	 */
697*789Sahrens 	if (zfsvfs->z_ctldir != NULL &&
698*789Sahrens 	    (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
699*789Sahrens 		return (ret);
700*789Sahrens 
701*789Sahrens 	if (fflag & MS_FORCE) {
702*789Sahrens 		vfsp->vfs_flag |= VFS_UNMOUNTED;
703*789Sahrens 		zfsvfs->z_unmounted1 = B_TRUE;
704*789Sahrens 
705*789Sahrens 		/*
706*789Sahrens 		 * Wait for all zfs threads to leave zfs.
707*789Sahrens 		 * Grabbing a rwlock as reader in all vops and
708*789Sahrens 		 * as writer here doesn't work because it too easy to get
709*789Sahrens 		 * multiple reader enters as zfs can re-enter itself.
710*789Sahrens 		 * This can lead to deadlock if there is an intervening
711*789Sahrens 		 * rw_enter as writer.
712*789Sahrens 		 * So a file system threads ref count (z_op_cnt) is used.
713*789Sahrens 		 * A polling loop on z_op_cnt may seem inefficient, but
714*789Sahrens 		 * - this saves all threads on exit from having to grab a
715*789Sahrens 		 *   mutex in order to cv_signal
716*789Sahrens 		 * - only occurs on forced unmount in the rare case when
717*789Sahrens 		 *   there are outstanding threads within the file system.
718*789Sahrens 		 */
719*789Sahrens 		while (zfsvfs->z_op_cnt) {
720*789Sahrens 			delay(1);
721*789Sahrens 		}
722*789Sahrens 
723*789Sahrens 		zfs_objset_close(zfsvfs);
724*789Sahrens 
725*789Sahrens 		return (0);
726*789Sahrens 	}
727*789Sahrens 
728*789Sahrens 	zfs_zcache_flush(zfsvfs);
729*789Sahrens 
730*789Sahrens 	/*
731*789Sahrens 	 * Stop all delete threads.
732*789Sahrens 	 */
733*789Sahrens 	(void) zfs_delete_thread_target(zfsvfs, 0);
734*789Sahrens 
735*789Sahrens 	/*
736*789Sahrens 	 * Check the number of active vnodes in the file system.
737*789Sahrens 	 * Our count is maintained in the vfs structure, but the number
738*789Sahrens 	 * is off by 1 to indicate a hold on the vfs structure itself.
739*789Sahrens 	 *
740*789Sahrens 	 * The '.zfs' directory maintains a reference of its own, and any active
741*789Sahrens 	 * references underneath are reflected in the vnode count.
742*789Sahrens 	 */
743*789Sahrens 	if (zfsvfs->z_ctldir == NULL) {
744*789Sahrens 		if (vfsp->vfs_count > 1) {
745*789Sahrens 			if ((zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) == 0)
746*789Sahrens 				(void) zfs_delete_thread_target(zfsvfs, 1);
747*789Sahrens 			return (EBUSY);
748*789Sahrens 		}
749*789Sahrens 	} else {
750*789Sahrens 		if (vfsp->vfs_count > 2 ||
751*789Sahrens 		    (zfsvfs->z_ctldir->v_count > 1 && !(fflag & MS_FORCE))) {
752*789Sahrens 			if ((zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) == 0)
753*789Sahrens 				(void) zfs_delete_thread_target(zfsvfs, 1);
754*789Sahrens 			return (EBUSY);
755*789Sahrens 		}
756*789Sahrens 	}
757*789Sahrens 
758*789Sahrens 	vfsp->vfs_flag |= VFS_UNMOUNTED;
759*789Sahrens 	zfs_objset_close(zfsvfs);
760*789Sahrens 
761*789Sahrens 	/*
762*789Sahrens 	 * We can now safely destroy the '.zfs' directory node, which will
763*789Sahrens 	 * release its hold on the vfs_t.
764*789Sahrens 	 */
765*789Sahrens 	if (zfsvfs->z_ctldir != NULL)
766*789Sahrens 		zfsctl_destroy(zfsvfs);
767*789Sahrens 
768*789Sahrens 	return (0);
769*789Sahrens }
770*789Sahrens 
771*789Sahrens static int
772*789Sahrens zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
773*789Sahrens {
774*789Sahrens 	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
775*789Sahrens 	znode_t		*zp;
776*789Sahrens 	uint64_t	object = 0;
777*789Sahrens 	uint64_t	fid_gen = 0;
778*789Sahrens 	uint64_t	gen_mask;
779*789Sahrens 	uint64_t	zp_gen;
780*789Sahrens 	int 		i, err;
781*789Sahrens 
782*789Sahrens 	*vpp = NULL;
783*789Sahrens 
784*789Sahrens 	ZFS_ENTER(zfsvfs);
785*789Sahrens 
786*789Sahrens 	if (fidp->fid_len == LONG_FID_LEN) {
787*789Sahrens 		zfid_long_t	*zlfid = (zfid_long_t *)fidp;
788*789Sahrens 		uint64_t	objsetid = 0;
789*789Sahrens 		uint64_t	setgen = 0;
790*789Sahrens 
791*789Sahrens 		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
792*789Sahrens 			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
793*789Sahrens 
794*789Sahrens 		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
795*789Sahrens 			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
796*789Sahrens 
797*789Sahrens 		ZFS_EXIT(zfsvfs);
798*789Sahrens 
799*789Sahrens 		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
800*789Sahrens 		if (err)
801*789Sahrens 			return (EINVAL);
802*789Sahrens 		ZFS_ENTER(zfsvfs);
803*789Sahrens 	}
804*789Sahrens 
805*789Sahrens 	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
806*789Sahrens 		zfid_short_t	*zfid = (zfid_short_t *)fidp;
807*789Sahrens 
808*789Sahrens 		for (i = 0; i < sizeof (zfid->zf_object); i++)
809*789Sahrens 			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
810*789Sahrens 
811*789Sahrens 		for (i = 0; i < sizeof (zfid->zf_gen); i++)
812*789Sahrens 			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
813*789Sahrens 	} else {
814*789Sahrens 		ZFS_EXIT(zfsvfs);
815*789Sahrens 		return (EINVAL);
816*789Sahrens 	}
817*789Sahrens 
818*789Sahrens 	/* A zero fid_gen means we are in the .zfs control directories */
819*789Sahrens 	if (fid_gen == 0 &&
820*789Sahrens 	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
821*789Sahrens 		*vpp = zfsvfs->z_ctldir;
822*789Sahrens 		ASSERT(*vpp != NULL);
823*789Sahrens 		if (object == ZFSCTL_INO_SNAPDIR) {
824*789Sahrens 			VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL,
825*789Sahrens 			    0, NULL, NULL) == 0);
826*789Sahrens 		} else {
827*789Sahrens 			VN_HOLD(*vpp);
828*789Sahrens 		}
829*789Sahrens 		ZFS_EXIT(zfsvfs);
830*789Sahrens 		return (0);
831*789Sahrens 	}
832*789Sahrens 
833*789Sahrens 	gen_mask = -1ULL >> (64 - 8 * i);
834*789Sahrens 
835*789Sahrens 	dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
836*789Sahrens 	if (err = zfs_zget(zfsvfs, object, &zp)) {
837*789Sahrens 		ZFS_EXIT(zfsvfs);
838*789Sahrens 		return (err);
839*789Sahrens 	}
840*789Sahrens 	zp_gen = zp->z_phys->zp_gen & gen_mask;
841*789Sahrens 	if (zp_gen == 0)
842*789Sahrens 		zp_gen = 1;
843*789Sahrens 	if (zp->z_reap || zp_gen != fid_gen) {
844*789Sahrens 		dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
845*789Sahrens 		VN_RELE(ZTOV(zp));
846*789Sahrens 		ZFS_EXIT(zfsvfs);
847*789Sahrens 		return (EINVAL);
848*789Sahrens 	}
849*789Sahrens 
850*789Sahrens 	*vpp = ZTOV(zp);
851*789Sahrens 	ZFS_EXIT(zfsvfs);
852*789Sahrens 	return (0);
853*789Sahrens }
854*789Sahrens 
855*789Sahrens static void
856*789Sahrens zfs_objset_close(zfsvfs_t *zfsvfs)
857*789Sahrens {
858*789Sahrens 	zfs_delete_t	*zd = &zfsvfs->z_delete_head;
859*789Sahrens 	znode_t		*zp, *nextzp;
860*789Sahrens 	objset_t	*os = zfsvfs->z_os;
861*789Sahrens 	struct dsl_dataset *ds;
862*789Sahrens 
863*789Sahrens 	/*
864*789Sahrens 	 * Stop all delete threads.
865*789Sahrens 	 */
866*789Sahrens 	(void) zfs_delete_thread_target(zfsvfs, 0);
867*789Sahrens 
868*789Sahrens 	/*
869*789Sahrens 	 * For forced unmount, at this point all vops except zfs_inactive
870*789Sahrens 	 * are erroring EIO. We need to now suspend zfs_inactive threads
871*789Sahrens 	 * while we are freeing dbufs before switching zfs_inactive
872*789Sahrens 	 * to use behaviour without a objset.
873*789Sahrens 	 */
874*789Sahrens 	rw_enter(&zfsvfs->z_um_lock, RW_WRITER);
875*789Sahrens 
876*789Sahrens 	zfs_zcache_flush(zfsvfs);
877*789Sahrens 
878*789Sahrens 	/*
879*789Sahrens 	 * Release all delete in progress znodes
880*789Sahrens 	 * They will be processed when the file system remounts.
881*789Sahrens 	 */
882*789Sahrens 	mutex_enter(&zd->z_mutex);
883*789Sahrens 	while (zp = list_head(&zd->z_znodes)) {
884*789Sahrens 		list_remove(&zd->z_znodes, zp);
885*789Sahrens 		zp->z_dbuf_held = 0;
886*789Sahrens 		dmu_buf_rele(zp->z_dbuf);
887*789Sahrens 	}
888*789Sahrens 	mutex_exit(&zd->z_mutex);
889*789Sahrens 
890*789Sahrens 	/*
891*789Sahrens 	 * Release all holds on dbufs
892*789Sahrens 	 * Note, although we have stopped all other vop threads and
893*789Sahrens 	 * zfs_inactive(), the dmu can callback via znode_pageout_func()
894*789Sahrens 	 * which can zfs_znode_free() the znode.
895*789Sahrens 	 * So we lock z_all_znodes; search the list for a held
896*789Sahrens 	 * dbuf; drop the lock (we know zp can't disappear if we hold
897*789Sahrens 	 * a dbuf lock; then regrab the lock and restart.
898*789Sahrens 	 */
899*789Sahrens 	mutex_enter(&zfsvfs->z_znodes_lock);
900*789Sahrens 	for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) {
901*789Sahrens 		nextzp = list_next(&zfsvfs->z_all_znodes, zp);
902*789Sahrens 		if (zp->z_dbuf_held) {
903*789Sahrens 			/* dbufs should only be held when force unmounting */
904*789Sahrens 			zp->z_dbuf_held = 0;
905*789Sahrens 			mutex_exit(&zfsvfs->z_znodes_lock);
906*789Sahrens 			dmu_buf_rele(zp->z_dbuf);
907*789Sahrens 			/* Start again */
908*789Sahrens 			mutex_enter(&zfsvfs->z_znodes_lock);
909*789Sahrens 			nextzp = list_head(&zfsvfs->z_all_znodes);
910*789Sahrens 		}
911*789Sahrens 	}
912*789Sahrens 	mutex_exit(&zfsvfs->z_znodes_lock);
913*789Sahrens 
914*789Sahrens 	/*
915*789Sahrens 	 * Unregister properties.
916*789Sahrens 	 */
917*789Sahrens 	if (!dmu_objset_is_snapshot(os)) {
918*789Sahrens 		ds = dmu_objset_ds(os);
919*789Sahrens 
920*789Sahrens 		VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb,
921*789Sahrens 		    zfsvfs) == 0);
922*789Sahrens 
923*789Sahrens 		VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb,
924*789Sahrens 		    zfsvfs) == 0);
925*789Sahrens 
926*789Sahrens 		VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb,
927*789Sahrens 		    zfsvfs) == 0);
928*789Sahrens 
929*789Sahrens 		VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb,
930*789Sahrens 		    zfsvfs) == 0);
931*789Sahrens 
932*789Sahrens 		VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb,
933*789Sahrens 		    zfsvfs) == 0);
934*789Sahrens 
935*789Sahrens 		VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb,
936*789Sahrens 		    zfsvfs) == 0);
937*789Sahrens 
938*789Sahrens 		VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb,
939*789Sahrens 		    zfsvfs) == 0);
940*789Sahrens 
941*789Sahrens 		VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb,
942*789Sahrens 		    zfsvfs) == 0);
943*789Sahrens 
944*789Sahrens 		VERIFY(dsl_prop_unregister(ds, "aclinherit",
945*789Sahrens 		    acl_inherit_changed_cb, zfsvfs) == 0);
946*789Sahrens 	}
947*789Sahrens 
948*789Sahrens 	/*
949*789Sahrens 	 * Make the dmu drop all it dbuf holds so that zfs_inactive
950*789Sahrens 	 * can then safely free znode/vnodes.
951*789Sahrens 	 */
952*789Sahrens 	txg_wait_synced(dmu_objset_pool(os), 0);
953*789Sahrens 
954*789Sahrens 	/*
955*789Sahrens 	 * Switch zfs_inactive to behaviour without an objset.
956*789Sahrens 	 * It just tosses cached pages and frees the znode & vnode.
957*789Sahrens 	 * Then re-enable zfs_inactive threads in that new behaviour.
958*789Sahrens 	 */
959*789Sahrens 	zfsvfs->z_unmounted2 = B_TRUE;
960*789Sahrens 	rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads */
961*789Sahrens 
962*789Sahrens 	/*
963*789Sahrens 	 * Close the zil. Can't close the zil while zfs_inactive
964*789Sahrens 	 * threads are blocked as zil_close can call zfs_inactive.
965*789Sahrens 	 */
966*789Sahrens 	if (zfsvfs->z_log) {
967*789Sahrens 		zil_close(zfsvfs->z_log);
968*789Sahrens 		zfsvfs->z_log = NULL;
969*789Sahrens 	}
970*789Sahrens 
971*789Sahrens 	/*
972*789Sahrens 	 * Finally close the objset
973*789Sahrens 	 */
974*789Sahrens 	dmu_objset_close(os);
975*789Sahrens 
976*789Sahrens }
977*789Sahrens 
978*789Sahrens static void
979*789Sahrens zfs_freevfs(vfs_t *vfsp)
980*789Sahrens {
981*789Sahrens 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
982*789Sahrens 
983*789Sahrens 	kmem_free(zfsvfs, sizeof (zfsvfs_t));
984*789Sahrens 
985*789Sahrens 	atomic_add_32(&zfs_active_fs_count, -1);
986*789Sahrens }
987*789Sahrens 
988*789Sahrens /*
989*789Sahrens  * VFS_INIT() initialization.  Note that there is no VFS_FINI(),
990*789Sahrens  * so we can't safely do any non-idempotent initialization here.
991*789Sahrens  * Leave that to zfs_init() and zfs_fini(), which are called
992*789Sahrens  * from the module's _init() and _fini() entry points.
993*789Sahrens  */
994*789Sahrens /*ARGSUSED*/
995*789Sahrens static int
996*789Sahrens zfs_vfsinit(int fstype, char *name)
997*789Sahrens {
998*789Sahrens 	int error;
999*789Sahrens 
1000*789Sahrens 	zfsfstype = fstype;
1001*789Sahrens 
1002*789Sahrens 	/*
1003*789Sahrens 	 * Setup vfsops and vnodeops tables.
1004*789Sahrens 	 */
1005*789Sahrens 	error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops);
1006*789Sahrens 	if (error != 0) {
1007*789Sahrens 		cmn_err(CE_WARN, "zfs: bad vfs ops template");
1008*789Sahrens 	}
1009*789Sahrens 
1010*789Sahrens 	error = zfs_create_op_tables();
1011*789Sahrens 	if (error) {
1012*789Sahrens 		zfs_remove_op_tables();
1013*789Sahrens 		cmn_err(CE_WARN, "zfs: bad vnode ops template");
1014*789Sahrens 		(void) vfs_freevfsops_by_type(zfsfstype);
1015*789Sahrens 		return (error);
1016*789Sahrens 	}
1017*789Sahrens 
1018*789Sahrens 	mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL);
1019*789Sahrens 
1020*789Sahrens 	/*
1021*789Sahrens 	 * unique major number for all zfs mounts
1022*789Sahrens 	 */
1023*789Sahrens 	if ((zfs_major = getudev()) == (major_t)-1) {
1024*789Sahrens 		cmn_err(CE_WARN,
1025*789Sahrens 		    "zfs_vfsinit: Can't get unique device number.");
1026*789Sahrens 		zfs_remove_op_tables();
1027*789Sahrens 		(void) vfs_freevfsops_by_type(zfsfstype);
1028*789Sahrens 		return (error);
1029*789Sahrens 	}
1030*789Sahrens 	zfs_minor = 0;
1031*789Sahrens 
1032*789Sahrens 	return (0);
1033*789Sahrens }
1034*789Sahrens 
/*
 * One-time setup for the ZFS file system code: the '.zfs' directory
 * support first, then the znode layer.
 */
void
zfs_init(void)
{
	/* Set up the structures behind the .zfs directory. */
	zfsctl_init();

	/* Set up the znode cache, vnode ops, etc. */
	zfs_znode_init();
}
1048*789Sahrens 
/*
 * Counterpart of zfs_init(): tears down the '.zfs' directory support
 * and then the znode layer.
 */
void
zfs_fini(void)
{
	/* Undo zfsctl_init(). */
	zfsctl_fini();

	/* Undo zfs_znode_init(). */
	zfs_znode_fini();
}
1055*789Sahrens 
1056*789Sahrens int
1057*789Sahrens zfs_busy(void)
1058*789Sahrens {
1059*789Sahrens 	return (zfs_active_fs_count != 0);
1060*789Sahrens }
1061*789Sahrens 
1062*789Sahrens static vfsdef_t vfw = {
1063*789Sahrens 	VFSDEF_VERSION,
1064*789Sahrens 	MNTTYPE_ZFS,
1065*789Sahrens 	zfs_vfsinit,
1066*789Sahrens 	VSW_HASPROTO | VSW_CANRWRO | VSW_CANREMOUNT | VSW_VOLATILEDEV,
1067*789Sahrens 	&zfs_mntopts
1068*789Sahrens };
1069*789Sahrens 
1070*789Sahrens struct modlfs zfs_modlfs = {
1071*789Sahrens 	&mod_fsops, "ZFS filesystem version 1", &vfw
1072*789Sahrens };
1073