xref: /freebsd-src/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c (revision 2eb4d8dc723da3cf7d735a3226ae49da4c8c5dbc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
24  * All rights reserved.
25  * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
26  * Copyright (c) 2014 Integros [integros.com]
27  * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
28  */
29 
30 /* Portions Copyright 2010 Robert Milkowski */
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/sysmacros.h>
37 #include <sys/kmem.h>
38 #include <sys/acl.h>
39 #include <sys/vnode.h>
40 #include <sys/vfs.h>
41 #include <sys/mntent.h>
42 #include <sys/mount.h>
43 #include <sys/cmn_err.h>
44 #include <sys/zfs_znode.h>
45 #include <sys/zfs_vnops.h>
46 #include <sys/zfs_dir.h>
47 #include <sys/zil.h>
48 #include <sys/fs/zfs.h>
49 #include <sys/dmu.h>
50 #include <sys/dsl_prop.h>
51 #include <sys/dsl_dataset.h>
52 #include <sys/dsl_deleg.h>
53 #include <sys/spa.h>
54 #include <sys/zap.h>
55 #include <sys/sa.h>
56 #include <sys/sa_impl.h>
57 #include <sys/policy.h>
58 #include <sys/atomic.h>
59 #include <sys/zfs_ioctl.h>
60 #include <sys/zfs_ctldir.h>
61 #include <sys/zfs_fuid.h>
62 #include <sys/sunddi.h>
63 #include <sys/dmu_objset.h>
64 #include <sys/dsl_dir.h>
65 #include <sys/spa_boot.h>
66 #include <sys/jail.h>
67 #include <ufs/ufs/quota.h>
68 #include <sys/zfs_quota.h>
69 
70 #include "zfs_comutil.h"
71 
72 #ifndef	MNTK_VMSETSIZE_BUG
73 #define	MNTK_VMSETSIZE_BUG	0
74 #endif
75 #ifndef	MNTK_NOMSYNC
76 #define	MNTK_NOMSYNC	8
77 #endif
78 
79 /* BEGIN CSTYLED */
80 struct mtx zfs_debug_mtx;
81 MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF);
82 
83 SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system");
84 
85 int zfs_super_owner;
86 SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0,
87     "File system owner can perform privileged operations on their file systems");
88 
89 int zfs_debug_level;
90 SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
91 	"Debug level");
92 
93 SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions");
94 static int zfs_version_acl = ZFS_ACL_VERSION;
95 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0,
96     "ZFS_ACL_VERSION");
97 static int zfs_version_spa = SPA_VERSION;
98 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0,
99     "SPA_VERSION");
100 static int zfs_version_zpl = ZPL_VERSION;
101 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0,
102     "ZPL_VERSION");
103 /* END CSTYLED */
104 
105 #if __FreeBSD_version >= 1400018
106 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg,
107     bool *mp_busy);
108 #else
109 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg);
110 #endif
111 static int zfs_mount(vfs_t *vfsp);
112 static int zfs_umount(vfs_t *vfsp, int fflag);
113 static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp);
114 static int zfs_statfs(vfs_t *vfsp, struct statfs *statp);
115 static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp);
116 static int zfs_sync(vfs_t *vfsp, int waitfor);
117 #if __FreeBSD_version >= 1300098
118 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
119     struct ucred **credanonp, int *numsecflavors, int *secflavors);
120 #else
121 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
122     struct ucred **credanonp, int *numsecflavors, int **secflavors);
123 #endif
124 static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp);
125 static void zfs_freevfs(vfs_t *vfsp);
126 
127 struct vfsops zfs_vfsops = {
128 	.vfs_mount =		zfs_mount,
129 	.vfs_unmount =		zfs_umount,
130 #if __FreeBSD_version >= 1300049
131 	.vfs_root =		vfs_cache_root,
132 	.vfs_cachedroot =	zfs_root,
133 #else
134 	.vfs_root =		zfs_root,
135 #endif
136 	.vfs_statfs =		zfs_statfs,
137 	.vfs_vget =		zfs_vget,
138 	.vfs_sync =		zfs_sync,
139 	.vfs_checkexp =		zfs_checkexp,
140 	.vfs_fhtovp =		zfs_fhtovp,
141 	.vfs_quotactl =		zfs_quotactl,
142 };
143 
144 VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN);
145 
146 /*
147  * We need to keep a count of active fs's.
148  * This is necessary to prevent our module
149  * from being unloaded after a umount -f
150  */
151 static uint32_t	zfs_active_fs_count = 0;
152 
153 int
154 zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val,
155     char *setpoint)
156 {
157 	int error;
158 	zfsvfs_t *zfvp;
159 	vfs_t *vfsp;
160 	objset_t *os;
161 	uint64_t tmp = *val;
162 
163 	error = dmu_objset_from_ds(ds, &os);
164 	if (error != 0)
165 		return (error);
166 
167 	error = getzfsvfs_impl(os, &zfvp);
168 	if (error != 0)
169 		return (error);
170 	if (zfvp == NULL)
171 		return (ENOENT);
172 	vfsp = zfvp->z_vfs;
173 	switch (zfs_prop) {
174 	case ZFS_PROP_ATIME:
175 		if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
176 			tmp = 0;
177 		if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL))
178 			tmp = 1;
179 		break;
180 	case ZFS_PROP_DEVICES:
181 		if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
182 			tmp = 0;
183 		if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL))
184 			tmp = 1;
185 		break;
186 	case ZFS_PROP_EXEC:
187 		if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL))
188 			tmp = 0;
189 		if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL))
190 			tmp = 1;
191 		break;
192 	case ZFS_PROP_SETUID:
193 		if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
194 			tmp = 0;
195 		if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL))
196 			tmp = 1;
197 		break;
198 	case ZFS_PROP_READONLY:
199 		if (vfs_optionisset(vfsp, MNTOPT_RW, NULL))
200 			tmp = 0;
201 		if (vfs_optionisset(vfsp, MNTOPT_RO, NULL))
202 			tmp = 1;
203 		break;
204 	case ZFS_PROP_XATTR:
205 		if (zfvp->z_flags & ZSB_XATTR)
206 			tmp = zfvp->z_xattr;
207 		break;
208 	case ZFS_PROP_NBMAND:
209 		if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL))
210 			tmp = 0;
211 		if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL))
212 			tmp = 1;
213 		break;
214 	default:
215 		vfs_unbusy(vfsp);
216 		return (ENOENT);
217 	}
218 
219 	vfs_unbusy(vfsp);
220 	if (tmp != *val) {
221 		(void) strcpy(setpoint, "temporary");
222 		*val = tmp;
223 	}
224 	return (0);
225 }
226 
227 static int
228 zfs_getquota(zfsvfs_t *zfsvfs, uid_t id, int isgroup, struct dqblk64 *dqp)
229 {
230 	int error = 0;
231 	char buf[32];
232 	uint64_t usedobj, quotaobj;
233 	uint64_t quota, used = 0;
234 	timespec_t now;
235 
236 	usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
237 	quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
238 
239 	if (quotaobj == 0 || zfsvfs->z_replay) {
240 		error = ENOENT;
241 		goto done;
242 	}
243 	(void) sprintf(buf, "%llx", (longlong_t)id);
244 	if ((error = zap_lookup(zfsvfs->z_os, quotaobj,
245 	    buf, sizeof (quota), 1, &quota)) != 0) {
246 		dprintf("%s(%d): quotaobj lookup failed\n",
247 		    __FUNCTION__, __LINE__);
248 		goto done;
249 	}
250 	/*
251 	 * quota(8) uses bsoftlimit as "quota", and hardlimit as "limit".
252 	 * So we set them to be the same.
253 	 */
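	/*
	 * Illustrative example (values assumed): a 10 GiB quota, i.e.
	 * quota == 10737418240 bytes, is reported as btodb(quota) ==
	 * 20971520 512-byte blocks in both dqb_bsoftlimit and dqb_bhardlimit.
	 */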
254 	dqp->dqb_bsoftlimit = dqp->dqb_bhardlimit = btodb(quota);
255 	error = zap_lookup(zfsvfs->z_os, usedobj, buf, sizeof (used), 1, &used);
256 	if (error && error != ENOENT) {
257 		dprintf("%s(%d):  usedobj failed; %d\n",
258 		    __FUNCTION__, __LINE__, error);
259 		goto done;
260 	}
261 	dqp->dqb_curblocks = btodb(used);
262 	dqp->dqb_ihardlimit = dqp->dqb_isoftlimit = 0;
263 	vfs_timestamp(&now);
264 	/*
265 	 * Setting this to 0 causes FreeBSD quota(8) to print
266 	 * the number of days since the epoch, which isn't
267 	 * particularly useful.
268 	 */
269 	dqp->dqb_btime = dqp->dqb_itime = now.tv_sec;
270 done:
271 	return (error);
272 }
273 
274 static int
275 #if __FreeBSD_version >= 1400018
276 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, bool *mp_busy)
277 #else
278 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg)
279 #endif
280 {
281 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
282 	struct thread *td;
283 	int cmd, type, error = 0;
284 	int bitsize;
285 	zfs_userquota_prop_t quota_type;
286 	struct dqblk64 dqblk = { 0 };
287 
288 	td = curthread;
289 	cmd = cmds >> SUBCMDSHIFT;
290 	type = cmds & SUBCMDMASK;
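	/*
	 * cmds is packed by the QCMD() macro from <ufs/ufs/quota.h>;
	 * e.g. QCMD(Q_GETQUOTA, USRQUOTA) carries the command in the high
	 * bits and the quota type in the low SUBCMDMASK bits.
	 */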
291 
292 	ZFS_ENTER(zfsvfs);
293 	if (id == -1) {
294 		switch (type) {
295 		case USRQUOTA:
296 			id = td->td_ucred->cr_ruid;
297 			break;
298 		case GRPQUOTA:
299 			id = td->td_ucred->cr_rgid;
300 			break;
301 		default:
302 			error = EINVAL;
303 #if __FreeBSD_version < 1400018
304 			if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF)
305 				vfs_unbusy(vfsp);
306 #endif
307 			goto done;
308 		}
309 	}
310 	/*
311 	 * Map BSD type to:
312 	 * ZFS_PROP_USERUSED,
313 	 * ZFS_PROP_USERQUOTA,
314 	 * ZFS_PROP_GROUPUSED,
315 	 * ZFS_PROP_GROUPQUOTA
316 	 */
317 	switch (cmd) {
318 	case Q_SETQUOTA:
319 	case Q_SETQUOTA32:
320 		if (type == USRQUOTA)
321 			quota_type = ZFS_PROP_USERQUOTA;
322 		else if (type == GRPQUOTA)
323 			quota_type = ZFS_PROP_GROUPQUOTA;
324 		else
325 			error = EINVAL;
326 		break;
327 	case Q_GETQUOTA:
328 	case Q_GETQUOTA32:
329 		if (type == USRQUOTA)
330 			quota_type = ZFS_PROP_USERUSED;
331 		else if (type == GRPQUOTA)
332 			quota_type = ZFS_PROP_GROUPUSED;
333 		else
334 			error = EINVAL;
335 		break;
336 	}
337 
338 	/*
339 	 * Depending on the cmd, we may need to get
340 	 * the ruid and domain (see fuidstr_to_sid?),
341 	 * the fuid (how?), or other information.
342 	 * Create fuid using zfs_fuid_create(zfsvfs, id,
343 	 * ZFS_OWNER or ZFS_GROUP, cr, &fuidp)?
344 	 * I think I can use just the id?
345 	 *
346 	 * Look at zfs_id_overquota() to look up a quota.
347 	 * zap_lookup(something, quotaobj, fuidstring,
348 	 *     sizeof (long long), 1, &quota)
349 	 *
350 	 * See zfs_set_userquota() to set a quota.
351 	 */
352 	if ((uint32_t)type >= MAXQUOTAS) {
353 		error = EINVAL;
354 		goto done;
355 	}
356 
357 	switch (cmd) {
358 	case Q_GETQUOTASIZE:
359 		bitsize = 64;
360 		error = copyout(&bitsize, arg, sizeof (int));
361 		break;
362 	case Q_QUOTAON:
363 		/* As far as I can tell, you can't turn quotas on or off on zfs. */
364 		error = 0;
365 #if __FreeBSD_version < 1400018
366 		vfs_unbusy(vfsp);
367 #endif
368 		break;
369 	case Q_QUOTAOFF:
370 		error = ENOTSUP;
371 #if __FreeBSD_version < 1400018
372 		vfs_unbusy(vfsp);
373 #endif
374 		break;
375 	case Q_SETQUOTA:
376 		error = copyin(arg, &dqblk, sizeof (dqblk));
377 		if (error == 0)
378 			error = zfs_set_userquota(zfsvfs, quota_type,
379 			    "", id, dbtob(dqblk.dqb_bhardlimit));
380 		break;
381 	case Q_GETQUOTA:
382 		error = zfs_getquota(zfsvfs, id, type == GRPQUOTA, &dqblk);
383 		if (error == 0)
384 			error = copyout(&dqblk, arg, sizeof (dqblk));
385 		break;
386 	default:
387 		error = EINVAL;
388 		break;
389 	}
390 done:
391 	ZFS_EXIT(zfsvfs);
392 	return (error);
393 }
394 
395 
396 boolean_t
397 zfs_is_readonly(zfsvfs_t *zfsvfs)
398 {
399 	return (!!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY));
400 }
401 
402 /*ARGSUSED*/
403 static int
404 zfs_sync(vfs_t *vfsp, int waitfor)
405 {
406 
407 	/*
408 	 * Data integrity is job one.  We don't want a compromised kernel
409 	 * writing to the storage pool, so we never sync during panic.
410 	 */
411 	if (panicstr)
412 		return (0);
413 
414 	/*
415 	 * Ignore the system syncher.  ZFS already commits async data
416 	 * at zfs_txg_timeout intervals.
417 	 */
418 	if (waitfor == MNT_LAZY)
419 		return (0);
420 
421 	if (vfsp != NULL) {
422 		/*
423 		 * Sync a specific filesystem.
424 		 */
425 		zfsvfs_t *zfsvfs = vfsp->vfs_data;
426 		dsl_pool_t *dp;
427 		int error;
428 
429 		error = vfs_stdsync(vfsp, waitfor);
430 		if (error != 0)
431 			return (error);
432 
433 		ZFS_ENTER(zfsvfs);
434 		dp = dmu_objset_pool(zfsvfs->z_os);
435 
436 		/*
437 		 * If the system is shutting down, then skip any
438 		 * filesystems which may exist on a suspended pool.
439 		 */
440 		if (rebooting && spa_suspended(dp->dp_spa)) {
441 			ZFS_EXIT(zfsvfs);
442 			return (0);
443 		}
444 
445 		if (zfsvfs->z_log != NULL)
446 			zil_commit(zfsvfs->z_log, 0);
447 
448 		ZFS_EXIT(zfsvfs);
449 	} else {
450 		/*
451 		 * Sync all ZFS filesystems.  This is what happens when you
452 		 * run sync(8).  Unlike other filesystems, ZFS honors the
453 		 * request by waiting for all pools to commit all dirty data.
454 		 */
455 		spa_sync_allpools();
456 	}
457 
458 	return (0);
459 }
460 
461 static void
462 atime_changed_cb(void *arg, uint64_t newval)
463 {
464 	zfsvfs_t *zfsvfs = arg;
465 
466 	if (newval == TRUE) {
467 		zfsvfs->z_atime = TRUE;
468 		zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME;
469 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
470 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
471 	} else {
472 		zfsvfs->z_atime = FALSE;
473 		zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME;
474 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
475 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
476 	}
477 }
478 
479 static void
480 xattr_changed_cb(void *arg, uint64_t newval)
481 {
482 	zfsvfs_t *zfsvfs = arg;
483 
484 	if (newval == ZFS_XATTR_OFF) {
485 		zfsvfs->z_flags &= ~ZSB_XATTR;
486 	} else {
487 		zfsvfs->z_flags |= ZSB_XATTR;
488 
489 		if (newval == ZFS_XATTR_SA)
490 			zfsvfs->z_xattr_sa = B_TRUE;
491 		else
492 			zfsvfs->z_xattr_sa = B_FALSE;
493 	}
494 }
495 
496 static void
497 blksz_changed_cb(void *arg, uint64_t newval)
498 {
499 	zfsvfs_t *zfsvfs = arg;
500 	ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os)));
501 	ASSERT3U(newval, >=, SPA_MINBLOCKSIZE);
502 	ASSERT(ISP2(newval));
503 
504 	zfsvfs->z_max_blksz = newval;
505 	zfsvfs->z_vfs->mnt_stat.f_iosize = newval;
506 }
507 
508 static void
509 readonly_changed_cb(void *arg, uint64_t newval)
510 {
511 	zfsvfs_t *zfsvfs = arg;
512 
513 	if (newval) {
514 		/* XXX locking on vfs_flag? */
515 		zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
516 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
517 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
518 	} else {
519 		/* XXX locking on vfs_flag? */
520 		zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
521 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
522 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
523 	}
524 }
525 
526 static void
527 setuid_changed_cb(void *arg, uint64_t newval)
528 {
529 	zfsvfs_t *zfsvfs = arg;
530 
531 	if (newval == FALSE) {
532 		zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
533 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
534 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
535 	} else {
536 		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
537 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
538 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
539 	}
540 }
541 
542 static void
543 exec_changed_cb(void *arg, uint64_t newval)
544 {
545 	zfsvfs_t *zfsvfs = arg;
546 
547 	if (newval == FALSE) {
548 		zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
549 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
550 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
551 	} else {
552 		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
553 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
554 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
555 	}
556 }
557 
558 /*
559  * The nbmand mount option can be changed at mount time.
560  * We can't allow it to be toggled on live file systems or incorrect
561  * behavior may be seen from CIFS clients.
562  *
563  * This property isn't registered via dsl_prop_register(), but this callback
564  * will be called when a file system is first mounted.
565  */
566 static void
567 nbmand_changed_cb(void *arg, uint64_t newval)
568 {
569 	zfsvfs_t *zfsvfs = arg;
570 	if (newval == FALSE) {
571 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND);
572 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0);
573 	} else {
574 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND);
575 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0);
576 	}
577 }
578 
579 static void
580 snapdir_changed_cb(void *arg, uint64_t newval)
581 {
582 	zfsvfs_t *zfsvfs = arg;
583 
584 	zfsvfs->z_show_ctldir = newval;
585 }
586 
587 static void
588 acl_mode_changed_cb(void *arg, uint64_t newval)
589 {
590 	zfsvfs_t *zfsvfs = arg;
591 
592 	zfsvfs->z_acl_mode = newval;
593 }
594 
595 static void
596 acl_inherit_changed_cb(void *arg, uint64_t newval)
597 {
598 	zfsvfs_t *zfsvfs = arg;
599 
600 	zfsvfs->z_acl_inherit = newval;
601 }
602 
603 static void
604 acl_type_changed_cb(void *arg, uint64_t newval)
605 {
606 	zfsvfs_t *zfsvfs = arg;
607 
608 	zfsvfs->z_acl_type = newval;
609 }
610 
611 static int
612 zfs_register_callbacks(vfs_t *vfsp)
613 {
614 	struct dsl_dataset *ds = NULL;
615 	objset_t *os = NULL;
616 	zfsvfs_t *zfsvfs = NULL;
617 	uint64_t nbmand;
618 	boolean_t readonly = B_FALSE;
619 	boolean_t do_readonly = B_FALSE;
620 	boolean_t setuid = B_FALSE;
621 	boolean_t do_setuid = B_FALSE;
622 	boolean_t exec = B_FALSE;
623 	boolean_t do_exec = B_FALSE;
624 	boolean_t xattr = B_FALSE;
625 	boolean_t atime = B_FALSE;
626 	boolean_t do_atime = B_FALSE;
627 	boolean_t do_xattr = B_FALSE;
628 	int error = 0;
629 
630 	ASSERT3P(vfsp, !=, NULL);
631 	zfsvfs = vfsp->vfs_data;
632 	ASSERT3P(zfsvfs, !=, NULL);
633 	os = zfsvfs->z_os;
634 
635 	/*
636 	 * This function can be called for a snapshot when we update the
637 	 * snapshot's mount point, which isn't really supported.
638 	 */
639 	if (dmu_objset_is_snapshot(os))
640 		return (EOPNOTSUPP);
641 
642 	/*
643 	 * The act of registering our callbacks will destroy any mount
644 	 * options we may have.  In order to enable temporary overrides
645 	 * of mount options, we stash away the current values and
646 	 * restore them after we register the callbacks.
647 	 */
648 	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) ||
649 	    !spa_writeable(dmu_objset_spa(os))) {
650 		readonly = B_TRUE;
651 		do_readonly = B_TRUE;
652 	} else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
653 		readonly = B_FALSE;
654 		do_readonly = B_TRUE;
655 	}
656 	if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
657 		setuid = B_FALSE;
658 		do_setuid = B_TRUE;
659 	} else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
660 		setuid = B_TRUE;
661 		do_setuid = B_TRUE;
662 	}
663 	if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
664 		exec = B_FALSE;
665 		do_exec = B_TRUE;
666 	} else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
667 		exec = B_TRUE;
668 		do_exec = B_TRUE;
669 	}
670 	if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
671 		zfsvfs->z_xattr = xattr = ZFS_XATTR_OFF;
672 		do_xattr = B_TRUE;
673 	} else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
674 		zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR;
675 		do_xattr = B_TRUE;
676 	} else if (vfs_optionisset(vfsp, MNTOPT_DIRXATTR, NULL)) {
677 		zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR;
678 		do_xattr = B_TRUE;
679 	} else if (vfs_optionisset(vfsp, MNTOPT_SAXATTR, NULL)) {
680 		zfsvfs->z_xattr = xattr = ZFS_XATTR_SA;
681 		do_xattr = B_TRUE;
682 	}
683 	if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
684 		atime = B_FALSE;
685 		do_atime = B_TRUE;
686 	} else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
687 		atime = B_TRUE;
688 		do_atime = B_TRUE;
689 	}
690 
691 	/*
692 	 * We need to enter pool configuration here, so that we can use
693 	 * dsl_prop_get_int_ds() to handle the special nbmand property below.
694 	 * dsl_prop_get_integer() can not be used, because it has to acquire
695 	 * spa_namespace_lock and we can not do that because we already hold
696 	 * z_teardown_lock.  The problem is that spa_write_cachefile() is called
697 	 * with spa_namespace_lock held and the function calls ZFS vnode
698 	 * operations to write the cache file and thus z_teardown_lock is
699 	 * acquired after spa_namespace_lock.
700 	 */
701 	ds = dmu_objset_ds(os);
702 	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
703 
704 	/*
705 	 * nbmand is a special property.  It can only be changed at
706 	 * mount time.
707 	 *
708 	 * This is weird, but it is documented to only be changeable
709 	 * at mount time.
710 	 */
711 	if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
712 		nbmand = B_FALSE;
713 	} else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
714 		nbmand = B_TRUE;
715 	} else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand)) != 0) {
716 		dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
717 		return (error);
718 	}
719 
720 	/*
721 	 * Register property callbacks.
722 	 *
723 	 * It would probably be fine to just check for i/o error from
724 	 * the first prop_register(), but I guess I like to go
725 	 * overboard...
726 	 */
727 	error = dsl_prop_register(ds,
728 	    zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
729 	error = error ? error : dsl_prop_register(ds,
730 	    zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
731 	error = error ? error : dsl_prop_register(ds,
732 	    zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
733 	error = error ? error : dsl_prop_register(ds,
734 	    zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
735 	error = error ? error : dsl_prop_register(ds,
736 	    zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
737 	error = error ? error : dsl_prop_register(ds,
738 	    zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
739 	error = error ? error : dsl_prop_register(ds,
740 	    zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
741 	error = error ? error : dsl_prop_register(ds,
742 	    zfs_prop_to_name(ZFS_PROP_ACLTYPE), acl_type_changed_cb, zfsvfs);
743 	error = error ? error : dsl_prop_register(ds,
744 	    zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs);
745 	error = error ? error : dsl_prop_register(ds,
746 	    zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
747 	    zfsvfs);
748 	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
749 	if (error)
750 		goto unregister;
751 
752 	/*
753 	 * Invoke our callbacks to restore temporary mount options.
754 	 */
755 	if (do_readonly)
756 		readonly_changed_cb(zfsvfs, readonly);
757 	if (do_setuid)
758 		setuid_changed_cb(zfsvfs, setuid);
759 	if (do_exec)
760 		exec_changed_cb(zfsvfs, exec);
761 	if (do_xattr)
762 		xattr_changed_cb(zfsvfs, xattr);
763 	if (do_atime)
764 		atime_changed_cb(zfsvfs, atime);
765 
766 	nbmand_changed_cb(zfsvfs, nbmand);
767 
768 	return (0);
769 
770 unregister:
771 	dsl_prop_unregister_all(ds, zfsvfs);
772 	return (error);
773 }
774 
775 /*
776  * Associate this zfsvfs with the given objset, which must be owned.
777  * This will cache a bunch of on-disk state from the objset in the
778  * zfsvfs.
779  */
780 static int
781 zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
782 {
783 	int error;
784 	uint64_t val;
785 
786 	zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;
787 	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
788 	zfsvfs->z_os = os;
789 
790 	error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
791 	if (error != 0)
792 		return (error);
793 	if (zfsvfs->z_version >
794 	    zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
795 		(void) printf("Can't mount a version %lld file system "
796 		    "on a version %lld pool.  Pool must be upgraded to mount "
797 		    "this file system.\n", (u_longlong_t)zfsvfs->z_version,
798 		    (u_longlong_t)spa_version(dmu_objset_spa(os)));
799 		return (SET_ERROR(ENOTSUP));
800 	}
801 	error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val);
802 	if (error != 0)
803 		return (error);
804 	zfsvfs->z_norm = (int)val;
805 
806 	error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val);
807 	if (error != 0)
808 		return (error);
809 	zfsvfs->z_utf8 = (val != 0);
810 
811 	error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val);
812 	if (error != 0)
813 		return (error);
814 	zfsvfs->z_case = (uint_t)val;
815 
816 	error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val);
817 	if (error != 0)
818 		return (error);
819 	zfsvfs->z_acl_type = (uint_t)val;
820 
821 	/*
822 	 * Fold case on file systems that are always or sometimes case
823 	 * insensitive.
824 	 */
825 	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
826 	    zfsvfs->z_case == ZFS_CASE_MIXED)
827 		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
828 
829 	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
830 	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
831 
832 	uint64_t sa_obj = 0;
833 	if (zfsvfs->z_use_sa) {
834 		/* should either have both of these objects or none */
835 		error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
836 		    &sa_obj);
837 		if (error != 0)
838 			return (error);
839 
840 		error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val);
841 		if (error == 0 && val == ZFS_XATTR_SA)
842 			zfsvfs->z_xattr_sa = B_TRUE;
843 	}
844 
845 	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
846 	    &zfsvfs->z_attr_table);
847 	if (error != 0)
848 		return (error);
849 
850 	if (zfsvfs->z_version >= ZPL_VERSION_SA)
851 		sa_register_update_callback(os, zfs_sa_upgrade);
852 
853 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
854 	    &zfsvfs->z_root);
855 	if (error != 0)
856 		return (error);
857 	ASSERT3U(zfsvfs->z_root, !=, 0);
858 
859 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
860 	    &zfsvfs->z_unlinkedobj);
861 	if (error != 0)
862 		return (error);
863 
864 	error = zap_lookup(os, MASTER_NODE_OBJ,
865 	    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
866 	    8, 1, &zfsvfs->z_userquota_obj);
867 	if (error == ENOENT)
868 		zfsvfs->z_userquota_obj = 0;
869 	else if (error != 0)
870 		return (error);
871 
872 	error = zap_lookup(os, MASTER_NODE_OBJ,
873 	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
874 	    8, 1, &zfsvfs->z_groupquota_obj);
875 	if (error == ENOENT)
876 		zfsvfs->z_groupquota_obj = 0;
877 	else if (error != 0)
878 		return (error);
879 
880 	error = zap_lookup(os, MASTER_NODE_OBJ,
881 	    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA],
882 	    8, 1, &zfsvfs->z_projectquota_obj);
883 	if (error == ENOENT)
884 		zfsvfs->z_projectquota_obj = 0;
885 	else if (error != 0)
886 		return (error);
887 
888 	error = zap_lookup(os, MASTER_NODE_OBJ,
889 	    zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA],
890 	    8, 1, &zfsvfs->z_userobjquota_obj);
891 	if (error == ENOENT)
892 		zfsvfs->z_userobjquota_obj = 0;
893 	else if (error != 0)
894 		return (error);
895 
896 	error = zap_lookup(os, MASTER_NODE_OBJ,
897 	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA],
898 	    8, 1, &zfsvfs->z_groupobjquota_obj);
899 	if (error == ENOENT)
900 		zfsvfs->z_groupobjquota_obj = 0;
901 	else if (error != 0)
902 		return (error);
903 
904 	error = zap_lookup(os, MASTER_NODE_OBJ,
905 	    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA],
906 	    8, 1, &zfsvfs->z_projectobjquota_obj);
907 	if (error == ENOENT)
908 		zfsvfs->z_projectobjquota_obj = 0;
909 	else if (error != 0)
910 		return (error);
911 
912 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
913 	    &zfsvfs->z_fuid_obj);
914 	if (error == ENOENT)
915 		zfsvfs->z_fuid_obj = 0;
916 	else if (error != 0)
917 		return (error);
918 
919 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
920 	    &zfsvfs->z_shares_dir);
921 	if (error == ENOENT)
922 		zfsvfs->z_shares_dir = 0;
923 	else if (error != 0)
924 		return (error);
925 
926 	/*
927 	 * Only use the name cache if we are looking for a
928 	 * name on a file system that does not require normalization
929 	 * or case folding.  We can also look there if we happen to be
930 	 * on a non-normalizing, mixed sensitivity file system IF we
931 	 * are looking for the exact name (which is always the case on
932 	 * FreeBSD).
933 	 */
934 	zfsvfs->z_use_namecache = !zfsvfs->z_norm ||
935 	    ((zfsvfs->z_case == ZFS_CASE_MIXED) &&
936 	    !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER));
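	/*
	 * For example (illustrative): with normalization=none and
	 * casesensitivity=mixed, z_norm only has U8_TEXTPREP_TOUPPER set,
	 * so the namecache stays enabled; normalization=formD disables it.
	 */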
937 
938 	return (0);
939 }
940 
941 taskq_t *zfsvfs_taskq;
942 
943 static void
944 zfsvfs_task_unlinked_drain(void *context, int pending __unused)
945 {
946 
947 	zfs_unlinked_drain((zfsvfs_t *)context);
948 }
949 
950 int
951 zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp)
952 {
953 	objset_t *os;
954 	zfsvfs_t *zfsvfs;
955 	int error;
956 	boolean_t ro = (readonly || (strchr(osname, '@') != NULL));
957 
958 	/*
959 	 * XXX: Fix struct statfs so this isn't necessary!
960 	 *
961 	 * The 'osname' is used as the filesystem's special node, which means
962 	 * it must fit in statfs.f_mntfromname, or else it can't be
963 	 * enumerated, so libzfs_mnttab_find() returns NULL, which causes
964 	 * 'zfs unmount' to think it's not mounted when it is.
965 	 */
966 	if (strlen(osname) >= MNAMELEN)
967 		return (SET_ERROR(ENAMETOOLONG));
968 
969 	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
970 
971 	error = dmu_objset_own(osname, DMU_OST_ZFS, ro, B_TRUE, zfsvfs,
972 	    &os);
973 	if (error != 0) {
974 		kmem_free(zfsvfs, sizeof (zfsvfs_t));
975 		return (error);
976 	}
977 
978 	error = zfsvfs_create_impl(zfvp, zfsvfs, os);
979 
980 	return (error);
981 }
982 
983 
984 int
985 zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
986 {
987 	int error;
988 
989 	zfsvfs->z_vfs = NULL;
990 	zfsvfs->z_parent = zfsvfs;
991 
992 	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
993 	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
994 	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
995 	    offsetof(znode_t, z_link_node));
996 	TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0,
997 	    zfsvfs_task_unlinked_drain, zfsvfs);
998 	ZFS_TEARDOWN_INIT(zfsvfs);
999 	ZFS_TEARDOWN_INACTIVE_INIT(zfsvfs);
1000 	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
1001 	for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++)
1002 		mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
1003 
1004 	error = zfsvfs_init(zfsvfs, os);
1005 	if (error != 0) {
1006 		dmu_objset_disown(os, B_TRUE, zfsvfs);
1007 		*zfvp = NULL;
1008 		kmem_free(zfsvfs, sizeof (zfsvfs_t));
1009 		return (error);
1010 	}
1011 
1012 	*zfvp = zfsvfs;
1013 	return (0);
1014 }
1015 
1016 static int
1017 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
1018 {
1019 	int error;
1020 
1021 	/*
1022 	 * Check for a bad on-disk format version now since we
1023 	 * lied about owning the dataset readonly before.
1024 	 */
1025 	if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
1026 	    dmu_objset_incompatible_encryption_version(zfsvfs->z_os))
1027 		return (SET_ERROR(EROFS));
1028 
1029 	error = zfs_register_callbacks(zfsvfs->z_vfs);
1030 	if (error)
1031 		return (error);
1032 
1033 	zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
1034 
1035 	/*
1036 	 * If we are not mounting (ie: online recv), then we don't
1037 	 * have to worry about replaying the log as we blocked all
1038 	 * operations out since we closed the ZIL.
1039 	 */
1040 	if (mounting) {
1041 		boolean_t readonly;
1042 
1043 		ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
1044 		dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
1045 
1046 		/*
1047 		 * During replay we remove the read only flag to
1048 		 * allow replays to succeed.
1049 		 */
1050 		readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
1051 		if (readonly != 0) {
1052 			zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
1053 		} else {
1054 			dsl_dir_t *dd;
1055 			zap_stats_t zs;
1056 
1057 			if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
1058 			    &zs) == 0) {
1059 				dataset_kstats_update_nunlinks_kstat(
1060 				    &zfsvfs->z_kstat, zs.zs_num_entries);
1061 				dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
1062 				    "num_entries in unlinked set: %llu",
1063 				    (u_longlong_t)zs.zs_num_entries);
1064 			}
1065 
1066 			zfs_unlinked_drain(zfsvfs);
1067 			dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
1068 			dd->dd_activity_cancelled = B_FALSE;
1069 		}
1070 
1071 		/*
1072 		 * Parse and replay the intent log.
1073 		 *
1074 		 * Because of ziltest, this must be done after
1075 		 * zfs_unlinked_drain().  (Further note: ziltest
1076 		 * doesn't use readonly mounts, where
1077 		 * zfs_unlinked_drain() isn't called.)  This is because
1078 		 * ziltest causes spa_sync() to think it's committed,
1079 		 * but actually it is not, so the intent log contains
1080 		 * many txg's worth of changes.
1081 		 *
1082 		 * In particular, if object N is in the unlinked set in
1083 		 * the last txg to actually sync, then it could be
1084 		 * actually freed in a later txg and then reallocated
1085 		 * in a yet later txg.  This would write a "create
1086 		 * object N" record to the intent log.  Normally, this
1087 		 * would be fine because the spa_sync() would have
1088 		 * written out the fact that object N is free, before
1089 		 * we could write the "create object N" intent log
1090 		 * record.
1091 		 *
1092 		 * But when we are in ziltest mode, we advance the "open
1093 		 * txg" without actually spa_sync()-ing the changes to
1094 		 * disk.  So we would see that object N is still
1095 		 * allocated and in the unlinked set, and there is an
1096 		 * intent log record saying to allocate it.
1097 		 */
1098 		if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
1099 			if (zil_replay_disable) {
1100 				zil_destroy(zfsvfs->z_log, B_FALSE);
1101 			} else {
1102 				boolean_t use_nc = zfsvfs->z_use_namecache;
1103 				zfsvfs->z_use_namecache = B_FALSE;
1104 				zfsvfs->z_replay = B_TRUE;
1105 				zil_replay(zfsvfs->z_os, zfsvfs,
1106 				    zfs_replay_vector);
1107 				zfsvfs->z_replay = B_FALSE;
1108 				zfsvfs->z_use_namecache = use_nc;
1109 			}
1110 		}
1111 
1112 		/* restore readonly bit */
1113 		if (readonly != 0)
1114 			zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
1115 	}
1116 
1117 	/*
1118 	 * Set the objset user_ptr to track its zfsvfs.
1119 	 */
1120 	mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
1121 	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
1122 	mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
1123 
1124 	return (0);
1125 }
1126 
1127 void
1128 zfsvfs_free(zfsvfs_t *zfsvfs)
1129 {
1130 	int i;
1131 
1132 	zfs_fuid_destroy(zfsvfs);
1133 
1134 	mutex_destroy(&zfsvfs->z_znodes_lock);
1135 	mutex_destroy(&zfsvfs->z_lock);
1136 	ASSERT3U(zfsvfs->z_nr_znodes, ==, 0);
1137 	list_destroy(&zfsvfs->z_all_znodes);
1138 	ZFS_TEARDOWN_DESTROY(zfsvfs);
1139 	ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs);
1140 	rw_destroy(&zfsvfs->z_fuid_lock);
1141 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
1142 		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
1143 	dataset_kstats_destroy(&zfsvfs->z_kstat);
1144 	kmem_free(zfsvfs, sizeof (zfsvfs_t));
1145 }
1146 
1147 static void
1148 zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
1149 {
1150 	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
1151 	if (zfsvfs->z_vfs) {
1152 		if (zfsvfs->z_use_fuids) {
1153 			vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
1154 			vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
1155 			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
1156 			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
1157 			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
1158 			vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
1159 		} else {
1160 			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
1161 			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
1162 			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
1163 			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
1164 			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
1165 			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
1166 		}
1167 	}
1168 	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
1169 }
1170 
1171 static int
1172 zfs_domount(vfs_t *vfsp, char *osname)
1173 {
1174 	uint64_t recordsize, fsid_guid;
1175 	int error = 0;
1176 	zfsvfs_t *zfsvfs;
1177 
1178 	ASSERT3P(vfsp, !=, NULL);
1179 	ASSERT3P(osname, !=, NULL);
1180 
1181 	error = zfsvfs_create(osname, vfsp->mnt_flag & MNT_RDONLY, &zfsvfs);
1182 	if (error)
1183 		return (error);
1184 	zfsvfs->z_vfs = vfsp;
1185 
1186 	if ((error = dsl_prop_get_integer(osname,
1187 	    "recordsize", &recordsize, NULL)))
1188 		goto out;
1189 	zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE;
1190 	zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize;
1191 
1192 	vfsp->vfs_data = zfsvfs;
1193 	vfsp->mnt_flag |= MNT_LOCAL;
1194 	vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
1195 	vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
1196 	vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
1197 	/*
1198 	 * This can cause a loss of coherence between ARC and page cache
1199 	 * on ZoF - unclear if the problem is in FreeBSD or ZoF
1200 	 */
1201 	vfsp->mnt_kern_flag |= MNTK_NO_IOPF;	/* vn_io_fault can be used */
1202 	vfsp->mnt_kern_flag |= MNTK_NOMSYNC;
1203 	vfsp->mnt_kern_flag |= MNTK_VMSETSIZE_BUG;
1204 
1205 #if defined(_KERNEL) && !defined(KMEM_DEBUG)
1206 	vfsp->mnt_kern_flag |= MNTK_FPLOOKUP;
1207 #endif
1208 	/*
1209 	 * The fsid is 64 bits, composed of an 8-bit fs type, which
1210 	 * separates our fsid from any other filesystem types, and a
1211 	 * 56-bit objset unique ID.  The objset unique ID is unique to
1212 	 * all objsets open on this system, provided by unique_create().
1213 	 * The 8-bit fs type must be put in the low bits of fsid[1]
1214 	 * because that's where other Solaris filesystems put it.
1215 	 */
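	/*
	 * Worked example (values assumed): with fsid_guid 0x00123456789abcde
	 * and vfc_typenum 0x2f, val[0] becomes 0x789abcde and val[1] becomes
	 * (0x00123456 << 8) | 0x2f == 0x1234562f.
	 */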
1216 	fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
1217 	ASSERT3U((fsid_guid & ~((1ULL << 56) - 1)), ==, 0);
1218 	vfsp->vfs_fsid.val[0] = fsid_guid;
1219 	vfsp->vfs_fsid.val[1] = ((fsid_guid >> 32) << 8) |
1220 	    (vfsp->mnt_vfc->vfc_typenum & 0xFF);
1221 
1222 	/*
1223 	 * Set features for file system.
1224 	 */
1225 	zfs_set_fuid_feature(zfsvfs);
1226 	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
1227 		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
1228 		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
1229 		vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE);
1230 	} else if (zfsvfs->z_case == ZFS_CASE_MIXED) {
1231 		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
1232 		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
1233 	}
1234 	vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);
1235 
1236 	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
1237 		uint64_t pval;
1238 
1239 		atime_changed_cb(zfsvfs, B_FALSE);
1240 		readonly_changed_cb(zfsvfs, B_TRUE);
1241 		if ((error = dsl_prop_get_integer(osname,
1242 		    "xattr", &pval, NULL)))
1243 			goto out;
1244 		xattr_changed_cb(zfsvfs, pval);
1245 		if ((error = dsl_prop_get_integer(osname,
1246 		    "acltype", &pval, NULL)))
1247 			goto out;
1248 		acl_type_changed_cb(zfsvfs, pval);
1249 		zfsvfs->z_issnap = B_TRUE;
1250 		zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;
1251 
1252 		mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
1253 		dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
1254 		mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
1255 	} else {
1256 		if ((error = zfsvfs_setup(zfsvfs, B_TRUE)))
1257 			goto out;
1258 	}
1259 
1260 	vfs_mountedfrom(vfsp, osname);
1261 
1262 	if (!zfsvfs->z_issnap)
1263 		zfsctl_create(zfsvfs);
1264 out:
1265 	if (error) {
1266 		dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
1267 		zfsvfs_free(zfsvfs);
1268 	} else {
1269 		atomic_inc_32(&zfs_active_fs_count);
1270 	}
1271 
1272 	return (error);
1273 }
1274 
1275 static void
1276 zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
1277 {
1278 	objset_t *os = zfsvfs->z_os;
1279 
1280 	if (!dmu_objset_is_snapshot(os))
1281 		dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs);
1282 }
1283 
1284 static int
1285 getpoolname(const char *osname, char *poolname)
1286 {
1287 	char *p;
1288 
1289 	p = strchr(osname, '/');
1290 	if (p == NULL) {
1291 		if (strlen(osname) >= MAXNAMELEN)
1292 			return (ENAMETOOLONG);
1293 		(void) strcpy(poolname, osname);
1294 	} else {
1295 		if (p - osname >= MAXNAMELEN)
1296 			return (ENAMETOOLONG);
1297 		(void) strncpy(poolname, osname, p - osname);
1298 		poolname[p - osname] = '\0';
1299 	}
1300 	return (0);
1301 }
1302 
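/*
 * A leading '!' in the dataset name passed via the "from" mount option
 * (e.g. "!rpool/ROOT/default", a name assumed for illustration) requests a
 * checkpoint rewind when the root pool is imported; the '!' is stripped
 * from the name here.
 */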
1303 static void
1304 fetch_osname_options(char *name, bool *checkpointrewind)
1305 {
1306 
1307 	if (name[0] == '!') {
1308 		*checkpointrewind = true;
1309 		memmove(name, name + 1, strlen(name));
1310 	} else {
1311 		*checkpointrewind = false;
1312 	}
1313 }
1314 
1315 /*ARGSUSED*/
1316 static int
1317 zfs_mount(vfs_t *vfsp)
1318 {
1319 	kthread_t	*td = curthread;
1320 	vnode_t		*mvp = vfsp->mnt_vnodecovered;
1321 	cred_t		*cr = td->td_ucred;
1322 	char		*osname;
1323 	int		error = 0;
1324 	int		canwrite;
1325 	bool		checkpointrewind;
1326 
1327 	if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL))
1328 		return (SET_ERROR(EINVAL));
1329 
1330 	/*
1331 	 * If full-owner-access is enabled and delegated administration is
1332 	 * turned on, we must set nosuid.
1333 	 */
1334 	if (zfs_super_owner &&
1335 	    dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) {
1336 		secpolicy_fs_mount_clearopts(cr, vfsp);
1337 	}
1338 
1339 	fetch_osname_options(osname, &checkpointrewind);
1340 
1341 	/*
1342 	 * Check for mount privilege?
1343 	 *
1344 	 * If we don't have privilege then see if
1345 	 * we have local permission to allow it
1346 	 */
1347 	error = secpolicy_fs_mount(cr, mvp, vfsp);
1348 	if (error) {
1349 		if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0)
1350 			goto out;
1351 
1352 		if (!(vfsp->vfs_flag & MS_REMOUNT)) {
1353 			vattr_t		vattr;
1354 
1355 			/*
1356 			 * Make sure user is the owner of the mount point
1357 			 * or has sufficient privileges.
1358 			 */
1359 
1360 			vattr.va_mask = AT_UID;
1361 
1362 			vn_lock(mvp, LK_SHARED | LK_RETRY);
1363 			if (VOP_GETATTR(mvp, &vattr, cr)) {
1364 				VOP_UNLOCK1(mvp);
1365 				goto out;
1366 			}
1367 
1368 			if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 &&
1369 			    VOP_ACCESS(mvp, VWRITE, cr, td) != 0) {
1370 				VOP_UNLOCK1(mvp);
1371 				goto out;
1372 			}
1373 			VOP_UNLOCK1(mvp);
1374 		}
1375 
1376 		secpolicy_fs_mount_clearopts(cr, vfsp);
1377 	}
1378 
1379 	/*
1380 	 * Refuse to mount a filesystem if we are in a local zone and the
1381 	 * dataset is not visible.
1382 	 */
1383 	if (!INGLOBALZONE(curproc) &&
1384 	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
1385 		error = SET_ERROR(EPERM);
1386 		goto out;
1387 	}
1388 
1389 	vfsp->vfs_flag |= MNT_NFS4ACLS;
1390 
1391 	/*
1392 	 * When doing a remount, we simply refresh our temporary properties
1393 	 * according to those options set in the current VFS options.
1394 	 */
1395 	if (vfsp->vfs_flag & MS_REMOUNT) {
1396 		zfsvfs_t *zfsvfs = vfsp->vfs_data;
1397 
1398 		/*
1399 		 * Refresh mount options with z_teardown_lock blocking I/O while
1400 		 * the filesystem is in an inconsistent state.
1401 		 * The lock also serializes this code with filesystem
1402 		 * manipulations between entry to zfs_suspend_fs() and return
1403 		 * from zfs_resume_fs().
1404 		 */
1405 		ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
1406 		zfs_unregister_callbacks(zfsvfs);
1407 		error = zfs_register_callbacks(vfsp);
1408 		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
1409 		goto out;
1410 	}
1411 
1412 	/* Initial root mount: try hard to import the requested root pool. */
1413 	if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 &&
1414 	    (vfsp->vfs_flag & MNT_UPDATE) == 0) {
1415 		char pname[MAXNAMELEN];
1416 
1417 		error = getpoolname(osname, pname);
1418 		if (error == 0)
1419 			error = spa_import_rootpool(pname, checkpointrewind);
1420 		if (error)
1421 			goto out;
1422 	}
1423 	DROP_GIANT();
1424 	error = zfs_domount(vfsp, osname);
1425 	PICKUP_GIANT();
1426 
1427 out:
1428 	return (error);
1429 }
1430 
1431 static int
1432 zfs_statfs(vfs_t *vfsp, struct statfs *statp)
1433 {
1434 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1435 	uint64_t refdbytes, availbytes, usedobjs, availobjs;
1436 
1437 	statp->f_version = STATFS_VERSION;
1438 
1439 	ZFS_ENTER(zfsvfs);
1440 
1441 	dmu_objset_space(zfsvfs->z_os,
1442 	    &refdbytes, &availbytes, &usedobjs, &availobjs);
1443 
1444 	/*
1445 	 * The underlying storage pool actually uses multiple block sizes.
1446 	 * We report the fragsize as the smallest block size we support,
1447 	 * and we report our blocksize as the filesystem's maximum blocksize.
1448 	 */
1449 	statp->f_bsize = SPA_MINBLOCKSIZE;
1450 	statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize;
1451 
1452 	/*
1453 	 * The following fields report "total" blocks of various kinds in
1454 	 * the file system, in units of f_bsize - the "fragment" size
1455 	 * (SPA_MINBLOCKSIZE) set above.
1456 	 */
1457 
1458 	statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
1459 	statp->f_bfree = availbytes / statp->f_bsize;
1460 	statp->f_bavail = statp->f_bfree; /* no root reservation */
1461 
1462 	/*
1463 	 * statvfs() should really be called statufs(), because it assumes
1464 	 * static metadata.  ZFS doesn't preallocate files, so the best
1465 	 * we can do is report the max that could possibly fit in f_files,
1466 	 * and that minus the number actually used in f_ffree.
1467 	 * For f_ffree, report the smaller of the number of objects available
1468 	 * and the number of blocks (each object will take at least a block).
1469 	 */
1470 	statp->f_ffree = MIN(availobjs, statp->f_bfree);
1471 	statp->f_files = statp->f_ffree + usedobjs;
1472 
1473 	/*
1474 	 * We're a zfs filesystem.
1475 	 */
1476 	strlcpy(statp->f_fstypename, "zfs",
1477 	    sizeof (statp->f_fstypename));
1478 
1479 	strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
1480 	    sizeof (statp->f_mntfromname));
1481 	strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
1482 	    sizeof (statp->f_mntonname));
1483 
1484 	statp->f_namemax = MAXNAMELEN - 1;
1485 
1486 	ZFS_EXIT(zfsvfs);
1487 	return (0);
1488 }
1489 
1490 static int
1491 zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
1492 {
1493 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1494 	znode_t *rootzp;
1495 	int error;
1496 
1497 	ZFS_ENTER(zfsvfs);
1498 
1499 	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
1500 	if (error == 0)
1501 		*vpp = ZTOV(rootzp);
1502 
1503 	ZFS_EXIT(zfsvfs);
1504 
1505 	if (error == 0) {
1506 		error = vn_lock(*vpp, flags);
1507 		if (error != 0) {
1508 			VN_RELE(*vpp);
1509 			*vpp = NULL;
1510 		}
1511 	}
1512 	return (error);
1513 }
1514 
1515 /*
1516  * Teardown the zfsvfs::z_os.
1517  *
1518  * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
1519  * and 'z_teardown_inactive_lock' held.
1520  */
1521 static int
1522 zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
1523 {
1524 	znode_t	*zp;
1525 	dsl_dir_t *dd;
1526 
1527 	/*
1528 	 * If someone has not already unmounted this file system,
1529 	 * drain the zrele_taskq to ensure all active references to the
1530 	 * zfsvfs_t have been handled; only then can it be safely destroyed.
1531 	 */
1532 	if (zfsvfs->z_os) {
1533 		/*
1534 		 * If we're unmounting we have to wait for the list to
1535 		 * drain completely.
1536 		 *
1537 		 * If we're not unmounting there's no guarantee the list
1538 		 * will drain completely, but zreles run from the taskq
1539 		 * may add the parents of dir-based xattrs to the taskq
1540 		 * so we want to wait for these.
1541 		 *
1542 		 * We can safely read z_nr_znodes without locking because the
1543 		 * VFS has already blocked operations which add to the
1544 		 * z_all_znodes list and thus increment z_nr_znodes.
1545 		 */
1546 		int round = 0;
1547 		while (zfsvfs->z_nr_znodes > 0) {
1548 			taskq_wait_outstanding(dsl_pool_zrele_taskq(
1549 			    dmu_objset_pool(zfsvfs->z_os)), 0);
1550 			if (++round > 1 && !unmounting)
1551 				break;
1552 		}
1553 	}
1554 	ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
1555 
1556 	if (!unmounting) {
1557 		/*
1558 		 * We purge the parent filesystem's vfsp as the parent
1559 		 * filesystem and all of its snapshots have their vnode's
1560 		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
1561 	 * v_vfsp set to the parent filesystem's vfsp.  Note,
1562 		 */
1563 #ifdef FREEBSD_NAMECACHE
1564 #if __FreeBSD_version >= 1300117
1565 		cache_purgevfs(zfsvfs->z_parent->z_vfs);
1566 #else
1567 		cache_purgevfs(zfsvfs->z_parent->z_vfs, true);
1568 #endif
1569 #endif
1570 	}
1571 
1572 	/*
1573 	 * Close the zil. NB: Can't close the zil while zfs_inactive
1574 	 * threads are blocked as zil_close can call zfs_inactive.
1575 	 */
1576 	if (zfsvfs->z_log) {
1577 		zil_close(zfsvfs->z_log);
1578 		zfsvfs->z_log = NULL;
1579 	}
1580 
1581 	ZFS_TEARDOWN_INACTIVE_ENTER_WRITE(zfsvfs);
1582 
1583 	/*
1584 	 * If we are not unmounting (ie: online recv) and someone already
1585 	 * unmounted this file system while we were doing the switcheroo,
1586 	 * or a reopen of z_os failed then just bail out now.
1587 	 * or a reopen of z_os failed, then just bail out now.
1588 	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
1589 		ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
1590 		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
1591 		return (SET_ERROR(EIO));
1592 	}
1593 
1594 	/*
1595 	 * At this point there are no vops active, and any new vops will
1596 	 * fail with EIO since we have z_teardown_lock for writer (only
1597 	 * relevant for forced unmount).
1598 	 *
1599 	 * Release all holds on dbufs.
1600 	 */
1601 	mutex_enter(&zfsvfs->z_znodes_lock);
1602 	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
1603 	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
1604 		if (zp->z_sa_hdl != NULL) {
1605 			zfs_znode_dmu_fini(zp);
1606 		}
1607 	}
1608 	mutex_exit(&zfsvfs->z_znodes_lock);
1609 
1610 	/*
1611 	 * If we are unmounting, set the unmounted flag and let new vops
1612 	 * unblock.  zfs_inactive will have the unmounted behavior, and all
1613 	 * other vops will fail with EIO.
1614 	 */
1615 	if (unmounting) {
1616 		zfsvfs->z_unmounted = B_TRUE;
1617 		ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
1618 		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
1619 	}
1620 
1621 	/*
1622 	 * z_os will be NULL if there was an error in attempting to reopen
1623 	 * zfsvfs, so just return as the properties had already been
1624 	 * unregistered and cached data had been evicted before.
1625 	 */
1626 	if (zfsvfs->z_os == NULL)
1627 		return (0);
1628 
1629 	/*
1630 	 * Unregister properties.
1631 	 */
1632 	zfs_unregister_callbacks(zfsvfs);
1633 
1634 	/*
1635 	 * Evict cached data
1636 	 */
1637 	if (!zfs_is_readonly(zfsvfs))
1638 		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
1639 	dmu_objset_evict_dbufs(zfsvfs->z_os);
1640 	dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
1641 	dsl_dir_cancel_waiters(dd);
1642 
1643 	return (0);
1644 }
1645 
1646 /*ARGSUSED*/
1647 static int
1648 zfs_umount(vfs_t *vfsp, int fflag)
1649 {
1650 	kthread_t *td = curthread;
1651 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1652 	objset_t *os;
1653 	cred_t *cr = td->td_ucred;
1654 	int ret;
1655 
1656 	ret = secpolicy_fs_unmount(cr, vfsp);
1657 	if (ret) {
1658 		if (dsl_deleg_access((char *)vfsp->vfs_resource,
1659 		    ZFS_DELEG_PERM_MOUNT, cr))
1660 			return (ret);
1661 	}
1662 
1663 	/*
1664 	 * Unmount any snapshots mounted under .zfs before unmounting the
1665 	 * dataset itself.
1666 	 */
1667 	if (zfsvfs->z_ctldir != NULL) {
1668 		if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
1669 			return (ret);
1670 	}
1671 
1672 	if (fflag & MS_FORCE) {
1673 		/*
1674 		 * Mark file system as unmounted before calling
1675 		 * vflush(FORCECLOSE). This way we ensure no future vnops
1676 		 * will be called and risk operating on DOOMED vnodes.
1677 		 */
1678 		ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
1679 		zfsvfs->z_unmounted = B_TRUE;
1680 		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
1681 	}
1682 
1683 	/*
1684 	 * Flush all the files.
1685 	 */
1686 	ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td);
1687 	if (ret != 0)
1688 		return (ret);
1689 	while (taskqueue_cancel(zfsvfs_taskq->tq_queue,
1690 	    &zfsvfs->z_unlinked_drain_task, NULL) != 0)
1691 		taskqueue_drain(zfsvfs_taskq->tq_queue,
1692 		    &zfsvfs->z_unlinked_drain_task);
1693 
1694 	VERIFY0(zfsvfs_teardown(zfsvfs, B_TRUE));
1695 	os = zfsvfs->z_os;
1696 
1697 	/*
1698 	 * z_os will be NULL if there was an error in
1699 	 * attempting to reopen zfsvfs.
1700 	 */
1701 	if (os != NULL) {
1702 		/*
1703 		 * Unset the objset user_ptr.
1704 		 */
1705 		mutex_enter(&os->os_user_ptr_lock);
1706 		dmu_objset_set_user(os, NULL);
1707 		mutex_exit(&os->os_user_ptr_lock);
1708 
1709 		/*
1710 		 * Finally release the objset
1711 		 */
1712 		dmu_objset_disown(os, B_TRUE, zfsvfs);
1713 	}
1714 
1715 	/*
1716 	 * We can now safely destroy the '.zfs' directory node.
1717 	 */
1718 	if (zfsvfs->z_ctldir != NULL)
1719 		zfsctl_destroy(zfsvfs);
1720 	zfs_freevfs(vfsp);
1721 
1722 	return (0);
1723 }
1724 
1725 static int
1726 zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
1727 {
1728 	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
1729 	znode_t		*zp;
1730 	int 		err;
1731 
1732 	/*
1733 	 * zfs_zget() can't operate on virtual entries like .zfs/ or
1734 	 * .zfs/snapshot/ directories, which is why we return EOPNOTSUPP.
1735 	 * This will make NFS switch to LOOKUP instead of using VGET.
1736 	 */
1737 	if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR ||
1738 	    (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir))
1739 		return (EOPNOTSUPP);
1740 
1741 	ZFS_ENTER(zfsvfs);
1742 	err = zfs_zget(zfsvfs, ino, &zp);
1743 	if (err == 0 && zp->z_unlinked) {
1744 		vrele(ZTOV(zp));
1745 		err = EINVAL;
1746 	}
1747 	if (err == 0)
1748 		*vpp = ZTOV(zp);
1749 	ZFS_EXIT(zfsvfs);
1750 	if (err == 0) {
1751 		err = vn_lock(*vpp, flags);
1752 		if (err != 0)
1753 			vrele(*vpp);
1754 	}
1755 	if (err != 0)
1756 		*vpp = NULL;
1757 	return (err);
1758 }
1759 
1760 static int
1761 #if __FreeBSD_version >= 1300098
1762 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
1763     struct ucred **credanonp, int *numsecflavors, int *secflavors)
1764 #else
1765 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
1766     struct ucred **credanonp, int *numsecflavors, int **secflavors)
1767 #endif
1768 {
1769 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1770 
1771 	/*
1772 	 * If this is a regular file system, vfsp is the same as
1773 	 * zfsvfs->z_parent->z_vfs; but if it is a snapshot,
1774 	 * zfsvfs->z_parent->z_vfs represents the parent file system,
1775 	 * which we have to use here, because only that file system
1776 	 * has mnt_export configured.
1777 	 */
1778 	return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp,
1779 	    credanonp, numsecflavors, secflavors));
1780 }
1781 
1782 CTASSERT(SHORT_FID_LEN <= sizeof (struct fid));
1783 CTASSERT(LONG_FID_LEN <= sizeof (struct fid));
1784 
1785 static int
1786 zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
1787 {
1788 	struct componentname cn;
1789 	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
1790 	znode_t		*zp;
1791 	vnode_t		*dvp;
1792 	uint64_t	object = 0;
1793 	uint64_t	fid_gen = 0;
1794 	uint64_t	gen_mask;
1795 	uint64_t	zp_gen;
1796 	int 		i, err;
1797 
1798 	*vpp = NULL;
1799 
1800 	ZFS_ENTER(zfsvfs);
1801 
1802 	/*
1803 	 * On FreeBSD we can get a snapshot's mount point or its parent file
1804 	 * system's mount point, depending on whether the snapshot is mounted.
1805 	 */
1806 	if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) {
1807 		zfid_long_t	*zlfid = (zfid_long_t *)fidp;
1808 		uint64_t	objsetid = 0;
1809 		uint64_t	setgen = 0;
1810 
1811 		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
1812 			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
1813 
1814 		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
1815 			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
1816 
1817 		ZFS_EXIT(zfsvfs);
1818 
1819 		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
1820 		if (err)
1821 			return (SET_ERROR(EINVAL));
1822 		ZFS_ENTER(zfsvfs);
1823 	}
1824 
1825 	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
1826 		zfid_short_t	*zfid = (zfid_short_t *)fidp;
1827 
1828 		for (i = 0; i < sizeof (zfid->zf_object); i++)
1829 			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
1830 
1831 		for (i = 0; i < sizeof (zfid->zf_gen); i++)
1832 			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
1833 	} else {
1834 		ZFS_EXIT(zfsvfs);
1835 		return (SET_ERROR(EINVAL));
1836 	}
1837 
1838 	/*
1839 	 * A zero fid_gen means we are in .zfs or the .zfs/snapshot
1840 	 * directory tree. If the object == zfsvfs->z_shares_dir, then
1841 	 * we are in the .zfs/shares directory tree.
1842 	 */
1843 	if ((fid_gen == 0 &&
1844 	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) ||
1845 	    (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) {
1846 		ZFS_EXIT(zfsvfs);
1847 		VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp));
1848 		if (object == ZFSCTL_INO_SNAPDIR) {
1849 			cn.cn_nameptr = "snapshot";
1850 			cn.cn_namelen = strlen(cn.cn_nameptr);
1851 			cn.cn_nameiop = LOOKUP;
1852 			cn.cn_flags = ISLASTCN | LOCKLEAF;
1853 			cn.cn_lkflags = flags;
1854 			VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
1855 			vput(dvp);
1856 		} else if (object == zfsvfs->z_shares_dir) {
1857 			/*
1858 			 * XXX This branch must never be taken;
1859 			 * if it is, the lookup below will
1860 			 * explode.
1861 			 */
1862 			cn.cn_nameptr = "shares";
1863 			cn.cn_namelen = strlen(cn.cn_nameptr);
1864 			cn.cn_nameiop = LOOKUP;
1865 			cn.cn_flags = ISLASTCN;
1866 			cn.cn_lkflags = flags;
1867 			VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
1868 			vput(dvp);
1869 		} else {
1870 			*vpp = dvp;
1871 		}
1872 		return (0);
1873 	}
1874 
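	/*
	 * At this point 'i' holds the number of generation bytes decoded
	 * from the fid, so gen_mask truncates the znode's generation to
	 * the same width before the comparison below.
	 */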
1875 	gen_mask = -1ULL >> (64 - 8 * i);
1876 
1877 	dprintf("getting %llu [%llu mask %llx]\n", (u_longlong_t)object,
1878 	    (u_longlong_t)fid_gen,
1879 	    (u_longlong_t)gen_mask);
1880 	if ((err = zfs_zget(zfsvfs, object, &zp))) {
1881 		ZFS_EXIT(zfsvfs);
1882 		return (err);
1883 	}
1884 	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
1885 	    sizeof (uint64_t));
1886 	zp_gen = zp_gen & gen_mask;
1887 	if (zp_gen == 0)
1888 		zp_gen = 1;
1889 	if (zp->z_unlinked || zp_gen != fid_gen) {
1890 		dprintf("znode gen (%llu) != fid gen (%llu)\n",
1891 		    (u_longlong_t)zp_gen, (u_longlong_t)fid_gen);
1892 		vrele(ZTOV(zp));
1893 		ZFS_EXIT(zfsvfs);
1894 		return (SET_ERROR(EINVAL));
1895 	}
1896 
1897 	*vpp = ZTOV(zp);
1898 	ZFS_EXIT(zfsvfs);
1899 	err = vn_lock(*vpp, flags);
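	/*
	 * On success, make sure the vnode has a VM object, sized to the
	 * file, backing it so that it can be memory mapped.
	 */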
	if (err == 0) {
		vnode_create_vobject(*vpp, zp->z_size, curthread);
	} else {
		vrele(*vpp);
		*vpp = NULL;
	}
1904 	return (err);
1905 }
1906 
1907 /*
1908  * Block out VOPs and close zfsvfs_t::z_os
1909  *
1910  * Note, if successful, then we return with the 'z_teardown_lock' and
1911  * 'z_teardown_inactive_lock' write held.  We leave ownership of the underlying
1912  * dataset and objset intact so that they can be atomically handed off during
1913  * a subsequent rollback or recv operation and the resume thereafter.
1914  */
1915 int
1916 zfs_suspend_fs(zfsvfs_t *zfsvfs)
1917 {
1918 	int error;
1919 
1920 	if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
1921 		return (error);
1922 
1923 	return (0);
1924 }
1925 
1926 /*
1927  * Rebuild SA and release VOPs.  Note that ownership of the underlying dataset
1928  * is an invariant across any of the operations that can be performed while the
1929  * filesystem was suspended.  Whether it succeeded or failed, the preconditions
1930  * are the same: the relevant objset and associated dataset are owned by
1931  * zfsvfs, held, and long held on entry.
1932  */
1933 int
1934 zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
1935 {
1936 	int err;
1937 	znode_t *zp;
1938 
1939 	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
1940 	ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));
1941 
1942 	/*
1943 	 * We already own this, so just update the objset_t, as the one we
1944 	 * had before may have been evicted.
1945 	 */
1946 	objset_t *os;
1947 	VERIFY3P(ds->ds_owner, ==, zfsvfs);
1948 	VERIFY(dsl_dataset_long_held(ds));
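	/*
	 * dmu_objset_from_ds() needs the pool config lock held across the
	 * lookup, hence the enter/exit pair around it.
	 */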
1949 	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
1950 	dsl_pool_config_enter(dp, FTAG);
1951 	VERIFY0(dmu_objset_from_ds(ds, &os));
1952 	dsl_pool_config_exit(dp, FTAG);
1953 
1954 	err = zfsvfs_init(zfsvfs, os);
1955 	if (err != 0)
1956 		goto bail;
1957 
1958 	ds->ds_dir->dd_activity_cancelled = B_FALSE;
1959 	VERIFY0(zfsvfs_setup(zfsvfs, B_FALSE));
1960 
1961 	zfs_set_fuid_feature(zfsvfs);
1962 
1963 	/*
1964 	 * Attempt to re-establish all the active znodes with
1965 	 * their dbufs.  If a zfs_rezget() fails, then we'll let
1966 	 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
1967 	 * when they try to use their znode.
1968 	 */
1969 	mutex_enter(&zfsvfs->z_znodes_lock);
1970 	for (zp = list_head(&zfsvfs->z_all_znodes); zp;
1971 	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
1972 		(void) zfs_rezget(zp);
1973 	}
1974 	mutex_exit(&zfsvfs->z_znodes_lock);
1975 
1976 bail:
1977 	/* release the VOPs */
1978 	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
1979 	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
1980 
1981 	if (err) {
1982 		/*
1983 		 * Since we couldn't set up the SA framework, try to force
1984 		 * unmount this file system.
1985 		 */
1986 		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) {
1987 			vfs_ref(zfsvfs->z_vfs);
1988 			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
1989 		}
1990 	}
1991 	return (err);
1992 }
1993 
1994 static void
1995 zfs_freevfs(vfs_t *vfsp)
1996 {
1997 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1998 
1999 	zfsvfs_free(zfsvfs);
2000 
2001 	atomic_dec_32(&zfs_active_fs_count);
2002 }
2003 
2004 #ifdef __i386__
2005 static int desiredvnodes_backup;
2006 #include <sys/vmmeter.h>
2007 
2008 
2009 #include <vm/vm_page.h>
2010 #include <vm/vm_object.h>
2011 #include <vm/vm_kern.h>
2012 #include <vm/vm_map.h>
2013 #endif
2014 
2015 static void
2016 zfs_vnodes_adjust(void)
2017 {
2018 #ifdef __i386__
2019 	int newdesiredvnodes;
2020 
2021 	desiredvnodes_backup = desiredvnodes;
2022 
2023 	/*
2024 	 * We calculate newdesiredvnodes the same way it is done in
2025 	 * vntblinit(). If it is equal to desiredvnodes, it means that
2026 	 * it wasn't tuned by the administrator and we can tune it down.
2027 	 */
2028 	newdesiredvnodes = min(maxproc + vm_cnt.v_page_count / 4, 2 *
2029 	    vm_kmem_size / (5 * (sizeof (struct vm_object) +
2030 	    sizeof (struct vnode))));
2031 	if (newdesiredvnodes == desiredvnodes)
2032 		desiredvnodes = (3 * newdesiredvnodes) / 4;
2033 #endif
2034 }
2035 
2036 static void
2037 zfs_vnodes_adjust_back(void)
2038 {
2039 
2040 #ifdef __i386__
2041 	desiredvnodes = desiredvnodes_backup;
2042 #endif
2043 }
2044 
2045 void
2046 zfs_init(void)
2047 {
2048 
2049 	printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n");
2050 
2051 	/*
2052 	 * Initialize .zfs directory structures
2053 	 */
2054 	zfsctl_init();
2055 
2056 	/*
2057 	 * Initialize znode cache, vnode ops, etc...
2058 	 */
2059 	zfs_znode_init();
2060 
2061 	/*
2062 	 * Reduce the number of vnodes.  The default is calculated with UFS
2063 	 * inodes in mind, which makes it too large for ZFS on i386, so we
2064 	 * lower it here.
2065 	 */
2066 	zfs_vnodes_adjust();
2067 
2068 	dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);
2069 
2070 	zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
2071 }
2072 
2073 void
2074 zfs_fini(void)
2075 {
2076 	taskq_destroy(zfsvfs_taskq);
2077 	zfsctl_fini();
2078 	zfs_znode_fini();
2079 	zfs_vnodes_adjust_back();
2080 }
2081 
2082 int
2083 zfs_busy(void)
2084 {
2085 	return (zfs_active_fs_count != 0);
2086 }
2087 
2088 /*
2089  * Release VOPs and unmount a suspended filesystem.
2090  */
2091 int
2092 zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
2093 {
2094 	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
2095 	ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));
2096 
2097 	/*
2098 	 * We already own this, so just update the objset_t, as the one we
2099 	 * had before may have been evicted.
2100 	 */
2101 	objset_t *os;
2102 	VERIFY3P(ds->ds_owner, ==, zfsvfs);
2103 	VERIFY(dsl_dataset_long_held(ds));
2104 	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
2105 	dsl_pool_config_enter(dp, FTAG);
2106 	VERIFY0(dmu_objset_from_ds(ds, &os));
2107 	dsl_pool_config_exit(dp, FTAG);
2108 	zfsvfs->z_os = os;
2109 
2110 	/* release the VOPs */
2111 	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
2112 	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
2113 
2114 	/*
2115 	 * Try to force unmount this file system.
2116 	 */
2117 	(void) zfs_umount(zfsvfs->z_vfs, 0);
2118 	zfsvfs->z_unmounted = B_TRUE;
2119 	return (0);
2120 }
2121 
2122 int
2123 zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
2124 {
2125 	int error;
2126 	objset_t *os = zfsvfs->z_os;
2127 	dmu_tx_t *tx;
2128 
2129 	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
2130 		return (SET_ERROR(EINVAL));
2131 
2132 	if (newvers < zfsvfs->z_version)
2133 		return (SET_ERROR(EINVAL));
2134 
2135 	if (zfs_spa_version_map(newvers) >
2136 	    spa_version(dmu_objset_spa(zfsvfs->z_os)))
2137 		return (SET_ERROR(ENOTSUP));
2138 
2139 	tx = dmu_tx_create(os);
2140 	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
2141 	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
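		/*
		 * Upgrading to an SA-capable version creates the SA master
		 * node below, so also hold the ZFS_SA_ATTRS entry in the
		 * master node and the to-be-created object in this tx.
		 */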
2142 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
2143 		    ZFS_SA_ATTRS);
2144 		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_FALSE, NULL);
2145 	}
2146 	error = dmu_tx_assign(tx, TXG_WAIT);
2147 	if (error) {
2148 		dmu_tx_abort(tx);
2149 		return (error);
2150 	}
2151 
2152 	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
2153 	    8, 1, &newvers, tx);
2154 
2155 	if (error) {
2156 		dmu_tx_commit(tx);
2157 		return (error);
2158 	}
2159 
2160 	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
2161 		uint64_t sa_obj;
2162 
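		/*
		 * Create the SA master node, point the ZFS_SA_ATTRS entry
		 * of the master node at it, and register the callback that
		 * lazily upgrades existing znodes to system attributes.
		 */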
2163 		ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
2164 		    SPA_VERSION_SA);
2165 		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
2166 		    DMU_OT_NONE, 0, tx);
2167 
2168 		error = zap_add(os, MASTER_NODE_OBJ,
2169 		    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
2170 		ASSERT0(error);
2171 
2172 		VERIFY0(sa_set_sa_object(os, sa_obj));
2173 		sa_register_update_callback(os, zfs_sa_upgrade);
2174 	}
2175 
2176 	spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
2177 	    "from %ju to %ju", (uintmax_t)zfsvfs->z_version,
2178 	    (uintmax_t)newvers);
2179 	dmu_tx_commit(tx);
2180 
2181 	zfsvfs->z_version = newvers;
2182 	os->os_version = newvers;
2183 
2184 	zfs_set_fuid_feature(zfsvfs);
2185 
2186 	return (0);
2187 }
2188 
2189 /*
2190  * Read a property stored within the master node.
2191  */
2192 int
2193 zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
2194 {
2195 	uint64_t *cached_copy = NULL;
2196 
2197 	/*
2198 	 * Figure out where in the objset_t the cached copy would live, if it
2199 	 * is available for the requested property.
2200 	 */
2201 	if (os != NULL) {
2202 		switch (prop) {
2203 		case ZFS_PROP_VERSION:
2204 			cached_copy = &os->os_version;
2205 			break;
2206 		case ZFS_PROP_NORMALIZE:
2207 			cached_copy = &os->os_normalization;
2208 			break;
2209 		case ZFS_PROP_UTF8ONLY:
2210 			cached_copy = &os->os_utf8only;
2211 			break;
2212 		case ZFS_PROP_CASE:
2213 			cached_copy = &os->os_casesensitivity;
2214 			break;
2215 		default:
2216 			break;
2217 		}
2218 	}
2219 	if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
2220 		*value = *cached_copy;
2221 		return (0);
2222 	}
2223 
2224 	/*
2225 	 * If the property wasn't cached, look up the file system's value for
2226 	 * the property. For the version property, we look up a slightly
2227 	 * different string.
2228 	 */
2229 	const char *pname;
2230 	int error = ENOENT;
2231 	if (prop == ZFS_PROP_VERSION) {
2232 		pname = ZPL_VERSION_STR;
2233 	} else {
2234 		pname = zfs_prop_to_name(prop);
2235 	}
2236 
2237 	if (os != NULL) {
2238 		ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
2239 		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
2240 	}
2241 
2242 	if (error == ENOENT) {
2243 		/* No value set, use the default value */
2244 		switch (prop) {
2245 		case ZFS_PROP_VERSION:
2246 			*value = ZPL_VERSION;
2247 			break;
2248 		case ZFS_PROP_NORMALIZE:
2249 		case ZFS_PROP_UTF8ONLY:
2250 			*value = 0;
2251 			break;
2252 		case ZFS_PROP_CASE:
2253 			*value = ZFS_CASE_SENSITIVE;
2254 			break;
2255 		case ZFS_PROP_ACLTYPE:
2256 			*value = ZFS_ACLTYPE_NFSV4;
2257 			break;
2258 		default:
2259 			return (error);
2260 		}
2261 		error = 0;
2262 	}
2263 
2264 	/*
2265 	 * If one of the methods for getting the property value above worked,
2266 	 * copy it into the objset_t's cache.
2267 	 */
2268 	if (error == 0 && cached_copy != NULL) {
2269 		*cached_copy = *value;
2270 	}
2271 
2272 	return (error);
2273 }
2274 
2275 /*
2276  * Return true if the corresponding vfs is being unmounted (MNTK_UNMOUNT
2277  * is set in mnt_kern_flag), otherwise return false.  A true result means
2278  * a VFS unmount has been initiated.
2279  */
2280 boolean_t
2281 zfs_get_vfs_flag_unmounted(objset_t *os)
2282 {
2283 	zfsvfs_t *zfvp;
2284 	boolean_t unmounted = B_FALSE;
2285 
2286 	ASSERT3U(dmu_objset_type(os), ==, DMU_OST_ZFS);
2287 
2288 	mutex_enter(&os->os_user_ptr_lock);
2289 	zfvp = dmu_objset_get_user(os);
2290 	if (zfvp != NULL && zfvp->z_vfs != NULL &&
2291 	    (zfvp->z_vfs->mnt_kern_flag & MNTK_UNMOUNT))
2292 		unmounted = B_TRUE;
2293 	mutex_exit(&os->os_user_ptr_lock);
2294 
2295 	return (unmounted);
2296 }
2297 
2298 #ifdef _KERNEL
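/*
 * Called after a dataset rename: walk the mount list and rewrite
 * f_mntfromname for the renamed file system itself (exact match) and for
 * any descendant datasets or snapshots mounted beneath it (the old name
 * followed by '/' or '@').
 */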
2299 void
2300 zfsvfs_update_fromname(const char *oldname, const char *newname)
2301 {
2302 	char tmpbuf[MAXPATHLEN];
2303 	struct mount *mp;
2304 	char *fromname;
2305 	size_t oldlen;
2306 
2307 	oldlen = strlen(oldname);
2308 
2309 	mtx_lock(&mountlist_mtx);
2310 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
2311 		fromname = mp->mnt_stat.f_mntfromname;
2312 		if (strcmp(fromname, oldname) == 0) {
2313 			(void) strlcpy(fromname, newname,
2314 			    sizeof (mp->mnt_stat.f_mntfromname));
2315 			continue;
2316 		}
2317 		if (strncmp(fromname, oldname, oldlen) == 0 &&
2318 		    (fromname[oldlen] == '/' || fromname[oldlen] == '@')) {
2319 			(void) snprintf(tmpbuf, sizeof (tmpbuf), "%s%s",
2320 			    newname, fromname + oldlen);
2321 			(void) strlcpy(fromname, tmpbuf,
2322 			    sizeof (mp->mnt_stat.f_mntfromname));
2323 			continue;
2324 		}
2325 	}
2326 	mtx_unlock(&mountlist_mtx);
2327 }
2328 #endif
2329