xref: /netbsd-src/external/cddl/osnet/dist/uts/common/fs/zfs/zfs_vfsops.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/sysmacros.h>
30 #include <sys/kmem.h>
31 #include <sys/pathname.h>
32 #include <sys/vnode.h>
33 #include <sys/vfs.h>
34 #include <sys/vfs_opreg.h>
35 #include <sys/mntent.h>
36 #include <sys/mount.h>
37 #include <sys/cmn_err.h>
38 #include <sys/zfs_znode.h>
39 #include <sys/zfs_dir.h>
40 #include <sys/zil.h>
41 #include <sys/fs/zfs.h>
42 #include <sys/dmu.h>
43 #include <sys/dsl_prop.h>
44 #include <sys/dsl_dataset.h>
45 #include <sys/dsl_deleg.h>
46 #include <sys/spa.h>
47 #include <sys/zap.h>
48 #include <sys/varargs.h>
49 #include <sys/policy.h>
50 #include <sys/atomic.h>
51 #include <sys/mkdev.h>
52 #include <sys/modctl.h>
53 #include <sys/zfs_ioctl.h>
54 #include <sys/zfs_ctldir.h>
55 #include <sys/zfs_fuid.h>
56 #include <sys/sunddi.h>
57 #include <sys/dnlc.h>
58 #include <sys/dmu_objset.h>
59 #include <sys/spa_boot.h>
60 
61 #ifdef __NetBSD__
62 /* ddi_name_to_major() is pulled in here; is there a better place for it? */
63 #include <sys/ddi.h>
64 #include <sys/systm.h>
65 #endif
66 
67 int zfsfstype;
68 vfsops_t *zfs_vfsops = NULL;
69 static major_t zfs_major;
70 static minor_t zfs_minor;
71 static kmutex_t	zfs_dev_mtx;
72 
73 int zfs_debug_level;
74 kmutex_t zfs_debug_mtx;
75 
76 /* XXX NetBSD static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr);*/
77 static int zfs_mount(vfs_t *vfsp, const char *path, void *data, size_t *data_len);
78 static int zfs_umount(vfs_t *vfsp, int fflag);
79 static int zfs_root(vfs_t *vfsp, vnode_t **vpp);
80 static int zfs_statvfs(vfs_t *vfsp, struct statvfs *statp);
81 static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp);
82 static int zfs_vget(vfs_t *vfsp, ino_t ino, vnode_t **vpp);
83 static int zfs_start(vfs_t *vfsp, int flags);
84 static void zfs_freevfs(vfs_t *vfsp);
85 
86 void zfs_init(void);
87 void zfs_fini(void);
88 
89 
90 extern const struct vnodeopv_desc zfs_vnodeop_opv_desc;
91 
92 static const struct vnodeopv_desc * const zfs_vnodeop_descs[] = {
93 	&zfs_vnodeop_opv_desc,
94 	NULL,
95 };
96 
97 static struct vfsops zfs_vfsops_template = {
98 	.vfs_name = MOUNT_ZFS,
99 	.vfs_min_mount_data = sizeof(struct zfs_args),
100 	.vfs_opv_descs = zfs_vnodeop_descs,
101 	.vfs_mount = zfs_mount,
102 	.vfs_unmount = zfs_umount,
103 	.vfs_root = zfs_root,
104 	.vfs_statvfs = zfs_statvfs,
105 	.vfs_sync = zfs_sync,
106 	.vfs_vget = zfs_vget,
107 	.vfs_fhtovp = zfs_fhtovp,
108 	.vfs_init = zfs_init,
109 	.vfs_done = zfs_fini,
110 	.vfs_start = zfs_start,
111 	.vfs_renamelock_enter = (void*)nullop,
112 	.vfs_renamelock_exit = (void*)nullop,
113 	.vfs_reinit = (void *)nullop,
114 	.vfs_vptofh = (void *)eopnotsupp,
116 	.vfs_quotactl = (void *)eopnotsupp,
117 	.vfs_extattrctl = (void *)eopnotsupp,
118 	.vfs_snapshot = (void *)eopnotsupp,
119 	.vfs_fsync = (void *)eopnotsupp,
120 };
121 
122 /*
123  * We need to keep a count of active file systems.
124  * This is necessary to prevent our module
125  * from being unloaded after a umount -f.
126  */
127 static uint32_t	zfs_active_fs_count = 0;
128 
129 static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };
130 static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };
131 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
132 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
133 
134 /*
135  * MO_DEFAULT is not used since the default value is determined
136  * by the equivalent property.
137  */
138 static mntopt_t mntopts[] = {
139 	{ MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, NULL },
140 	{ MNTOPT_XATTR, xattr_cancel, NULL, 0, NULL },
141 	{ MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL },
142 	{ MNTOPT_ATIME, atime_cancel, NULL, 0, NULL }
143 };
144 
145 static mntopts_t zfs_mntopts = {
146 	sizeof (mntopts) / sizeof (mntopt_t),
147 	mntopts
148 };
149 
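/*
 * Selector for the vnode iterator in zfs_sync(): pick only vnodes
 * whose znode has a dirty atime and has not been unlinked.
 */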
150 static bool
151 zfs_sync_selector(void *cl, struct vnode *vp)
152 {
153 	znode_t *zp;
154 
155 	/*
156 	 * Skip the vnode/inode if inaccessible, or if the
157 	 * atime is clean.
158 	 */
159 	zp = VTOZ(vp);
160 	return zp != NULL && vp->v_type != VNON && zp->z_atime_dirty != 0
161 	    && !zp->z_unlinked;
162 }
163 
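/*
 * VFS sync entry point.  Push out batched atime updates first (see
 * below), then either commit the ZIL of this file system or, when no
 * vfsp is given, wait for every pool to sync its dirty data.
 */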
164 /*ARGSUSED*/
165 int
166 zfs_sync(vfs_t *vfsp, int flag, cred_t *cr)
167 {
168 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
169 	znode_t *zp;
170 	vnode_t *vp;
171 	struct vnode_iterator *marker;
172 	dmu_tx_t *tx;
173 	int error;
174 
175 
176 	error = 0;
177 
178 	/*
179 	 * Data integrity is job one.  We don't want a compromised kernel
180 	 * writing to the storage pool, so we never sync during panic.
181 	 */
182 	if (panicstr)
183 		return (0);
184 
185 	/*
186 	 * On NetBSD, we need to push out atime updates.  Solaris does
187 	 * this during VOP_INACTIVE, but that does not work well with the
188 	 * BSD VFS, so we do it in batch here.
189 	 */
190 	vfs_vnode_iterator_init(vfsp, &marker);
191 	while ((vp = vfs_vnode_iterator_next(marker, zfs_sync_selector, NULL)))
192 	{
193 		error = vn_lock(vp, LK_EXCLUSIVE);
194 		if (error) {
195 			vrele(vp);
196 			continue;
197 		}
198 		zp = VTOZ(vp);
199 		tx = dmu_tx_create(zfsvfs->z_os);
200 		dmu_tx_hold_bonus(tx, zp->z_id);
201 		error = dmu_tx_assign(tx, TXG_WAIT);
202 		if (error) {
203 			dmu_tx_abort(tx);
204 		} else {
205 			dmu_buf_will_dirty(zp->z_dbuf, tx);
206 			mutex_enter(&zp->z_lock);
207 			zp->z_atime_dirty = 0;
208 			mutex_exit(&zp->z_lock);
209 			dmu_tx_commit(tx);
210 		}
211 		vput(vp);
212 	}
213 	vfs_vnode_iterator_destroy(marker);
214 
215 	/*
216 	 * SYNC_ATTR is used by fsflush() to force old filesystems like UFS
217 	 * to sync metadata, which they would otherwise cache indefinitely.
218 	 * Semantically, the only requirement is that the sync be initiated.
219 	 * The DMU syncs out txgs frequently, so there's nothing to do.
220 	 */
221 	if ((flag & MNT_LAZY) != 0)
222 		return (0);
223 
224 	if (vfsp != NULL) {
225 		/*
226 		 * Sync a specific filesystem.
227 		 */
228 		zfsvfs_t *zfsvfs = vfsp->vfs_data;
229 		dsl_pool_t *dp;
230 
231 		ZFS_ENTER(zfsvfs);
232 		dp = dmu_objset_pool(zfsvfs->z_os);
233 
234 		/*
235 		 * If the system is shutting down, then skip any
236 		 * filesystems which may exist on a suspended pool.
237 		 */
238 		if (sys_shutdown && spa_suspended(dp->dp_spa)) {
239 			ZFS_EXIT(zfsvfs);
240 			return (0);
241 		}
242 
243 		if (zfsvfs->z_log != NULL)
244 			zil_commit(zfsvfs->z_log, UINT64_MAX, 0);
245 		else
246 			txg_wait_synced(dp, 0);
247 		ZFS_EXIT(zfsvfs);
248 	} else {
249 		/*
250 		 * Sync all ZFS filesystems.  This is what happens when you
251 		 * run sync(1M).  Unlike other filesystems, ZFS honors the
252 		 * request by waiting for all pools to commit all dirty data.
253 		 */
254 		spa_sync_allpools();
255 	}
256 
257 	return (0);
258 }
259 
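/*
 * Pick a unique dev_t for this mount by scanning minor numbers under
 * zfs_dev_mtx; the Solaris getudev() major-number fallback is
 * compiled out on NetBSD.
 */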
260 static int
261 zfs_create_unique_device(dev_t *dev)
262 {
263 	major_t new_major;
264 
265 	do {
266 		ASSERT3U(zfs_minor, <=, MAXMIN);
267 		minor_t start = zfs_minor;
268 		do {
269 			mutex_enter(&zfs_dev_mtx);
270 			if (zfs_minor >= MAXMIN) {
271 				/*
272 				 * If we're still using the real major
273 				 * keep out of /dev/zfs and /dev/zvol minor
274 				 * number space.  If we're using a getudev()'ed
275 				 * major number, we can use all of its minors.
276 				 */
277 				if (zfs_major == ddi_name_to_major(ZFS_DRIVER))
278 					zfs_minor = ZFS_MIN_MINOR;
279 				else
280 					zfs_minor = 0;
281 			} else {
282 				zfs_minor++;
283 			}
284 			*dev = makedevice(zfs_major, zfs_minor);
285 			mutex_exit(&zfs_dev_mtx);
286 		} while (vfs_devismounted(*dev) && zfs_minor != start);
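		/*
		 * NetBSD: accept whatever device we settled on above;
		 * the getudev() fallback below is compiled out.
		 */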
287 		break;
288 #ifndef __NetBSD__
289 		if (zfs_minor == start) {
290 			/*
291 			 * We are using all ~262,000 minor numbers for the
292 			 * current major number.  Create a new major number.
293 			 */
294 			if ((new_major = getudev()) == (major_t)-1) {
295 				cmn_err(CE_WARN,
296 				    "zfs_mount: Can't get unique major "
297 				    "device number.");
298 				return (-1);
299 			}
300 			mutex_enter(&zfs_dev_mtx);
301 			zfs_major = new_major;
302 			zfs_minor = 0;
303 
304 			mutex_exit(&zfs_dev_mtx);
305 		} else {
306 			break;
307 		}
308 		/* CONSTANTCONDITION */
309 #endif
310 	} while (1);
311 
312 	return (0);
313 }
314 
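/*
 * Property change callbacks.  Each one mirrors the new value of a
 * dataset property into the zfsvfs and/or the corresponding VFS
 * mount option or flag.
 */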
315 static void
316 atime_changed_cb(void *arg, uint64_t newval)
317 {
318 	zfsvfs_t *zfsvfs = arg;
319 
320 	if (newval == TRUE) {
321 		zfsvfs->z_atime = TRUE;
322 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
323 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
324 	} else {
325 		zfsvfs->z_atime = FALSE;
326 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
327 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
328 	}
329 }
330 
331 static void
332 xattr_changed_cb(void *arg, uint64_t newval)
333 {
334 	zfsvfs_t *zfsvfs = arg;
335 
336 	if (newval == TRUE) {
337 		/* XXX locking on vfs_flag? */
338 #ifdef TODO
339 		zfsvfs->z_vfs->vfs_flag |= VFS_XATTR;
340 #endif
341 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR);
342 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0);
343 	} else {
344 		/* XXX locking on vfs_flag? */
345 #ifdef TODO
346 		zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR;
347 #endif
348 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR);
349 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0);
350 	}
351 }
352 
353 static void
354 blksz_changed_cb(void *arg, uint64_t newval)
355 {
356 	zfsvfs_t *zfsvfs = arg;
357 
358 	if (newval < SPA_MINBLOCKSIZE ||
359 	    newval > SPA_MAXBLOCKSIZE || !ISP2(newval))
360 		newval = SPA_MAXBLOCKSIZE;
361 
362 	zfsvfs->z_max_blksz = newval;
363 	zfsvfs->z_vfs->vfs_bsize = newval;
364 }
365 
366 static void
367 readonly_changed_cb(void *arg, uint64_t newval)
368 {
369 	zfsvfs_t *zfsvfs = arg;
370 
371 	if (newval) {
372 		/* XXX locking on vfs_flag? */
373 		zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
374 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
375 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
376 	} else {
377 		/* XXX locking on vfs_flag? */
378 		zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
379 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
380 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
381 	}
382 }
383 
384 static void
385 devices_changed_cb(void *arg, uint64_t newval)
386 {
387 	zfsvfs_t *zfsvfs = arg;
388 
389 	if (newval == FALSE) {
390 		zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES;
391 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES);
392 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0);
393 	} else {
394 		zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES;
395 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES);
396 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0);
397 	}
398 }
399 
400 static void
401 setuid_changed_cb(void *arg, uint64_t newval)
402 {
403 	zfsvfs_t *zfsvfs = arg;
404 
405 	if (newval == FALSE) {
406 		zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
407 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
408 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
409 	} else {
410 		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
411 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
412 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
413 	}
414 }
415 
416 static void
417 exec_changed_cb(void *arg, uint64_t newval)
418 {
419 	zfsvfs_t *zfsvfs = arg;
420 
421 	if (newval == FALSE) {
422 		zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
423 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
424 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
425 	} else {
426 		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
427 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
428 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
429 	}
430 }
431 
432 /*
433  * The nbmand mount option can be changed at mount time.
434  * We can't allow it to be toggled on live file systems, or incorrect
435  * behavior may be seen from CIFS clients.
436  *
437  * This property isn't registered via dsl_prop_register(), but this callback
438  * will be called when a file system is first mounted.
439  */
440 static void
441 nbmand_changed_cb(void *arg, uint64_t newval)
442 {
443 	zfsvfs_t *zfsvfs = arg;
444 	if (newval == FALSE) {
445 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND);
446 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0);
447 	} else {
448 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND);
449 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0);
450 	}
451 }
452 
453 static void
454 snapdir_changed_cb(void *arg, uint64_t newval)
455 {
456 	zfsvfs_t *zfsvfs = arg;
457 
458 	zfsvfs->z_show_ctldir = newval;
459 }
460 
461 static void
462 vscan_changed_cb(void *arg, uint64_t newval)
463 {
464 	zfsvfs_t *zfsvfs = arg;
465 
466 	zfsvfs->z_vscan = newval;
467 }
468 
469 static void
470 acl_mode_changed_cb(void *arg, uint64_t newval)
471 {
472 	zfsvfs_t *zfsvfs = arg;
473 
474 	zfsvfs->z_acl_mode = newval;
475 }
476 
477 static void
478 acl_inherit_changed_cb(void *arg, uint64_t newval)
479 {
480 	zfsvfs_t *zfsvfs = arg;
481 
482 	zfsvfs->z_acl_inherit = newval;
483 }
484 
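/*
 * Register the property callbacks above with the DSL, then re-apply
 * any temporary overrides that were passed in as mount options.
 */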
485 static int
486 zfs_register_callbacks(vfs_t *vfsp)
487 {
488 	struct dsl_dataset *ds = NULL;
489 	objset_t *os = NULL;
490 	zfsvfs_t *zfsvfs = NULL;
491 	uint64_t nbmand;
492 	int readonly, do_readonly = B_FALSE;
493 	int setuid, do_setuid = B_FALSE;
494 	int exec, do_exec = B_FALSE;
495 	int devices, do_devices = B_FALSE;
496 	int xattr, do_xattr = B_FALSE;
497 	int atime, do_atime = B_FALSE;
498 	int error = 0;
499 
500 	ASSERT(vfsp);
501 	zfsvfs = vfsp->vfs_data;
502 	ASSERT(zfsvfs);
503 	os = zfsvfs->z_os;
504 
505 	/*
506 	 * The act of registering our callbacks will destroy any mount
507 	 * options we may have.  In order to enable temporary overrides
508 	 * of mount options, we stash away the current values and
509 	 * restore them after we register the callbacks.
510 	 */
511 	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
512 		readonly = B_TRUE;
513 		do_readonly = B_TRUE;
514 	} else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
515 		readonly = B_FALSE;
516 		do_readonly = B_TRUE;
517 	}
518 	if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
519 		devices = B_FALSE;
520 		setuid = B_FALSE;
521 		do_devices = B_TRUE;
522 		do_setuid = B_TRUE;
523 	} else {
524 		if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) {
525 			devices = B_FALSE;
526 			do_devices = B_TRUE;
527 		} else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) {
528 			devices = B_TRUE;
529 			do_devices = B_TRUE;
530 		}
531 
532 		if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
533 			setuid = B_FALSE;
534 			do_setuid = B_TRUE;
535 		} else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
536 			setuid = B_TRUE;
537 			do_setuid = B_TRUE;
538 		}
539 	}
540 	if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
541 		exec = B_FALSE;
542 		do_exec = B_TRUE;
543 	} else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
544 		exec = B_TRUE;
545 		do_exec = B_TRUE;
546 	}
547 	if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
548 		xattr = B_FALSE;
549 		do_xattr = B_TRUE;
550 	} else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
551 		xattr = B_TRUE;
552 		do_xattr = B_TRUE;
553 	}
554 	if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
555 		atime = B_FALSE;
556 		do_atime = B_TRUE;
557 	} else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
558 		atime = B_TRUE;
559 		do_atime = B_TRUE;
560 	}
561 
562 	/*
563 	 * nbmand is a special property.  It can only be changed at
564 	 * mount time; if it was not given explicitly as a mount option,
565 	 * fall back to the dataset's current property value.
568 	 */
569 	if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
570 		nbmand = B_FALSE;
571 	} else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
572 		nbmand = B_TRUE;
573 	} else {
574 		char osname[MAXNAMELEN];
575 
576 		dmu_objset_name(os, osname);
577 		if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand,
578 		    NULL)) {
579 			return (error);
580 		}
581 	}
582 
583 	/*
584 	 * Register property callbacks.
585 	 *
586 	 * It would probably be fine to just check for i/o error from
587 	 * the first prop_register(), but I guess I like to go
588 	 * overboard...
589 	 */
590 	ds = dmu_objset_ds(os);
591 	error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs);
592 	error = error ? error : dsl_prop_register(ds,
593 	    "xattr", xattr_changed_cb, zfsvfs);
594 	error = error ? error : dsl_prop_register(ds,
595 	    "recordsize", blksz_changed_cb, zfsvfs);
596 	error = error ? error : dsl_prop_register(ds,
597 	    "readonly", readonly_changed_cb, zfsvfs);
598 	error = error ? error : dsl_prop_register(ds,
599 	    "devices", devices_changed_cb, zfsvfs);
600 	error = error ? error : dsl_prop_register(ds,
601 	    "setuid", setuid_changed_cb, zfsvfs);
602 	error = error ? error : dsl_prop_register(ds,
603 	    "exec", exec_changed_cb, zfsvfs);
604 	error = error ? error : dsl_prop_register(ds,
605 	    "snapdir", snapdir_changed_cb, zfsvfs);
606 	error = error ? error : dsl_prop_register(ds,
607 	    "aclmode", acl_mode_changed_cb, zfsvfs);
608 	error = error ? error : dsl_prop_register(ds,
609 	    "aclinherit", acl_inherit_changed_cb, zfsvfs);
610 	error = error ? error : dsl_prop_register(ds,
611 	    "vscan", vscan_changed_cb, zfsvfs);
612 	if (error)
613 		goto unregister;
614 
615 	/*
616 	 * Invoke our callbacks to restore temporary mount options.
617 	 */
618 	if (do_readonly)
619 		readonly_changed_cb(zfsvfs, readonly);
620 	if (do_setuid)
621 		setuid_changed_cb(zfsvfs, setuid);
622 	if (do_exec)
623 		exec_changed_cb(zfsvfs, exec);
624 	if (do_devices)
625 		devices_changed_cb(zfsvfs, devices);
626 	if (do_xattr)
627 		xattr_changed_cb(zfsvfs, xattr);
628 	if (do_atime)
629 		atime_changed_cb(zfsvfs, atime);
630 
631 	nbmand_changed_cb(zfsvfs, nbmand);
632 
633 	return (0);
634 
635 unregister:
636 	/*
637 	 * We may attempt to unregister some callbacks that are not
638 	 * registered, but this is OK; it will simply return ENOMSG,
639 	 * which we will ignore.
640 	 */
641 	(void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs);
642 	(void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs);
643 	(void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs);
644 	(void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs);
645 	(void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs);
646 	(void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs);
647 	(void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs);
648 	(void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs);
649 	(void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs);
650 	(void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
651 	    zfsvfs);
652 	(void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs);
653 	return (error);
654 
655 }
656 
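/*
 * Apply a space delta to the per-user or per-group used-space ZAP;
 * entries that drop to zero are removed.
 */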
657 static void
658 uidacct(objset_t *os, boolean_t isgroup, uint64_t fuid,
659     int64_t delta, dmu_tx_t *tx)
660 {
661 	uint64_t used = 0;
662 	char buf[32];
663 	int err;
664 	uint64_t obj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
665 
666 	if (delta == 0)
667 		return;
668 
669 	(void) snprintf(buf, sizeof (buf), "%llx", (longlong_t)fuid);
670 	err = zap_lookup(os, obj, buf, 8, 1, &used);
671 	ASSERT(err == 0 || err == ENOENT);
672 	/* no underflow/overflow */
673 	ASSERT(delta > 0 || used >= -delta);
674 	ASSERT(delta < 0 || used + delta > used);
675 	used += delta;
676 	if (used == 0)
677 		err = zap_remove(os, obj, buf, tx);
678 	else
679 		err = zap_update(os, obj, buf, 8, 1, &used, tx);
680 	ASSERT(err == 0);
681 }
682 
683 static int
684 zfs_space_delta_cb(dmu_object_type_t bonustype, void *bonus,
685     uint64_t *userp, uint64_t *groupp)
686 {
687 	znode_phys_t *znp = bonus;
688 
689 	if (bonustype != DMU_OT_ZNODE)
690 		return (ENOENT);
691 
692 	*userp = znp->zp_uid;
693 	*groupp = znp->zp_gid;
694 	return (0);
695 }
696 
697 static void
698 fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr,
699     char *domainbuf, int buflen, uid_t *ridp)
700 {
701 	uint64_t fuid;
702 	const char *domain;
703 
704 	fuid = strtonum(fuidstr, NULL);
705 
706 	domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid));
707 	if (domain)
708 		(void) strlcpy(domainbuf, domain, buflen);
709 	else
710 		domainbuf[0] = '\0';
711 	*ridp = FUID_RID(fuid);
712 }
713 
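/*
 * Map a userquota property to the object that backs it; returns 0 if
 * the corresponding quota object has not been created yet.
 */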
714 static uint64_t
715 zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type)
716 {
717 	switch (type) {
718 	case ZFS_PROP_USERUSED:
719 		return (DMU_USERUSED_OBJECT);
720 	case ZFS_PROP_GROUPUSED:
721 		return (DMU_GROUPUSED_OBJECT);
722 	case ZFS_PROP_USERQUOTA:
723 		return (zfsvfs->z_userquota_obj);
724 	case ZFS_PROP_GROUPQUOTA:
725 		return (zfsvfs->z_groupquota_obj);
726 	}
727 	return (0);
728 }
729 
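/*
 * Bulk-retrieve used-space or quota entries into the caller's buffer
 * as zfs_useracct_t records; *cookiep carries the ZAP cursor between
 * calls.
 */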
730 int
731 zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
732     uint64_t *cookiep, void *vbuf, uint64_t *bufsizep)
733 {
734 	int error;
735 	zap_cursor_t zc;
736 	zap_attribute_t za;
737 	zfs_useracct_t *buf = vbuf;
738 	uint64_t obj;
739 
740 	if (!dmu_objset_userspace_present(zfsvfs->z_os))
741 		return (ENOTSUP);
742 
743 	obj = zfs_userquota_prop_to_obj(zfsvfs, type);
744 	if (obj == 0) {
745 		*bufsizep = 0;
746 		return (0);
747 	}
748 
749 	for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep);
750 	    (error = zap_cursor_retrieve(&zc, &za)) == 0;
751 	    zap_cursor_advance(&zc)) {
752 		if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) >
753 		    *bufsizep)
754 			break;
755 
756 		fuidstr_to_sid(zfsvfs, za.za_name,
757 		    buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid);
758 
759 		buf->zu_space = za.za_first_integer;
760 		buf++;
761 	}
762 	if (error == ENOENT)
763 		error = 0;
764 
765 	ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep);
766 	*bufsizep = (uintptr_t)buf - (uintptr_t)vbuf;
767 	*cookiep = zap_cursor_serialize(&zc);
768 	zap_cursor_fini(&zc);
769 	return (error);
770 }
771 
772 /*
773  * buf must be big enough (e.g., 32 bytes).
774  */
775 static int
776 id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid,
777     char *buf, size_t buflen, boolean_t addok)
778 {
779 	uint64_t fuid;
780 	int domainid = 0;
781 
782 	if (domain && domain[0]) {
783 		domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok);
784 		if (domainid == -1)
785 			return (ENOENT);
786 	}
787 	fuid = FUID_ENCODE(domainid, rid);
788 	(void) snprintf(buf, buflen, "%llx", (longlong_t)fuid);
789 	return (0);
790 }
791 
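/*
 * Retrieve a single used-space or quota value; absent entries read
 * back as zero.
 */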
792 int
793 zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
794     const char *domain, uint64_t rid, uint64_t *valp)
795 {
796 	char buf[32];
797 	int err;
798 	uint64_t obj;
799 
800 	*valp = 0;
801 
802 	if (!dmu_objset_userspace_present(zfsvfs->z_os))
803 		return (ENOTSUP);
804 
805 	obj = zfs_userquota_prop_to_obj(zfsvfs, type);
806 	if (obj == 0)
807 		return (0);
808 
809 	err = id_to_fuidstr(zfsvfs, domain, rid, buf, sizeof(buf), FALSE);
810 	if (err)
811 		return (err);
812 
813 	err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp);
814 	if (err == ENOENT)
815 		err = 0;
816 	return (err);
817 }
818 
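/*
 * Set a user or group quota (quota == 0 clears it), creating the
 * backing ZAP object on first use.
 */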
819 int
820 zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
821     const char *domain, uint64_t rid, uint64_t quota)
822 {
823 	char buf[32];
824 	int err;
825 	dmu_tx_t *tx;
826 	uint64_t *objp;
827 	boolean_t fuid_dirtied;
828 
829 	if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA)
830 		return (EINVAL);
831 
832 	if (zfsvfs->z_version < ZPL_VERSION_USERSPACE)
833 		return (ENOTSUP);
834 
835 	objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj :
836 	    &zfsvfs->z_groupquota_obj;
837 
838 	err = id_to_fuidstr(zfsvfs, domain, rid, buf, sizeof(buf), B_TRUE);
839 	if (err)
840 		return (err);
841 	fuid_dirtied = zfsvfs->z_fuid_dirty;
842 
843 	tx = dmu_tx_create(zfsvfs->z_os);
844 	dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL);
845 	if (*objp == 0) {
846 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
847 		    zfs_userquota_prop_prefixes[type]);
848 	}
849 	if (fuid_dirtied)
850 		zfs_fuid_txhold(zfsvfs, tx);
851 	err = dmu_tx_assign(tx, TXG_WAIT);
852 	if (err) {
853 		dmu_tx_abort(tx);
854 		return (err);
855 	}
856 
857 	mutex_enter(&zfsvfs->z_lock);
858 	if (*objp == 0) {
859 		*objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA,
860 		    DMU_OT_NONE, 0, tx);
861 		VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
862 		    zfs_userquota_prop_prefixes[type], 8, 1, objp, tx));
863 	}
864 	mutex_exit(&zfsvfs->z_lock);
865 
866 	if (quota == 0) {
867 		err = zap_remove(zfsvfs->z_os, *objp, buf, tx);
868 		if (err == ENOENT)
869 			err = 0;
870 	} else {
871 		err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, &quota, tx);
872 	}
873 	ASSERT(err == 0);
874 	if (fuid_dirtied)
875 		zfs_fuid_sync(zfsvfs, tx);
876 	dmu_tx_commit(tx);
877 	return (err);
878 }
879 
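/*
 * Return B_TRUE if the given user or group is at or over its quota;
 * never true during ZIL replay or when no quota object exists.
 */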
880 boolean_t
881 zfs_usergroup_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
882 {
883 	char buf[32];
884 	uint64_t used, quota, usedobj, quotaobj;
885 	int err;
886 
887 	usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
888 	quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
889 
890 	if (quotaobj == 0 || zfsvfs->z_replay)
891 		return (B_FALSE);
892 
893 	(void) snprintf(buf, sizeof(buf), "%llx", (longlong_t)fuid);
894 	err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, &quota);
895 	if (err != 0)
896 		return (B_FALSE);
897 
898 	err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used);
899 	if (err != 0)
900 		return (B_FALSE);
901 	return (used >= quota);
902 }
903 
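/*
 * Allocate a zfsvfs, take ownership of the objset and load the
 * master node entries (root, unlinked set, quota and FUID objects)
 * needed before the file system can be mounted.
 */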
904 int
905 zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
906 {
907 	objset_t *os;
908 	zfsvfs_t *zfsvfs;
909 	uint64_t zval;
910 	int i, error;
911 
912 	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
913 
914 	/*
915 	 * We claim to always be readonly so we can open snapshots;
916 	 * other ZPL code will prevent us from writing to snapshots.
917 	 */
918 	error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
919 	if (error) {
920 		kmem_free(zfsvfs, sizeof (zfsvfs_t));
921 		return (error);
922 	}
923 
924 	/*
925 	 * Initialize the zfs-specific filesystem structure.
926 	 * Should probably make this a kmem cache, shuffle fields,
927 	 * and just bzero up to z_hold_mtx[].
928 	 */
929 	zfsvfs->z_vfs = NULL;
930 	zfsvfs->z_parent = zfsvfs;
931 	zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
932 	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
933 	zfsvfs->z_os = os;
934 
935 	error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
936 	if (error) {
937 		goto out;
938 	} else if (zfsvfs->z_version > ZPL_VERSION) {
939 		(void) printf("Mismatched versions:  File system "
940 		    "is version %llu on-disk format, which is "
941 		    "incompatible with this software version %lld!",
942 		    (u_longlong_t)zfsvfs->z_version, ZPL_VERSION);
943 		error = ENOTSUP;
944 		goto out;
945 	}
946 
947 	if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
948 		goto out;
949 	zfsvfs->z_norm = (int)zval;
950 
951 	if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0)
952 		goto out;
953 	zfsvfs->z_utf8 = (zval != 0);
954 
955 	if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0)
956 		goto out;
957 	zfsvfs->z_case = (uint_t)zval;
958 
959 	/*
960 	 * Fold case on file systems that are always or sometimes case
961 	 * insensitive.
962 	 */
963 	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
964 	    zfsvfs->z_case == ZFS_CASE_MIXED)
965 		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
966 
967 	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
968 
969 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
970 	    &zfsvfs->z_root);
971 	if (error)
972 		goto out;
973 	ASSERT(zfsvfs->z_root != 0);
974 
975 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
976 	    &zfsvfs->z_unlinkedobj);
977 	if (error)
978 		goto out;
979 
980 	error = zap_lookup(os, MASTER_NODE_OBJ,
981 	    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
982 	    8, 1, &zfsvfs->z_userquota_obj);
983 	if (error && error != ENOENT)
984 		goto out;
985 
986 	error = zap_lookup(os, MASTER_NODE_OBJ,
987 	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
988 	    8, 1, &zfsvfs->z_groupquota_obj);
989 	if (error && error != ENOENT)
990 		goto out;
991 
992 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
993 	    &zfsvfs->z_fuid_obj);
994 	if (error && error != ENOENT)
995 		goto out;
996 
997 	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
998 	    &zfsvfs->z_shares_dir);
999 	if (error && error != ENOENT)
1000 		goto out;
1001 
1002 	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
1003 	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
1004 	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
1005 	    offsetof(znode_t, z_link_node));
1006 	rrw_init(&zfsvfs->z_teardown_lock);
1007 	rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
1008 	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
1009 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
1010 		mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
1011 
1012 	*zfvp = zfsvfs;
1013 	return (0);
1014 
1015 out:
1016 	dmu_objset_disown(os, zfsvfs);
1017 	*zfvp = NULL;
1018 	kmem_free(zfsvfs, sizeof (zfsvfs_t));
1019 	return (error);
1020 }
1021 
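/*
 * Setup shared by mount and online recv: register property
 * callbacks, open the ZIL and, when mounting, drain the unlinked set
 * and replay the intent log.
 */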
1022 static int
1023 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
1024 {
1025 	int error;
1026 
1027 	error = zfs_register_callbacks(zfsvfs->z_vfs);
1028 	if (error)
1029 		return (error);
1030 
1031 	/*
1032 	 * Set the objset user_ptr to track its zfsvfs.
1033 	 */
1034 	mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
1035 	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
1036 	mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
1037 
1038 	zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
1039 	if (zil_disable) {
1040 		zil_destroy(zfsvfs->z_log, B_FALSE);
1041 		zfsvfs->z_log = NULL;
1042 	}
1043 
1044 	/*
1045 	 * If we are not mounting (ie: online recv), then we don't
1046 	 * have to worry about replaying the log as we blocked all
1047 	 * operations out since we closed the ZIL.
1048 	 */
1049 	if (mounting) {
1050 		boolean_t readonly;
1051 
1052 		/*
1053 		 * During replay we remove the read only flag to
1054 		 * allow replays to succeed.
1055 		 */
1056 		readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
1057 		if (readonly != 0)
1058 			zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
1059 		else
1060 			zfs_unlinked_drain(zfsvfs);
1061 
1062 		if (zfsvfs->z_log) {
1063 			/*
1064 			 * Parse and replay the intent log.
1065 			 *
1066 			 * Because of ziltest, this must be done after
1067 			 * zfs_unlinked_drain().  (Further note: ziltest
1068 			 * doesn't use readonly mounts, where
1069 			 * zfs_unlinked_drain() isn't called.)  This is because
1070 			 * ziltest causes spa_sync() to think it's committed,
1071 			 * but actually it is not, so the intent log contains
1072 			 * many txg's worth of changes.
1073 			 *
1074 			 * In particular, if object N is in the unlinked set in
1075 			 * the last txg to actually sync, then it could be
1076 			 * actually freed in a later txg and then reallocated
1077 			 * in a yet later txg.  This would write a "create
1078 			 * object N" record to the intent log.  Normally, this
1079 			 * would be fine because the spa_sync() would have
1080 			 * written out the fact that object N is free, before
1081 			 * we could write the "create object N" intent log
1082 			 * record.
1083 			 *
1084 			 * But when we are in ziltest mode, we advance the "open
1085 			 * txg" without actually spa_sync()-ing the changes to
1086 			 * disk.  So we would see that object N is still
1087 			 * allocated and in the unlinked set, and there is an
1088 			 * intent log record saying to allocate it.
1089 			 */
1090 			zfsvfs->z_replay = B_TRUE;
1091 			zil_replay(zfsvfs->z_os, zfsvfs, zfs_replay_vector);
1092 			zfsvfs->z_replay = B_FALSE;
1093 		}
1094 		zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */
1095 	}
1096 
1097 	return (0);
1098 }
1099 
1100 void
1101 zfsvfs_free(zfsvfs_t *zfsvfs)
1102 {
1103 	int i;
1104 	extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */
1105 
1106 	/*
1107 	 * This is a barrier to prevent the filesystem from going away in
1108 	 * zfs_znode_move() until we can safely ensure that the filesystem is
1109 	 * not unmounted. We consider the filesystem valid before the barrier
1110 	 * and invalid after the barrier.
1111 	 */
1112 	rw_enter(&zfsvfs_lock, RW_READER);
1113 	rw_exit(&zfsvfs_lock);
1114 
1115 	zfs_fuid_destroy(zfsvfs);
1116 	mutex_destroy(&zfsvfs->z_znodes_lock);
1117 	mutex_destroy(&zfsvfs->z_lock);
1118 	list_destroy(&zfsvfs->z_all_znodes);
1119 	rrw_destroy(&zfsvfs->z_teardown_lock);
1120 	rw_destroy(&zfsvfs->z_teardown_inactive_lock);
1121 	rw_destroy(&zfsvfs->z_fuid_lock);
1122 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
1123 		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
1124 	kmem_free(zfsvfs, sizeof (zfsvfs_t));
1125 }
1126 
1127 static void
1128 zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
1129 {
1130 	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
1131 	if (zfsvfs->z_use_fuids && zfsvfs->z_vfs) {
1132 		vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
1133 		vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
1134 		vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
1135 		vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
1136 		vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
1137 		vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
1138 	}
1139 }
1140 
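/*
 * Core mount path: create the zfsvfs, derive the fsid from the
 * objset, set VFS features and either wire up a read-only snapshot
 * or finish with zfsvfs_setup().
 */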
1141 static int
1142 zfs_domount(vfs_t *vfsp, char *osname)
1143 {
1144 	dev_t mount_dev;
1145 	uint64_t recordsize, fsid_guid;
1146 	int error = 0;
1147 	zfsvfs_t *zfsvfs;
1148 
1149 	ASSERT(vfsp);
1150 	ASSERT(osname);
1151 
1152 	error = zfsvfs_create(osname, &zfsvfs);
1153 	if (error)
1154 		return (error);
1155 	zfsvfs->z_vfs = vfsp;
1156 	zfsvfs->z_parent = zfsvfs;
1157 	zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
1158 	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
1159 
1160 	/* Initialize the generic filesystem structure. */
1161 	vfsp->vfs_data = NULL;
1162 
1163 	if (zfs_create_unique_device(&mount_dev) == -1) {
1164 		error = ENODEV;
1165 		goto out;
1166 	}
1167 	ASSERT(vfs_devismounted(mount_dev) == 0);
1168 
1169 	if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize,
1170 	    NULL))
1171 	    goto out;
1172 
1173 	vfsp->vfs_bsize = DEV_BSIZE;
1174 	vfsp->vfs_flag |= VFS_NOTRUNC;
1175 	vfsp->vfs_data = zfsvfs;
1176 
1177 	/*
1178 	 * The fsid is 64 bits, composed of an 8-bit fs type, which
1179 	 * separates our fsid from any other filesystem types, and a
1180 	 * 56-bit objset unique ID.  The objset unique ID is unique to
1181 	 * all objsets open on this system, provided by unique_create().
1182 	 * The 8-bit fs type must be put in the low bits of fsid[1]
1183 	 * because that's where other Solaris filesystems put it.
1184 	 */
1185 	fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
1186 	ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0);
1187 	vfsp->mnt_stat.f_fsidx.__fsid_val[0] = fsid_guid;
1188 	vfsp->mnt_stat.f_fsidx.__fsid_val[1] = ((fsid_guid>>32) << 8) |
1189 	    zfsfstype & 0xFF;
1190 
1191 	dprintf("zfs_domount vrele after vfsp->vfs_count %d\n", vfsp->vfs_count);
1192 	/*
1193 	 * Set features for file system.
1194 	 */
1195 	zfs_set_fuid_feature(zfsvfs);
1196 	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
1197 		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
1198 		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
1199 		vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE);
1200 	} else if (zfsvfs->z_case == ZFS_CASE_MIXED) {
1201 		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
1202 		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
1203 	}
1204 	vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);
1205 
1206 	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
1207 		uint64_t pval;
1208 
1209 		atime_changed_cb(zfsvfs, B_FALSE);
1210 		readonly_changed_cb(zfsvfs, B_TRUE);
1211 		if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL))
1212 			goto out;
1213 		xattr_changed_cb(zfsvfs, pval);
1214 		zfsvfs->z_issnap = B_TRUE;
1215 
1216 		mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
1217 		dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
1218 		mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
1219 	} else {
1220 		error = zfsvfs_setup(zfsvfs, B_TRUE);
1221 	}
1222 
1223 	dprintf("zfs_vfsops.c zfs_domount called\n");
1224 	dprintf("vfsp->vfs_count %d\n", vfsp->vfs_count);
1225 
1226 	if (!zfsvfs->z_issnap)
1227 		zfsctl_create(zfsvfs);
1228 out:
1229 	if (error) {
1230 		dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1231 		zfsvfs_free(zfsvfs);
1232 	} else {
1233 		atomic_add_32(&zfs_active_fs_count, 1);
1234 	}
1235 	return (error);
1236 }
1237 
1238 void
1239 zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
1240 {
1241 	objset_t *os = zfsvfs->z_os;
1242 	struct dsl_dataset *ds;
1243 
1244 	/*
1245 	 * Unregister properties.
1246 	 */
1247 	if (!dmu_objset_is_snapshot(os)) {
1248 		ds = dmu_objset_ds(os);
1249 		VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb,
1250 		    zfsvfs) == 0);
1251 
1252 		VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb,
1253 		    zfsvfs) == 0);
1254 
1255 		VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb,
1256 		    zfsvfs) == 0);
1257 
1258 		VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb,
1259 		    zfsvfs) == 0);
1260 
1261 		VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb,
1262 		    zfsvfs) == 0);
1263 
1264 		VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb,
1265 		    zfsvfs) == 0);
1266 
1267 		VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb,
1268 		    zfsvfs) == 0);
1269 
1270 		VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb,
1271 		    zfsvfs) == 0);
1272 
1273 		VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb,
1274 		    zfsvfs) == 0);
1275 
1276 		VERIFY(dsl_prop_unregister(ds, "aclinherit",
1277 		    acl_inherit_changed_cb, zfsvfs) == 0);
1278 
1279 		VERIFY(dsl_prop_unregister(ds, "vscan",
1280 		    vscan_changed_cb, zfsvfs) == 0);
1281 	}
1282 }
1283 
1284 /*
1285  * Convert a decimal digit string to a uint64_t integer.
1286  */
1287 static int
1288 str_to_uint64(char *str, uint64_t *objnum)
1289 {
1290 	uint64_t num = 0;
1291 
1292 	while (*str) {
1293 		if (*str < '0' || *str > '9')
1294 			return (EINVAL);
1295 
1296 		num = num*10 + *str++ - '0';
1297 	}
1298 
1299 	*objnum = num;
1300 	return (0);
1301 }
1302 
1303 /*
1304  * The boot path passed from the boot loader is in the form of
1305  * "rootpool-name/root-filesystem-object-number'. Convert this
1306  * string to a dataset name: "rootpool-name/root-filesystem-name".
1307  */
1308 static int
1309 zfs_parse_bootfs(char *bpath, char *outpath)
1310 {
1311 	char *slashp;
1312 	uint64_t objnum;
1313 	int error;
1314 
1315 	if (*bpath == 0 || *bpath == '/')
1316 		return (EINVAL);
1317 
1318 	(void) strcpy(outpath, bpath);
1319 
1320 	slashp = strchr(bpath, '/');
1321 
1322 	/* if no '/', just return the pool name */
1323 	if (slashp == NULL) {
1324 		return (0);
1325 	}
1326 
1327 	/* if not a number, just return the root dataset name */
1328 	if (str_to_uint64(slashp+1, &objnum)) {
1329 		return (0);
1330 	}
1331 
1332 	*slashp = '\0';
1333 	error = dsl_dsobj_to_dsname(bpath, objnum, outpath);
1334 	*slashp = '/';
1335 
1336 	return (error);
1337 }
1338 
1339 
1340 /*
1341  * zfs_check_global_label:
1342  *	Check that the hex label string is appropriate for the dataset
1343  *	being mounted into the global_zone proper.
1344  *
1345  *	Return an error if the hex label string is not default or
1346  *	admin_low/admin_high.  For admin_low labels, the corresponding
1347  *	dataset must be readonly.
1348  */
1349 int
1350 zfs_check_global_label(const char *dsname, const char *hexsl)
1351 {
1352 #ifdef PORT_SOLARIS
1353 	if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
1354 		return (0);
1355 	if (strcasecmp(hexsl, ADMIN_HIGH) == 0)
1356 		return (0);
1357 	if (strcasecmp(hexsl, ADMIN_LOW) == 0) {
1358 		/* must be readonly */
1359 		uint64_t rdonly;
1360 
1361 		if (dsl_prop_get_integer(dsname,
1362 		    zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL))
1363 			return (EACCES);
1364 		return (rdonly ? 0 : EACCES);
1365 	}
1366 	return (EACCES);
1367 #else
1368 	return 0;
1369 #endif
1370 }
1371 
1372 /*
1373  * zfs_mount_label_policy:
1374  *	Determine whether the mount is allowed according to MAC check,
1375  *	by comparing (where appropriate) the label of the dataset against
1376  *	the label of the zone being mounted into.  If the dataset has
1377  *	no label, create one.
1378  *
1379  *	Returns:
1380  *		 0 :	access allowed
1381  *		>0 :	error code, such as EACCES
1382  */
1383 static int
1384 zfs_mount_label_policy(vfs_t *vfsp, char *osname)
1385 {
1386 #ifdef PORT_SOLARIS
1387 	int		error, retv;
1388 	zone_t		*mntzone = NULL;
1389 	ts_label_t	*mnt_tsl;
1390 	bslabel_t	*mnt_sl;
1391 	bslabel_t	ds_sl;
1392 	char		ds_hexsl[MAXNAMELEN];
1393 
1394 	retv = EACCES;				/* assume the worst */
1395 
1396 	/*
1397 	 * Start by getting the dataset label if it exists.
1398 	 */
1399 	error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
1400 	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
1401 	if (error)
1402 		return (EACCES);
1403 
1404 	/*
1405 	 * If labeling is NOT enabled, then disallow the mount of datasets
1406 	 * which have a non-default label already.  No other label checks
1407 	 * are needed.
1408 	 */
1409 	if (!is_system_labeled()) {
1410 		if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
1411 			return (0);
1412 		return (EACCES);
1413 	}
1414 
1415 	/*
1416 	 * Get the label of the mountpoint.  If mounting into the global
1417 	 * zone (i.e. mountpoint is not within an active zone and the
1418 	 * zoned property is off), the label must be default or
1419 	 * admin_low/admin_high only; no other checks are needed.
1420 	 */
1421 	mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE);
1422 	if (mntzone->zone_id == GLOBAL_ZONEID) {
1423 		uint64_t zoned;
1424 
1425 		zone_rele(mntzone);
1426 
1427 		if (dsl_prop_get_integer(osname,
1428 		    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
1429 			return (EACCES);
1430 		if (!zoned)
1431 			return (zfs_check_global_label(osname, ds_hexsl));
1432 		else
1433 			/*
1434 			 * This is the case of a zone dataset being mounted
1435 			 * initially, before the zone has been fully created;
1436 			 * allow this mount into global zone.
1437 			 */
1438 			return (0);
1439 	}
1440 
1441 	mnt_tsl = mntzone->zone_slabel;
1442 	ASSERT(mnt_tsl != NULL);
1443 	label_hold(mnt_tsl);
1444 	mnt_sl = label2bslabel(mnt_tsl);
1445 
1446 	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) {
1447 		/*
1448 		 * The dataset doesn't have a real label, so fabricate one.
1449 		 */
1450 		char *str = NULL;
1451 
1452 		if (l_to_str_internal(mnt_sl, &str) == 0 &&
1453 		    dsl_prop_set(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
1454 		    ZPROP_SRC_LOCAL, 1, strlen(str) + 1, str) == 0)
1455 			retv = 0;
1456 		if (str != NULL)
1457 			kmem_free(str, strlen(str) + 1);
1458 	} else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) {
1459 		/*
1460 		 * Now compare labels to complete the MAC check.  If the
1461 		 * labels are equal then allow access.  If the mountpoint
1462 		 * label dominates the dataset label, allow readonly access.
1463 		 * Otherwise, access is denied.
1464 		 */
1465 		if (blequal(mnt_sl, &ds_sl))
1466 			retv = 0;
1467 		else if (bldominates(mnt_sl, &ds_sl)) {
1468 			vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
1469 			retv = 0;
1470 		}
1471 	}
1472 
1473 	label_rele(mnt_tsl);
1474 	zone_rele(mntzone);
1475 	return (retv);
1476 #else   /* PORT_SOLARIS */
1477 	return (0);
1478 #endif
1479 }
1480 
1481 #ifndef __NetBSD__
1482 static int
1483 zfs_mountroot(vfs_t *vfsp, enum whymountroot why)
1484 {
1485 	int error = 0;
1486 	static int zfsrootdone = 0;
1487 	zfsvfs_t *zfsvfs = NULL;
1488 	znode_t *zp = NULL;
1489 	vnode_t *vp = NULL;
1490 	char *zfs_bootfs;
1491 	char *zfs_devid;
1492 
1493 	ASSERT(vfsp);
1494 
1495 	/*
1496 	 * The filesystem that we mount as root is defined in the
1497 	 * boot property "zfs-bootfs" with a format of
1498 	 * "poolname/root-dataset-objnum".
1499 	 */
1500 	if (why == ROOT_INIT) {
1501 		if (zfsrootdone++)
1502 			return (EBUSY);
1503 		/*
1504 		 * the process of doing a spa_load will require the
1505 		 * clock to be set before we could (for example) do
1506 		 * something better by looking at the timestamp on
1507 		 * an uberblock, so just set it to -1.
1508 		 */
1509 		clkset(-1);
1510 
1511 		if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) {
1512 			cmn_err(CE_NOTE, "spa_get_bootfs: can not get "
1513 			    "bootfs name");
1514 			return (EINVAL);
1515 		}
1516 		zfs_devid = spa_get_bootprop("diskdevid");
1517 		error = spa_import_rootpool(rootfs.bo_name, zfs_devid);
1518 		if (zfs_devid)
1519 			spa_free_bootprop(zfs_devid);
1520 		if (error) {
1521 			spa_free_bootprop(zfs_bootfs);
1522 			cmn_err(CE_NOTE, "spa_import_rootpool: error %d",
1523 			    error);
1524 			return (error);
1525 		}
1526 		if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) {
1527 			spa_free_bootprop(zfs_bootfs);
1528 			cmn_err(CE_NOTE, "zfs_parse_bootfs: error %d",
1529 			    error);
1530 			return (error);
1531 		}
1532 
1533 		spa_free_bootprop(zfs_bootfs);
1534 
1535 		if (error = vfs_lock(vfsp))
1536 			return (error);
1537 
1538 		if (error = zfs_domount(vfsp, rootfs.bo_name)) {
1539 			cmn_err(CE_NOTE, "zfs_domount: error %d", error);
1540 			goto out;
1541 		}
1542 
1543 		zfsvfs = (zfsvfs_t *)vfsp->vfs_data;
1544 		ASSERT(zfsvfs);
1545 		if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) {
1546 			cmn_err(CE_NOTE, "zfs_zget: error %d", error);
1547 			goto out;
1548 		}
1549 
1550 		vp = ZTOV(zp);
1551 		mutex_enter(&vp->v_lock);
1552 		vp->v_flag |= VROOT;
1553 		mutex_exit(&vp->v_lock);
1554 		rootvp = vp;
1555 
1556 		/*
1557 		 * Leave rootvp held.  The root file system is never unmounted.
1558 		 */
1559 
1560 		vfs_add((struct vnode *)0, vfsp,
1561 		    (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0);
1562 out:
1563 		vfs_unlock(vfsp);
1564 		return (error);
1565 	} else if (why == ROOT_REMOUNT) {
1566 		readonly_changed_cb(vfsp->vfs_data, B_FALSE);
1567 		vfsp->vfs_flag |= VFS_REMOUNT;
1568 
1569 		/* refresh mount options */
1570 		zfs_unregister_callbacks(vfsp->vfs_data);
1571 		return (zfs_register_callbacks(vfsp));
1572 
1573 	} else if (why == ROOT_UNMOUNT) {
1574 		zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data);
1575 		(void) zfs_sync(vfsp, 0, 0);
1576 		return (0);
1577 	}
1578 
1579 	/*
1580 	 * if "why" is equal to anything else other than ROOT_INIT,
1581 	 * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it.
1582 	 */
1583 	return (ENOTSUP);
1584 }
1585 #endif /*__NetBSD__ */
1586 
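/*
 * VFS_MOUNT entry point for the NetBSD port: validate the covered
 * vnode and mount arguments, check mount policy and delegated
 * permissions, then mount the dataset via zfs_domount().  A remount
 * only refreshes the property callbacks.
 */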
1587 /*ARGSUSED*/
1588 static int
1589 zfs_mount(vfs_t *vfsp, const char *path, void *data, size_t *data_len)
1590 {
1591 	char		*osname;
1592 	pathname_t	spn;
1593 	vnode_t         *mvp = vfsp->mnt_vnodecovered;
1594 	struct mounta   *uap = data;
1595 	int		error = 0;
1596 	int		canwrite;
1597 	cred_t          *cr;
1598 
1599 	crget(cr);
1600 	dprintf("zfs_vfsops.c zfs_mount called\n");
1601 	dprintf("vfsp->vfs_count %d\n", vfsp->vfs_count);
1602 	if (mvp->v_type != VDIR)
1603 		return (ENOTDIR);
1604 
1605 	if (uap == NULL)
1606 		return (EINVAL);
1607 
1608 	mutex_enter(mvp->v_interlock);
1609 	if ((uap->flags & MS_REMOUNT) == 0 &&
1610 	    (uap->flags & MS_OVERLAY) == 0 &&
1611 	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
1612 		mutex_exit(mvp->v_interlock);
1613 		return (EBUSY);
1614 	}
1615 	mutex_exit(mvp->v_interlock);
1616 
1617 	/*
1618 	 * ZFS does not support passing unparsed data in via MS_DATA.
1619 	 * Users should use the MS_OPTIONSTR interface; this means
1620 	 * that all option parsing is already done and the options struct
1621 	 * can be interrogated.
1622 	 */
1623 	if ((uap->flags & MS_DATA) && uap->datalen > 0)
1624 		return (EINVAL);
1625 
1626 	osname = PNBUF_GET();
1627 
1628 	strlcpy(osname, uap->fspec, strlen(uap->fspec) + 1);
1629 
1630 	/*
1631 	 * Check for mount privilege.
1632 	 *
1633 	 * If we don't have privilege, see whether we have local
1634 	 * (delegated) permission to allow it.
1635 	 */
1636 	error = secpolicy_fs_mount(cr, mvp, vfsp);
1637 	if (error) {
1638 		error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr);
1639 		if (error == 0) {
1640 			vattr_t		vattr;
1641 
1642 			/*
1643 			 * Make sure user is the owner of the mount point
1644 			 * or has sufficient privileges.
1645 			 */
1646 
1647 			vattr.va_mask = AT_UID;
1648 
1649 			if (error = VOP_GETATTR(mvp, &vattr, 0, cr, NULL)) {
1650 				goto out;
1651 			}
1652 
1653 			if (secpolicy_vnode_owner(cr, vattr.va_uid) != 0 &&
1654 			    VOP_ACCESS(mvp, VWRITE, cr) != 0) {
1655 				error = EPERM;
1656 				goto out;
1657 			}
1658 
1659 /* XXX NetBSD			secpolicy_fs_mount_clearopts(cr, vfsp);*/
1660 		} else {
1661 			goto out;
1662 		}
1663 	}
1664 
1665 	/*
1666 	 * Refuse to mount a filesystem if we are in a local zone and the
1667 	 * dataset is not visible.
1668 	 */
1669 	if (!INGLOBALZONE(curproc) &&
1670 	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
1671 		error = EPERM;
1672 		goto out;
1673 	}
1674 
1675 	error = zfs_mount_label_policy(vfsp, osname);
1676 	if (error)
1677 		goto out;
1678 
1679 	/*
1680 	 * When doing a remount, we simply refresh our temporary properties
1681 	 * according to those options set in the current VFS options.
1682 	 */
1683 	if (uap->flags & MS_REMOUNT) {
1684 		/* refresh mount options */
1685 		zfs_unregister_callbacks(vfsp->vfs_data);
1686 		error = zfs_register_callbacks(vfsp);
1687 		goto out;
1688 	}
1689 
1690 	/* Mark ZFS as MP SAFE */
1691 	vfsp->mnt_iflag |= IMNT_MPSAFE;
1692 
1693 	error = zfs_domount(vfsp, osname);
1694 
1695 	vfs_getnewfsid(vfsp);
1696 
1697 	/* setup zfs mount info */
1698 	strlcpy(vfsp->mnt_stat.f_mntfromname, osname,
1699 	    sizeof(vfsp->mnt_stat.f_mntfromname));
1700 	set_statvfs_info(path, UIO_USERSPACE, vfsp->mnt_stat.f_mntfromname,
1701 	    UIO_SYSSPACE, vfsp->mnt_op->vfs_name, vfsp, curlwp);
1702 
1703 	/*
1704 	 * Add an extra VFS_HOLD on our parent vfs so that it can't
1705 	 * disappear due to a forced unmount.
1706 	 */
1707 	if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap)
1708 		VFS_HOLD(mvp->v_vfsp);
1709 
1710 out:
1711 	PNBUF_PUT(osname);
1712 	return (error);
1713 }
1714 
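/*
 * Report file system statistics.  Space is reported in units of
 * f_frsize (the minimum block size); inode counts are estimates
 * since ZFS has no preallocated inode table.
 */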
1715 static int
1716 zfs_statvfs(vfs_t *vfsp, struct statvfs *statp)
1717 {
1718 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1719 	dev_t dev;
1720 	uint64_t refdbytes, availbytes, usedobjs, availobjs;
1721 
1722 	ZFS_ENTER(zfsvfs);
1723 
1724 	dmu_objset_space(zfsvfs->z_os,
1725 	    &refdbytes, &availbytes, &usedobjs, &availobjs);
1726 
1727 	/*
1728 	 * The underlying storage pool actually uses multiple block sizes.
1729 	 * We report the fragsize as the smallest block size we support,
1730 	 * and we report our blocksize as the filesystem's maximum blocksize.
1731 	 */
1732 	statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT;
1733 	statp->f_bsize = zfsvfs->z_max_blksz;
1734 
1735 	/*
1736 	 * The following report "total" blocks of various kinds in the
1737 	 * file system, but reported in terms of f_frsize - the
1738 	 * "fragment" size.
1739 	 */
1740 
1741 	statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
1742 	statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT;
1743 	statp->f_bavail = statp->f_bfree; /* no root reservation */
1744 
1745 	/*
1746 	 * statvfs() should really be called statufs(), because it assumes
1747 	 * static metadata.  ZFS doesn't preallocate files, so the best
1748 	 * we can do is report the max that could possibly fit in f_files,
1749 	 * and that minus the number actually used in f_ffree.
1750 	 * For f_ffree, report the smaller of the number of objects available
1751 	 * and the number of blocks (each object will take at least a block).
1752 	 */
1753 	statp->f_ffree = MIN(availobjs, statp->f_bfree);
1754 	statp->f_favail = statp->f_ffree;	/* no "root reservation" */
1755 	statp->f_files = statp->f_ffree + usedobjs;
1756 
1757 	statp->f_fsid = vfsp->mnt_stat.f_fsidx.__fsid_val[0];
1758 
1759 	/*
1760 	 * We're a zfs filesystem.
1761 	 */
1762 	(void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename));
1763 	(void) strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
1764 	    sizeof(statp->f_mntfromname));
1765 	(void) strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
1766 	    sizeof(statp->f_mntonname));
1767 
1768 	statp->f_namemax = ZFS_MAXNAMELEN;
1769 
1770 	/*
1771 	 * We have all of 32 characters to stuff a string here.
1772 	 * Is there anything useful we could/should provide?
1773 	 */
1774 #ifndef __NetBSD__
1775 	bzero(statp->f_fstr, sizeof (statp->f_fstr));
1776 #endif
1777 	ZFS_EXIT(zfsvfs);
1778 	return (0);
1779 }
1780 
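/*
 * Look up the root znode and return a referenced, exclusively locked
 * vnode for it.
 */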
1781 static int
1782 zfs_root(vfs_t *vfsp, vnode_t **vpp)
1783 {
1784 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1785 	znode_t *rootzp;
1786 	int error;
1787 
1788 	ZFS_ENTER(zfsvfs);
1789 	dprintf("zfs_root called\n");
1790 	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
1791 	if (error == 0)
1792 		*vpp = ZTOV(rootzp);
1793 	dprintf("vpp -> %d, error %d -- %p\n", (*vpp)->v_type, error, *vpp);
1794 	ZFS_EXIT(zfsvfs);
1795 	if (error == 0)
1796 		vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
1797 	KASSERT((error != 0) || (*vpp != NULL));
1798 	KASSERT((error != 0) || (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE));
1799 	return (error);
1800 }
1801 
1802 /*
1803  * Teardown the zfsvfs::z_os.
1804  *
1805  * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
1806  * and 'z_teardown_inactive_lock' held.
1807  */
1808 static int
1809 zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
1810 {
1811 	znode_t	*zp;
1812 
1813 	rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
1814 
1815 	if (!unmounting) {
1816 		/*
1817 		 * We purge the parent filesystem's vfsp as the parent
1818 		 * filesystem and all of its snapshots have their vnode's
1819 		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
1820 		 * 'z_parent' is self referential for non-snapshots.
1821 		 */
1822 		(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
1823 	}
1824 
1825 	/*
1826 	 * Close the zil. NB: Can't close the zil while zfs_inactive
1827 	 * threads are blocked as zil_close can call zfs_inactive.
1828 	 */
1829 	if (zfsvfs->z_log) {
1830 		zil_close(zfsvfs->z_log);
1831 		zfsvfs->z_log = NULL;
1832 	}
1833 
1834 	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);
1835 
1836 	/*
1837 	 * If we are not unmounting (ie: online recv) and someone already
1838 	 * unmounted this file system while we were doing the switcheroo,
1839 	 * or a reopen of z_os failed then just bail out now.
1840 	 */
1841 	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
1842 		rw_exit(&zfsvfs->z_teardown_inactive_lock);
1843 		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
1844 		return (EIO);
1845 	}
1846 
1847 	/*
1848 	 * At this point there are no vops active, and any new vops will
1849 	 * fail with EIO since we have z_teardown_lock for writer (only
1850 	 * relevant for forced unmount).
1851 	 *
1852 	 * Release all holds on dbufs.
1853 	 */
1854 	mutex_enter(&zfsvfs->z_znodes_lock);
1855 	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
1856 	    zp = list_next(&zfsvfs->z_all_znodes, zp))
1857 		if (zp->z_dbuf) {
1858 			ASSERT(ZTOV(zp)->v_count > 0);
1859 			zfs_znode_dmu_fini(zp);
1860 		}
1861 	mutex_exit(&zfsvfs->z_znodes_lock);
1862 
1863 	/*
1864 	 * If we are unmounting, set the unmounted flag and let new vops
1865 	 * unblock.  zfs_inactive will have the unmounted behavior, and all
1866 	 * other vops will fail with EIO.
1867 	 */
1868 	if (unmounting) {
1869 		zfsvfs->z_unmounted = B_TRUE;
1870 		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
1871 		rw_exit(&zfsvfs->z_teardown_inactive_lock);
1872 	}
1873 
1874 	/*
1875 	 * z_os will be NULL if there was an error in attempting to reopen
1876 	 * zfsvfs, so just return as the properties had already been
1877 	 * unregistered and cached data had been evicted before.
1878 	 */
1879 	if (zfsvfs->z_os == NULL)
1880 		return (0);
1881 
1882 	/*
1883 	 * Unregister properties.
1884 	 */
1885 	zfs_unregister_callbacks(zfsvfs);
1886 
1887 	/*
1888 	 * Evict cached data
1889 	 */
1890 	if (dmu_objset_evict_dbufs(zfsvfs->z_os)) {
1891 		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
1892 		(void) dmu_objset_evict_dbufs(zfsvfs->z_os);
1893 	}
1894 
1895 	return (0);
1896 }
1897 
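/*
 * Unmount the file system: unmount any snapshots under .zfs, flush the
 * vnodes, tear down the zfsvfs and release the objset.  With MS_FORCE the
 * vnode flush is forced; otherwise busy vnodes make the unmount fail.
 */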
1898 /*ARGSUSED*/
1899 static int
1900 zfs_umount(vfs_t *vfsp, int fflag)
1901 {
1902 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1903 	objset_t *os;
1904 	int ret, flags = 0;
1905 	cred_t *cr;
1906 
1907 	vnode_t *vpp;
1908 	int counter;
1909 
1910 	counter = 0;
1911 
1912 	dprintf("ZFS_UMOUNT called\n");
1913 
1914 	/*TAILQ_FOREACH(vpp, &vfsp->mnt_vnodelist, v_mntvnodes) {
1915 		printf("vnode list vnode number %d -- vnode address %p\n", counter, vpp);
1916 		vprint("ZFS vfsp vnode list", vpp);
1917 		counter++;
1918 		} */
1919 
1920 	crget(cr);
1921 #ifdef TODO
1922 	ret = secpolicy_fs_unmount(cr, vfsp);
1923 	if (ret) {
1924 		ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
1925 		    ZFS_DELEG_PERM_MOUNT, cr);
1926 		if (ret)
1927 			return (ret);
1928 	}
1929 #endif
1930 	/*
1931 	 * We purge the parent filesystem's vfsp as the parent filesystem
1932 	 * and all of its snapshots have their vnodes' v_vfsp set to the
1933 	 * parent filesystem's vfsp.  Note, 'z_parent' is self-referential
1934 	 * for non-snapshots.
1935 	 */
1936 	(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
1937 
1938 	/*
1939 	 * Unmount any snapshots mounted under .zfs before unmounting the
1940 	 * dataset itself.
1941 	 */
1942 	if (zfsvfs->z_ctldir != NULL &&
1943 	    (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) {
1944 		return (ret);
1945 	}
1946 
1947 #if 0
1948 	if (!(fflag & MS_FORCE)) {
1949 		/*
1950 		 * Check the number of active vnodes in the file system.
1951 		 * Our count is maintained in the vfs structure, but the
1952 		 * number is off by 1 to indicate a hold on the vfs
1953 		 * structure itself.
1954 		 *
1955 		 * The '.zfs' directory maintains a reference of its
1956 		 * own, and any active references underneath are
1957 		 * reflected in the vnode count.
1958 		 */
1959 		if (zfsvfs->z_ctldir == NULL) {
1960 			if (vfsp->vfs_count > 1){
1961 				return (EBUSY);
1962 			}
1963 		} else {
1964 			if (vfsp->vfs_count > 2 ||
1965 			    zfsvfs->z_ctldir->v_count > 1) {
1966 				return (EBUSY);
1967 			}
1968 		}
1969 	}
1970 #endif
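	/*
	 * Flush all vnodes attached to this mount.  With FORCECLOSE even
	 * busy vnodes are reclaimed (forced unmount); otherwise vflush()
	 * fails if any vnodes remain active.
	 */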
1971 	ret = vflush(vfsp, NULL, (ISSET(fflag, MS_FORCE) ? FORCECLOSE : 0));
1972 	if (ret != 0)
1973 		return (ret);
1974 	vfsp->vfs_flag |= VFS_UNMOUNTED;
1975 
1976 	VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
1977 	os = zfsvfs->z_os;
1978 
1979 	/*
1980 	 * z_os will be NULL if there was an error in
1981 	 * attempting to reopen zfsvfs.
1982 	 */
1983 	if (os != NULL) {
1984 		/*
1985 		 * Unset the objset user_ptr.
1986 		 */
1987 		mutex_enter(&os->os_user_ptr_lock);
1988 		dmu_objset_set_user(os, NULL);
1989 		mutex_exit(&os->os_user_ptr_lock);
1990 
1991 		/*
1992 		 * Finally release the objset
1993 		 */
1994 		dmu_objset_disown(os, zfsvfs);
1995 	}
1996 
1997 	/*
1998 	 * We can now safely destroy the '.zfs' directory node.
1999 	 */
2000 	if (zfsvfs->z_ctldir != NULL)
2001 		zfsctl_destroy(zfsvfs);
2002 
2003 	return (0);
2004 }
2005 
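/*
 * VFS_VGET: translate an inode number into a referenced, locked vnode.
 * Znodes that are on the unlinked set (z_unlinked) are rejected with
 * EINVAL.
 */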
2006 static int
2007 zfs_vget(vfs_t *vfsp, ino_t ino, vnode_t **vpp)
2008 {
2009 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
2010 	znode_t *zp;
2011 	int err;
2012 
2013 	dprintf("zfs_vget called\n");
2014 	dprintf("vfsp->vfs_count %d\n", vfsp->vfs_count);
2015 
2016 	ZFS_ENTER(zfsvfs);
2017 	err = zfs_zget(zfsvfs, ino, &zp);
2018 	if (err == 0 && zp->z_unlinked) {
2019 		VN_RELE(ZTOV(zp));
2020 		err = EINVAL;
2021 	}
2022 	if (err != 0)
2023 		*vpp = NULL;
2024 	else {
2025 		*vpp = ZTOV(zp);
2026 		/* XXX NetBSD: lock exclusively, as zfs_root/zfs_fhtovp do */
2027 		vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
2028 	}
2029 	ZFS_EXIT(zfsvfs);
2030 	return (err);
2031 }
2032 
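/*
 * VFS_FHTOVP: translate an NFS file handle into a vnode.  A short fid
 * carries the object number and generation; a long fid additionally
 * carries the objset id and generation and is used for objects reached
 * through .zfs (snapshots).  The fields are stored as little-endian byte
 * arrays and decoded below.
 */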
2033 static int
2034 zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp)
2035 {
2036 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
2037 	znode_t *zp;
2038 	uint64_t object = 0;
2039 	uint64_t fid_gen = 0;
2040 	uint64_t gen_mask;
2041 	uint64_t zp_gen;
2042 	int i, err;
2043 
2044 	*vpp = NULL;
2045 
2046 	dprintf("zfs_fhtovp called\n");
2047 	dprintf("vfsp->vfs_count %d\n", vfsp->vfs_count);
2048 
2049 	ZFS_ENTER(zfsvfs);
2050 
2051 	if (fidp->fid_len == LONG_FID_LEN) {
2052 		zfid_long_t *zlfid = (zfid_long_t *)fidp;
2053 		uint64_t objsetid = 0;
2054 		uint64_t setgen = 0;
2055 
2056 		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
2057 			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
2058 
2059 		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
2060 			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
2061 
2062 		ZFS_EXIT(zfsvfs);
2063 
2064 		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
2065 		if (err)
2066 			return (EINVAL);
2067 		ZFS_ENTER(zfsvfs);
2068 	}
2069 
2070 	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
2071 		zfid_short_t *zfid = (zfid_short_t *)fidp;
2072 
2073 		for (i = 0; i < sizeof (zfid->zf_object); i++)
2074 			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
2075 
2076 		for (i = 0; i < sizeof (zfid->zf_gen); i++)
2077 			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
2078 	} else {
2079 		ZFS_EXIT(zfsvfs);
2080 		return (EINVAL);
2081 	}
2082 
2083 	/* A zero fid_gen means we are in the .zfs control directories */
2084 	if (fid_gen == 0 &&
2085 	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
2086 		*vpp = zfsvfs->z_ctldir;
2087 		ASSERT(*vpp != NULL);
2088 		if (object == ZFSCTL_INO_SNAPDIR) {
2089 			VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL,
2090 				0, NULL, NULL, NULL, NULL, NULL) == 0);
2091 		} else {
2092 			VN_HOLD(*vpp);
2093 		}
2094 		ZFS_EXIT(zfsvfs);
2095 		/* XXX: LK_RETRY? */
2096 		vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
2097 		return (0);
2098 	}
2099 
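	/*
	 * 'i' is sizeof (zf_gen) after the loop above, so gen_mask covers
	 * exactly the generation bits that fit in the fid.
	 */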
2100 	gen_mask = -1ULL >> (64 - 8 * i);
2101 
2102 	dprintf("getting %llu [%llu mask %llx]\n", (u_longlong_t)object,
	    (u_longlong_t)fid_gen, (u_longlong_t)gen_mask);
2103 	if ((err = zfs_zget(zfsvfs, object, &zp)) != 0) {
2104 		ZFS_EXIT(zfsvfs);
2105 		return (err);
2106 	}
2107 	zp_gen = zp->z_phys->zp_gen & gen_mask;
2108 	if (zp_gen == 0)
2109 		zp_gen = 1;
2110 	if (zp->z_unlinked || zp_gen != fid_gen) {
2111 		dprintf("znode gen (%llu) != fid gen (%llu)\n",
		    (u_longlong_t)zp_gen, (u_longlong_t)fid_gen);
2112 		VN_RELE(ZTOV(zp));
2113 		ZFS_EXIT(zfsvfs);
2114 		return (EINVAL);
2115 	}
2116 
2117 	*vpp = ZTOV(zp);
2118 	/* XXX: LK_RETRY? */
2119 	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
2120 	ZFS_EXIT(zfsvfs);
2121 	return (0);
2122 }
2123 
2124 /*
2125  * Block out VOPs and close zfsvfs_t::z_os
2126  *
2127  * Note, if successful, then we return with the 'z_teardown_lock' and
2128  * 'z_teardown_inactive_lock' write held.
2129  */
2130 int
2131 zfs_suspend_fs(zfsvfs_t *zfsvfs)
2132 {
2133 	int error;
2134 
2135 	if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
2136 		return (error);
2137 	dmu_objset_disown(zfsvfs->z_os, zfsvfs);
2138 
2139 	return (0);
2140 }
2141 
2142 /*
2143  * Reopen zfsvfs_t::z_os and release VOPs.
2144  */
2145 int
2146 zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname)
2147 {
2148 	int err;
2149 
2150 	ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
2151 	ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
2152 
2153 	err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs,
2154 	    &zfsvfs->z_os);
2155 	if (err) {
2156 		zfsvfs->z_os = NULL;
2157 	} else {
2158 		znode_t *zp;
2159 
2160 		VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
2161 
2162 		/*
2163 		 * Attempt to re-establish all the active znodes with
2164 		 * their dbufs.  If a zfs_rezget() fails, then we'll let
2165 		 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
2166 		 * when they try to use their znode.
2167 		 */
2168 		mutex_enter(&zfsvfs->z_znodes_lock);
2169 		for (zp = list_head(&zfsvfs->z_all_znodes); zp;
2170 		    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
2171 			(void) zfs_rezget(zp);
2172 		}
2173 		mutex_exit(&zfsvfs->z_znodes_lock);
2174 
2175 	}
2176 
2177 	/* release the VOPs */
2178 	rw_exit(&zfsvfs->z_teardown_inactive_lock);
2179 	rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
2180 
2181 	if (err) {
2182 		/*
2183 		 * Since we couldn't reopen zfsvfs::z_os, force
2184 		 * unmount this file system.
2185 		 */
2186 		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0)
2187 			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, curlwp);
2188 	}
2189 	return (err);
2190 }
2191 
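/*
 * Tear down per-mount state: free the zfsvfs_t and drop the count of
 * active zfs file systems.
 */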
2192 static void
2193 zfs_freevfs(vfs_t *vfsp)
2194 {
2195 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
2196 
2197 	/*
2198 	 * If this is a snapshot, we have an extra VFS_HOLD on our parent
2199 	 * from zfs_mount().  Release it here.
2200 	 */
2201 	if (zfsvfs->z_issnap)
2202 		VFS_RELE(zfsvfs->z_parent->z_vfs);
2203 
2204 	zfsvfs_free(zfsvfs);
2205 
2206 	atomic_add_32(&zfs_active_fs_count, -1);
2207 }
2208 
2209 /*
2210  * VFS_INIT() initialization.  Note that there is no VFS_FINI(),
2211  * so we can't safely do any non-idempotent initialization here.
2212  * Leave that to zfs_init() and zfs_fini(), which are called
2213  * from the module's _init() and _fini() entry points.
2214  */
2215 /*ARGSUSED*/
2216 int
2217 zfs_vfsinit(int fstype, char *name)
2218 {
2219 	int error;
2220 
2221 	zfsfstype = fstype;
2222 
2223 	/*
2224 	 * Setup vfsops and vnodeops tables.
2225 	 */
2226 	error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops);
	if (error != 0) {
		cmn_err(CE_WARN, "zfs: bad vfs ops template");
		return (error);
	}
2227 
2228 	error = zfs_create_op_tables();
2229 	if (error) {
2230 		zfs_remove_op_tables();
2231 		cmn_err(CE_WARN, "zfs: bad vnode ops template");
2232 		vfs_freevfsops_by_type(zfsfstype);
2233 		return (error);
2234 	}
2235 
2236 	mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL);
2237 	mutex_init(&zfs_debug_mtx, NULL, MUTEX_DEFAULT, NULL);
2238 
2239 	/*
2240 	 * Unique major number for all zfs mounts.
2241 	 * If we run out of 32-bit minors, we'll getudev() another major.
2242 	 */
2243 	zfs_major = ddi_name_to_major(ZFS_DRIVER);
2244 	zfs_minor = ZFS_MIN_MINOR;
2245 
2246 	return (0);
2247 }
2248 
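/*
 * Undo zfs_vfsinit(): detach the vfsops and destroy the module mutexes.
 * If vfs_detach() fails (e.g. file systems still mounted) the mutexes
 * are left intact so a later retry is safe.
 */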
2249 int
2250 zfs_vfsfini(void)
2251 {
2252 	int err;
2253 
2254 	err = vfs_detach(&zfs_vfsops_template);
2255 	if (err != 0)
2256 		return (err);
2257 
2258 	mutex_destroy(&zfs_debug_mtx);
2259 	mutex_destroy(&zfs_dev_mtx);
2260 
2261 	return (0);
2262 }
2263 
2264 void
2265 zfs_init(void)
2266 {
2267 	/*
2268 	 * Initialize .zfs directory structures
2269 	 */
2270 	zfsctl_init();
2271 
2272 	/*
2273 	 * Initialize znode cache, vnode ops, etc...
2274 	 */
2275 	zfs_znode_init();
2276 
2277 	dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
2278 }
2279 
2280 void
2281 zfs_fini(void)
2282 {
2283 	zfsctl_fini();
2284 	zfs_znode_fini();
2285 }
2286 
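/*
 * Report whether any zfs file systems are currently active, i.e.
 * zfs_active_fs_count is nonzero.
 */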
2287 int
2288 zfs_busy(void)
2289 {
2290 	return (zfs_active_fs_count != 0);
2291 }
2292 
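/*
 * Upgrade the on-disk ZPL version recorded in the master node.  The
 * version can only move forward, and the change is logged to the pool
 * history.
 */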
2293 int
2294 zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
2295 {
2296 	int error;
2297 	objset_t *os = zfsvfs->z_os;
2298 	dmu_tx_t *tx;
2299 
2300 	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
2301 		return (EINVAL);
2302 
2303 	if (newvers < zfsvfs->z_version)
2304 		return (EINVAL);
2305 
2306 	tx = dmu_tx_create(os);
2307 	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
2308 	error = dmu_tx_assign(tx, TXG_WAIT);
2309 	if (error) {
2310 		dmu_tx_abort(tx);
2311 		return (error);
2312 	}
2313 	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
2314 	    8, 1, &newvers, tx);
2315 
2316 	if (error) {
2317 		dmu_tx_commit(tx);
2318 		return (error);
2319 	}
2320 
2321 	spa_history_internal_log(LOG_DS_UPGRADE,
2322 	    dmu_objset_spa(os), tx, CRED(),
2323 	    "oldver=%llu newver=%llu dataset = %llu",
2324 	    zfsvfs->z_version, newvers, dmu_objset_id(os));
2325 
2326 	dmu_tx_commit(tx);
2327 
2328 	zfsvfs->z_version = newvers;
2329 
2330 	if (zfsvfs->z_version >= ZPL_VERSION_FUID)
2331 		zfs_set_fuid_feature(zfsvfs);
2332 
2333 	return (0);
2334 }
2335 
2336 /*
2337  * Read a property stored within the master node.
2338  */
2339 int
2340 zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
2341 {
2342 	const char *pname;
2343 	int error = ENOENT;
2344 
2345 	/*
2346 	 * Look up the file system's value for the property.  For the
2347 	 * version property, we look up a slightly different string.
2348 	 */
2349 	if (prop == ZFS_PROP_VERSION)
2350 		pname = ZPL_VERSION_STR;
2351 	else
2352 		pname = zfs_prop_to_name(prop);
2353 
2354 	if (os != NULL)
2355 		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
2356 
2357 	if (error == ENOENT) {
2358 		/* No value set, use the default value */
2359 		switch (prop) {
2360 		case ZFS_PROP_VERSION:
2361 			*value = ZPL_VERSION;
2362 			break;
2363 		case ZFS_PROP_NORMALIZE:
2364 		case ZFS_PROP_UTF8ONLY:
2365 			*value = 0;
2366 			break;
2367 		case ZFS_PROP_CASE:
2368 			*value = ZFS_CASE_SENSITIVE;
2369 			break;
2370 		default:
2371 			return (error);
2372 		}
2373 		error = 0;
2374 	}
2375 	return (error);
2376 }
2377 
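/*
 * VFS_START: nothing to do for zfs; the mount is fully set up by
 * zfs_mount().
 */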
2378 static int
2379 zfs_start(vfs_t *vfsp, int flags)
2380 {
2381 
2382 	return (0);
2383 }
2384 
2385 
2386 #ifdef TODO
2387 static vfsdef_t vfw = {
2388 	VFSDEF_VERSION,
2389 	MNTTYPE_ZFS,
2390 	zfs_vfsinit,
2391 	VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS|
2392 	    VSW_XID,
2393 	&zfs_mntopts
2394 };
2395 
2396 struct modlfs zfs_modlfs = {
2397 	&mod_fsops, "ZFS filesystem version " SPA_VERSION_STRING, &vfw
2398 };
2399 #endif
2400