1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
24 * All rights reserved.
25 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
26 * Copyright (c) 2014 Integros [integros.com]
27 */
28
29 /* Portions Copyright 2010 Robert Milkowski */
30
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/sysmacros.h>
36 #include <sys/kmem.h>
37 #include <sys/acl.h>
38 #include <sys/vnode.h>
39 #include <sys/vfs.h>
40 #include <sys/mntent.h>
41 #include <sys/mount.h>
42 #include <sys/cmn_err.h>
43 #include <sys/zfs_znode.h>
44 #include <sys/zfs_dir.h>
45 #include <sys/zil.h>
46 #include <sys/fs/zfs.h>
47 #include <sys/dmu.h>
48 #include <sys/dsl_prop.h>
49 #include <sys/dsl_dataset.h>
50 #include <sys/dsl_deleg.h>
51 #include <sys/spa.h>
52 #include <sys/zap.h>
53 #include <sys/sa.h>
54 #include <sys/sa_impl.h>
55 #include <sys/varargs.h>
56 #include <sys/policy.h>
57 #include <sys/atomic.h>
58 #include <sys/zfs_ioctl.h>
59 #include <sys/zfs_ctldir.h>
60 #include <sys/zfs_fuid.h>
61 #include <sys/sunddi.h>
62 #include <sys/dnlc.h>
63 #include <sys/dmu_objset.h>
64 #include <sys/spa_boot.h>
65 #include "zfs_comutil.h"
66
67 #ifdef __FreeBSD_kernel__
68
69 #include <sys/jail.h>
70
71 struct mtx zfs_debug_mtx;
72 MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF);
73
74 SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system");
75
76 int zfs_super_owner;
77 SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0,
78 "File system owner can perform privileged operation on his file systems");
79
80 int zfs_debug_level;
81 SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
82 "Debug level");
83
84 SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions");
85 static int zfs_version_acl = ZFS_ACL_VERSION;
86 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0,
87 "ZFS_ACL_VERSION");
88 static int zfs_version_spa = SPA_VERSION;
89 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0,
90 "SPA_VERSION");
91 static int zfs_version_zpl = ZPL_VERSION;
92 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0,
93 "ZPL_VERSION");
94
95 static int zfs_mount(vfs_t *vfsp);
96 static int zfs_umount(vfs_t *vfsp, int fflag);
97 static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp);
98 static int zfs_statfs(vfs_t *vfsp, struct statfs *statp);
99 static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp);
100 static int zfs_sync(vfs_t *vfsp, int waitfor);
101 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
102 struct ucred **credanonp, int *numsecflavors, int **secflavors);
103 static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp);
104 static void zfs_objset_close(zfsvfs_t *zfsvfs);
105 static void zfs_freevfs(vfs_t *vfsp);
106
107 struct vfsops zfs_vfsops = {
108 .vfs_mount = zfs_mount,
109 .vfs_unmount = zfs_umount,
110 .vfs_root = zfs_root,
111 .vfs_statfs = zfs_statfs,
112 .vfs_vget = zfs_vget,
113 .vfs_sync = zfs_sync,
114 .vfs_checkexp = zfs_checkexp,
115 .vfs_fhtovp = zfs_fhtovp,
116 };
117
118 VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN);
119
120 #endif /* __FreeBSD_kernel__ */
121
122 #ifdef __NetBSD__
123
124 #include <sys/fstrans.h>
125 #include <sys/mkdev.h>
126 #include <miscfs/genfs/genfs.h>
127
128 int zfs_debug_level;
129 kmutex_t zfs_debug_mtx;
130
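/*
 * Compatibility shims: NetBSD has no Giant lock and no vfs_stdsync(),
 * so stub out the FreeBSD-isms used by the shared code below.
 */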
131 #define DROP_GIANT() /* nothing */
132 #define PICKUP_GIANT() /* nothing */
133 #define vfs_stdsync(a, b) 0
134
135 static int zfs_mount(vfs_t *vfsp, const char *path, void *data, size_t *data_len);
136 static int zfs_umount(vfs_t *vfsp, int fflag);
137 static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp);
138 static int zfs_statvfs(vfs_t *vfsp, struct statvfs *statp);
139 static int zfs_netbsd_vptofh(vnode_t *vp, fid_t *fidp, size_t *fh_size);
140 static int zfs_netbsd_fhtovp(vfs_t *vfsp, fid_t *fidp, int lktype, vnode_t **vpp);
141 static int zfs_vget(vfs_t *vfsp, ino_t ino, int lktype, vnode_t **vpp);
142 static int zfs_sync(vfs_t *vfsp, int waitfor);
143 static int zfs_netbsd_sync(vfs_t *vfsp, int waitfor, cred_t *cr);
144 static void zfs_freevfs(vfs_t *vfsp);
145
146 void zfs_init(void);
147 void zfs_fini(void);
148
149 extern const struct vnodeopv_desc zfs_vnodeop_opv_desc;
150 extern const struct vnodeopv_desc zfs_specop_opv_desc;
151 extern const struct vnodeopv_desc zfs_fifoop_opv_desc;
152 extern const struct vnodeopv_desc zfs_sfsop_opv_desc;
153
154 static const struct vnodeopv_desc * const zfs_vnodeop_descs[] = {
155 &zfs_vnodeop_opv_desc,
156 &zfs_specop_opv_desc,
157 &zfs_fifoop_opv_desc,
158 &zfs_sfsop_opv_desc,
159 NULL,
160 };
161
162 struct vfsops zfs_vfsops = {
163 .vfs_name = MOUNT_ZFS,
164 .vfs_min_mount_data = sizeof(struct zfs_args),
165 .vfs_opv_descs = zfs_vnodeop_descs,
166 .vfs_mount = zfs_mount,
167 .vfs_unmount = zfs_umount,
168 .vfs_root = zfs_root,
169 .vfs_statvfs = zfs_statvfs,
170 .vfs_sync = zfs_netbsd_sync,
171 .vfs_vget = zfs_vget,
172 .vfs_loadvnode = zfs_loadvnode,
173 .vfs_newvnode = zfs_newvnode,
174 .vfs_init = zfs_init,
175 .vfs_done = zfs_fini,
176 .vfs_start = (void *)nullop,
177 .vfs_renamelock_enter = genfs_renamelock_enter,
178 .vfs_renamelock_exit = genfs_renamelock_exit,
179 .vfs_reinit = (void *)nullop,
180 .vfs_vptofh = zfs_netbsd_vptofh,
181 .vfs_fhtovp = zfs_netbsd_fhtovp,
182 .vfs_quotactl = (void *)eopnotsupp,
183 .vfs_extattrctl = (void *)eopnotsupp,
184 .vfs_suspendctl = genfs_suspendctl,
185 .vfs_snapshot = (void *)eopnotsupp,
186 .vfs_fsync = (void *)eopnotsupp,
187 };
188
189 static bool
190 zfs_sync_selector(void *cl, struct vnode *vp)
191 {
192 znode_t *zp;
193
194 /*
195  * Skip the vnode if it is inaccessible, is a control node, or if
196  * its atime is clean.
197 */
198 if (zfsctl_is_node(vp))
199 return false;
200 zp = VTOZ(vp);
201 return zp != NULL && vp->v_type != VNON && zp->z_atime_dirty != 0
202 && !zp->z_unlinked;
203 }
204
205 static int
206 zfs_netbsd_sync(vfs_t *vfsp, int waitfor, cred_t *cr)
207 {
208 struct vnode_iterator *marker;
209 zfsvfs_t *zfsvfs = vfsp->vfs_data;
210 vnode_t *vp;
211
212 /*
213 * On NetBSD, we need to push out atime updates. Solaris does
214 * this during VOP_INACTIVE, but that does not work well with the
215 * BSD VFS, so we do it in batch here.
216 */
217 vfs_vnode_iterator_init(vfsp, &marker);
218 while ((vp = vfs_vnode_iterator_next(marker, zfs_sync_selector, NULL)))
219 {
220 znode_t *zp;
221 dmu_buf_t *dbp;
222 dmu_tx_t *tx;
223 int error;
224
225 error = vn_lock(vp, LK_EXCLUSIVE);
226 if (error) {
227 VN_RELE(vp);
228 continue;
229 }
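/*
 * Push this znode's cached atime out to the SA layer in its own
 * transaction.  If the transaction cannot be assigned, z_atime_dirty
 * stays set and we will try again on a later sync.
 */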
230 ZFS_ENTER(zfsvfs);
231 zp = VTOZ(vp);
232 tx = dmu_tx_create(zfsvfs->z_os);
233 dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
234 zfs_sa_upgrade_txholds(tx, zp);
235 error = dmu_tx_assign(tx, TXG_WAIT);
236 if (error) {
237 dmu_tx_abort(tx);
238 } else {
239 (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
240 (void *)&zp->z_atime, sizeof (zp->z_atime), tx);
241 zp->z_atime_dirty = 0;
242 dmu_tx_commit(tx);
243 }
244 ZFS_EXIT(zfsvfs);
245 vput(vp);
246 }
247 vfs_vnode_iterator_destroy(marker);
248
249 /*
250 * Then do the regular ZFS stuff.
251 */
252 return zfs_sync(vfsp, waitfor);
253 }
254
255 static int
256 zfs_netbsd_vptofh(vnode_t *vp, fid_t *fidp, size_t *fh_size)
257 {
258 znode_t *zp;
259 zfsvfs_t *zfsvfs;
260 uint32_t gen;
261 uint64_t gen64;
262 uint64_t object;
263 zfid_short_t *zfid;
264 int size, i, error;
265
266 if (zfsctl_is_node(vp))
267 return zfsctl_vptofh(vp, fidp, fh_size);
268
269 zp = VTOZ(vp);
270 zfsvfs = zp->z_zfsvfs;
271 object = zp->z_id;
272
273 ZFS_ENTER(zfsvfs);
274 ZFS_VERIFY_ZP(zp);
275
276 if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
277 &gen64, sizeof (uint64_t))) != 0) {
278 ZFS_EXIT(zfsvfs);
279 return (error);
280 }
281
282 gen = (uint32_t)gen64;
283
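/*
 * A snapshot mounted under the parent's .zfs directory needs a long
 * fid that also encodes the objset id; an ordinary file system gets
 * by with the short form (object number + generation).
 */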
284 size = (zfsvfs->z_parent != zfsvfs) ? LONG_FID_LEN : SHORT_FID_LEN;
285
286 if (*fh_size < size) {
287 ZFS_EXIT(zfsvfs);
288 *fh_size = size;
289 return SET_ERROR(E2BIG);
290 }
291 *fh_size = size;
292
293 zfid = (zfid_short_t *)fidp;
294
295 zfid->zf_len = size;
296
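/*
 * Pack the object number and generation into the fid byte arrays,
 * least significant byte first, so the encoding is endian-neutral.
 */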
297 for (i = 0; i < sizeof (zfid->zf_object); i++)
298 zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
299
300 /* Must have a non-zero generation number to distinguish from .zfs */
301 if (gen == 0)
302 gen = 1;
303 for (i = 0; i < sizeof (zfid->zf_gen); i++)
304 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
305
306 if (size == LONG_FID_LEN) {
307 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os);
308 zfid_long_t *zlfid;
309
310 zlfid = (zfid_long_t *)fidp;
311
312 for (i = 0; i < sizeof (zlfid->zf_setid); i++)
313 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
314
315 /* XXX - this should be the generation number for the objset */
316 for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
317 zlfid->zf_setgen[i] = 0;
318 }
319
320 ZFS_EXIT(zfsvfs);
321 return 0;
322 }
323
324 static int
325 zfs_netbsd_fhtovp(vfs_t *vfsp, fid_t *fidp, int lktype, vnode_t **vpp)
326 {
327 zfsvfs_t *zfsvfs = vfsp->vfs_data;
328 znode_t *zp;
329 vnode_t *dvp;
330 uint64_t object = 0;
331 uint64_t fid_gen = 0;
332 uint64_t gen_mask;
333 uint64_t zp_gen;
334 int i, err;
335
336 *vpp = NULL;
337
338 ZFS_ENTER(zfsvfs);
339
340 if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) {
341 zfid_long_t *zlfid = (zfid_long_t *)fidp;
342 uint64_t objsetid = 0;
343 uint64_t setgen = 0;
344
345 for (i = 0; i < sizeof (zlfid->zf_setid); i++)
346 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
347
348 for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
349 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
350
351 ZFS_EXIT(zfsvfs);
352
353 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
354 if (err)
355 return (SET_ERROR(EINVAL));
356 ZFS_ENTER(zfsvfs);
357 }
358
359 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
360 zfid_short_t *zfid = (zfid_short_t *)fidp;
361
362 for (i = 0; i < sizeof (zfid->zf_object); i++)
363 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
364
365 for (i = 0; i < sizeof (zfid->zf_gen); i++)
366 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
367 } else {
368 ZFS_EXIT(zfsvfs);
369 return (SET_ERROR(EINVAL));
370 }
371
372 /* A zero fid_gen means we are in the .zfs control directories */
373 if (fid_gen == 0 &&
374 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
375 ZFS_EXIT(zfsvfs);
376 if (object == ZFSCTL_INO_ROOT)
377 err = zfsctl_root(zfsvfs, vpp);
378 else
379 err = zfsctl_snapshot(zfsvfs, vpp);
380 if (err)
381 return err;
382 err = vn_lock(*vpp, LK_EXCLUSIVE);
383 if (err) {
384 vrele(*vpp);
385 *vpp = NULL;
386 return err;
387 }
388 return 0;
389 }
390
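/*
 * 'i' still holds sizeof (zfid->zf_gen) from the loop above, so
 * gen_mask trims the on-disk generation down to the width that was
 * actually stored in the fid.
 */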
391 gen_mask = -1ULL >> (64 - 8 * i);
392
393 dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
394 if (err = zfs_zget(zfsvfs, object, &zp)) {
395 ZFS_EXIT(zfsvfs);
396 return SET_ERROR(ESTALE);
397 }
398 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
399 sizeof (uint64_t));
400 zp_gen = zp_gen & gen_mask;
401 if (zp_gen == 0)
402 zp_gen = 1;
403 if (zp->z_unlinked || zp_gen != fid_gen) {
404 dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
405 VN_RELE(ZTOV(zp));
406 ZFS_EXIT(zfsvfs);
407 return SET_ERROR(ESTALE);
408 }
409
410 *vpp = ZTOV(zp);
411 ZFS_EXIT(zfsvfs);
412 err = vn_lock(*vpp, lktype);
413 if (err) {
414 vrele(*vpp);
415 *vpp = NULL;
416 return err;
417 }
418 return 0;
419 }
420 #endif /* __NetBSD__ */
421
422 /*
423  * We need to keep a count of active filesystems.
424  * This is necessary to prevent our module
425  * from being unloaded after a umount -f.
426 */
427 static uint32_t zfs_active_fs_count = 0;
428
429 /*ARGSUSED*/
430 static int
431 zfs_sync(vfs_t *vfsp, int waitfor)
432 {
433 /*
434 * Data integrity is job one. We don't want a compromised kernel
435 * writing to the storage pool, so we never sync during panic.
436 */
437 if (panicstr)
438 return (0);
439
440 /*
441 * Ignore the system syncher. ZFS already commits async data
442 * at zfs_txg_timeout intervals.
443 */
444 if (waitfor == MNT_LAZY)
445 return (0);
446
447 if (vfsp != NULL) {
448 /*
449 * Sync a specific filesystem.
450 */
451 zfsvfs_t *zfsvfs = vfsp->vfs_data;
452 dsl_pool_t *dp;
453 int error;
454
455 error = vfs_stdsync(vfsp, waitfor);
456 if (error != 0)
457 return (error);
458
459 ZFS_ENTER(zfsvfs);
460 dp = dmu_objset_pool(zfsvfs->z_os);
461
462 /*
463 * If the system is shutting down, then skip any
464 * filesystems which may exist on a suspended pool.
465 */
466 if (sys_shutdown && spa_suspended(dp->dp_spa)) {
467 ZFS_EXIT(zfsvfs);
468 return (0);
469 }
470
471 if (zfsvfs->z_log != NULL)
472 zil_commit(zfsvfs->z_log, 0);
473
474 ZFS_EXIT(zfsvfs);
475 } else {
476 /*
477 * Sync all ZFS filesystems. This is what happens when you
478 * run sync(1M). Unlike other filesystems, ZFS honors the
479 * request by waiting for all pools to commit all dirty data.
480 */
481 spa_sync_allpools();
482 }
483
484 return (0);
485 }
486
487 #ifdef illumos
488 static int
489 zfs_create_unique_device(dev_t *dev)
490 {
491 major_t new_major;
492
493 do {
494 ASSERT3U(zfs_minor, <=, MAXMIN32);
495 minor_t start = zfs_minor;
496 do {
497 mutex_enter(&zfs_dev_mtx);
498 if (zfs_minor >= MAXMIN32) {
499 /*
500 * If we're still using the real major
501 * keep out of /dev/zfs and /dev/zvol minor
502 * number space. If we're using a getudev()'ed
503 * major number, we can use all of its minors.
504 */
505 if (zfs_major == ddi_name_to_major(ZFS_DRIVER))
506 zfs_minor = ZFS_MIN_MINOR;
507 else
508 zfs_minor = 0;
509 } else {
510 zfs_minor++;
511 }
512 *dev = makedevice(zfs_major, zfs_minor);
513 mutex_exit(&zfs_dev_mtx);
514 } while (vfs_devismounted(*dev) && zfs_minor != start);
515 #ifdef illumos
516 if (zfs_minor == start) {
517 /*
518 * We are using all ~262,000 minor numbers for the
519 * current major number. Create a new major number.
520 */
521 if ((new_major = getudev()) == (major_t)-1) {
522 cmn_err(CE_WARN,
523 "zfs_mount: Can't get unique major "
524 "device number.");
525 return (-1);
526 }
527 mutex_enter(&zfs_dev_mtx);
528 zfs_major = new_major;
529 zfs_minor = 0;
530
531 mutex_exit(&zfs_dev_mtx);
532 } else {
533 break;
534 }
535 /* CONSTANTCONDITION */
536 #endif
537 } while (1);
538
539 return (0);
540 }
541 #endif /* illumos */
542
543
544 static void
545 atime_changed_cb(void *arg, uint64_t newval)
546 {
547 zfsvfs_t *zfsvfs = arg;
548
549 if (newval == TRUE) {
550 zfsvfs->z_atime = TRUE;
551 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME;
552 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
553 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
554 } else {
555 zfsvfs->z_atime = FALSE;
556 zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME;
557 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
558 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
559 }
560 }
561
562 static void
563 xattr_changed_cb(void *arg, uint64_t newval)
564 {
565 zfsvfs_t *zfsvfs = arg;
566
567 if (newval == TRUE) {
568 /* XXX locking on vfs_flag? */
569 #ifdef TODO
570 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR;
571 #endif
572 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR);
573 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0);
574 } else {
575 /* XXX locking on vfs_flag? */
576 #ifdef TODO
577 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR;
578 #endif
579 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR);
580 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0);
581 }
582 }
583
584 static void
585 blksz_changed_cb(void *arg, uint64_t newval)
586 {
587 zfsvfs_t *zfsvfs = arg;
588 ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os)));
589 ASSERT3U(newval, >=, SPA_MINBLOCKSIZE);
590 ASSERT(ISP2(newval));
591
592 zfsvfs->z_max_blksz = newval;
593 zfsvfs->z_vfs->mnt_stat.f_iosize = newval;
594 }
595
596 static void
597 readonly_changed_cb(void *arg, uint64_t newval)
598 {
599 zfsvfs_t *zfsvfs = arg;
600
601 if (newval) {
602 /* XXX locking on vfs_flag? */
603 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
604 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
605 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
606 } else {
607 /* XXX locking on vfs_flag? */
608 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
609 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
610 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
611 }
612 }
613
614 static void
615 setuid_changed_cb(void *arg, uint64_t newval)
616 {
617 zfsvfs_t *zfsvfs = arg;
618
619 if (newval == FALSE) {
620 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
621 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
622 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
623 } else {
624 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
625 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
626 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
627 }
628 }
629
630 static void
631 exec_changed_cb(void *arg, uint64_t newval)
632 {
633 zfsvfs_t *zfsvfs = arg;
634
635 if (newval == FALSE) {
636 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
637 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
638 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
639 } else {
640 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
641 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
642 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
643 }
644 }
645
646 /*
647  * The nbmand mount option can only be changed at mount time.
648  * We can't allow it to be toggled on live file systems, as incorrect
649  * behavior may be seen from CIFS clients.
650  *
651  * This property isn't registered via dsl_prop_register(), but this callback
652  * is invoked when a file system is first mounted.
653 */
654 static void
655 nbmand_changed_cb(void *arg, uint64_t newval)
656 {
657 zfsvfs_t *zfsvfs = arg;
658 if (newval == FALSE) {
659 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND);
660 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0);
661 } else {
662 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND);
663 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0);
664 }
665 }
666
667 static void
668 snapdir_changed_cb(void *arg, uint64_t newval)
669 {
670 zfsvfs_t *zfsvfs = arg;
671
672 zfsvfs->z_show_ctldir = newval;
673 }
674
675 static void
676 vscan_changed_cb(void *arg, uint64_t newval)
677 {
678 zfsvfs_t *zfsvfs = arg;
679
680 zfsvfs->z_vscan = newval;
681 }
682
683 static void
684 acl_mode_changed_cb(void *arg, uint64_t newval)
685 {
686 zfsvfs_t *zfsvfs = arg;
687
688 zfsvfs->z_acl_mode = newval;
689 }
690
691 static void
692 acl_inherit_changed_cb(void *arg, uint64_t newval)
693 {
694 zfsvfs_t *zfsvfs = arg;
695
696 zfsvfs->z_acl_inherit = newval;
697 }
698
699 static int
700 zfs_register_callbacks(vfs_t *vfsp)
701 {
702 struct dsl_dataset *ds = NULL;
703 objset_t *os = NULL;
704 zfsvfs_t *zfsvfs = NULL;
705 uint64_t nbmand;
706 boolean_t readonly = B_FALSE;
707 boolean_t do_readonly = B_FALSE;
708 boolean_t setuid = B_FALSE;
709 boolean_t do_setuid = B_FALSE;
710 boolean_t exec = B_FALSE;
711 boolean_t do_exec = B_FALSE;
712 #ifdef illumos
713 boolean_t devices = B_FALSE;
714 boolean_t do_devices = B_FALSE;
715 #endif
716 boolean_t xattr = B_FALSE;
717 boolean_t do_xattr = B_FALSE;
718 boolean_t atime = B_FALSE;
719 boolean_t do_atime = B_FALSE;
720 int error = 0;
721
722 ASSERT(vfsp);
723 zfsvfs = vfsp->vfs_data;
724 ASSERT(zfsvfs);
725 os = zfsvfs->z_os;
726
727 /*
728  * This function can be called for a snapshot when we update the
729  * snapshot's mount point, which isn't really supported.
730 */
731 if (dmu_objset_is_snapshot(os))
732 return (EOPNOTSUPP);
733
734 /*
735 * The act of registering our callbacks will destroy any mount
736 * options we may have. In order to enable temporary overrides
737 * of mount options, we stash away the current values and
738 * restore them after we register the callbacks.
739 */
740 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) ||
741 !spa_writeable(dmu_objset_spa(os))) {
742 readonly = B_TRUE;
743 do_readonly = B_TRUE;
744 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
745 readonly = B_FALSE;
746 do_readonly = B_TRUE;
747 }
748 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
749 setuid = B_FALSE;
750 do_setuid = B_TRUE;
751 } else {
752 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
753 setuid = B_FALSE;
754 do_setuid = B_TRUE;
755 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
756 setuid = B_TRUE;
757 do_setuid = B_TRUE;
758 }
759 }
760 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
761 exec = B_FALSE;
762 do_exec = B_TRUE;
763 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
764 exec = B_TRUE;
765 do_exec = B_TRUE;
766 }
767 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
768 xattr = B_FALSE;
769 do_xattr = B_TRUE;
770 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
771 xattr = B_TRUE;
772 do_xattr = B_TRUE;
773 }
774 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
775 atime = B_FALSE;
776 do_atime = B_TRUE;
777 } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
778 atime = B_TRUE;
779 do_atime = B_TRUE;
780 }
781
782 /*
783 * We need to enter pool configuration here, so that we can use
784 * dsl_prop_get_int_ds() to handle the special nbmand property below.
785 * dsl_prop_get_integer() can not be used, because it has to acquire
786 * spa_namespace_lock and we can not do that because we already hold
787 * z_teardown_lock. The problem is that spa_config_sync() is called
788 * with spa_namespace_lock held and the function calls ZFS vnode
789 * operations to write the cache file and thus z_teardown_lock is
790 * acquired after spa_namespace_lock.
791 */
792 ds = dmu_objset_ds(os);
793 dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
794
795 /*
796  * nbmand is a special property.  It can only be changed at
797  * mount time, so if it wasn't supplied as a mount option we
798  * fall back to the value stored in the dataset.
801 */
802 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
803 nbmand = B_FALSE;
804 } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
805 nbmand = B_TRUE;
806 	} else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand)) != 0) {
807 dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
808 return (error);
809 }
810
811 /*
812 * Register property callbacks.
813 *
814 * It would probably be fine to just check for i/o error from
815 * the first prop_register(), but I guess I like to go
816 * overboard...
817 */
818 error = dsl_prop_register(ds,
819 zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
820 error = error ? error : dsl_prop_register(ds,
821 zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
822 error = error ? error : dsl_prop_register(ds,
823 zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
824 error = error ? error : dsl_prop_register(ds,
825 zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
826 #ifdef illumos
827 error = error ? error : dsl_prop_register(ds,
828 zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zfsvfs);
829 #endif
830 error = error ? error : dsl_prop_register(ds,
831 zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
832 error = error ? error : dsl_prop_register(ds,
833 zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
834 error = error ? error : dsl_prop_register(ds,
835 zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
836 error = error ? error : dsl_prop_register(ds,
837 zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs);
838 error = error ? error : dsl_prop_register(ds,
839 zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
840 zfsvfs);
841 error = error ? error : dsl_prop_register(ds,
842 zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs);
843 dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
844 if (error)
845 goto unregister;
846
847 /*
848 * Invoke our callbacks to restore temporary mount options.
849 */
850 if (do_readonly)
851 readonly_changed_cb(zfsvfs, readonly);
852 if (do_setuid)
853 setuid_changed_cb(zfsvfs, setuid);
854 if (do_exec)
855 exec_changed_cb(zfsvfs, exec);
856 if (do_xattr)
857 xattr_changed_cb(zfsvfs, xattr);
858 if (do_atime)
859 atime_changed_cb(zfsvfs, atime);
860
861 nbmand_changed_cb(zfsvfs, nbmand);
862
863 return (0);
864
865 unregister:
866 dsl_prop_unregister_all(ds, zfsvfs);
867 return (error);
868 }
869
870 static int
871 zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
872 uint64_t *userp, uint64_t *groupp)
873 {
874 /*
875 * Is it a valid type of object to track?
876 */
877 if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
878 return (SET_ERROR(ENOENT));
879
880 /*
881  * If we have a NULL data pointer then assume the IDs aren't
882  * changing and return EEXIST to the DMU to let it know to use
883  * the same IDs.
885 */
886 if (data == NULL)
887 return (SET_ERROR(EEXIST));
888
889 if (bonustype == DMU_OT_ZNODE) {
890 znode_phys_t *znp = data;
891 *userp = znp->zp_uid;
892 *groupp = znp->zp_gid;
893 } else {
894 int hdrsize;
895 sa_hdr_phys_t *sap = data;
896 sa_hdr_phys_t sa = *sap;
897 boolean_t swap = B_FALSE;
898
899 ASSERT(bonustype == DMU_OT_SA);
900
901 if (sa.sa_magic == 0) {
902 /*
903 * This should only happen for newly created
904 * files that haven't had the znode data filled
905 * in yet.
906 */
907 *userp = 0;
908 *groupp = 0;
909 return (0);
910 }
911 if (sa.sa_magic == BSWAP_32(SA_MAGIC)) {
912 sa.sa_magic = SA_MAGIC;
913 sa.sa_layout_info = BSWAP_16(sa.sa_layout_info);
914 swap = B_TRUE;
915 } else {
916 VERIFY3U(sa.sa_magic, ==, SA_MAGIC);
917 }
918
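/*
 * The uid and gid live at fixed offsets just past the variable-length
 * SA header, so they can be read straight out of the bonus buffer
 * without setting up a full SA handle.
 */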
919 hdrsize = sa_hdrsize(&sa);
920 VERIFY3U(hdrsize, >=, sizeof (sa_hdr_phys_t));
921 *userp = *((uint64_t *)((uintptr_t)data + hdrsize +
922 SA_UID_OFFSET));
923 *groupp = *((uint64_t *)((uintptr_t)data + hdrsize +
924 SA_GID_OFFSET));
925 if (swap) {
926 *userp = BSWAP_64(*userp);
927 *groupp = BSWAP_64(*groupp);
928 }
929 }
930 return (0);
931 }
932
933 static void
934 fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr,
935 char *domainbuf, int buflen, uid_t *ridp)
936 {
937 uint64_t fuid;
938 const char *domain;
939
940 fuid = strtonum(fuidstr, NULL);
941
942 domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid));
943 if (domain)
944 (void) strlcpy(domainbuf, domain, buflen);
945 else
946 domainbuf[0] = '\0';
947 *ridp = FUID_RID(fuid);
948 }
949
950 static uint64_t
951 zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type)
952 {
953 switch (type) {
954 case ZFS_PROP_USERUSED:
955 return (DMU_USERUSED_OBJECT);
956 case ZFS_PROP_GROUPUSED:
957 return (DMU_GROUPUSED_OBJECT);
958 case ZFS_PROP_USERQUOTA:
959 return (zfsvfs->z_userquota_obj);
960 case ZFS_PROP_GROUPQUOTA:
961 return (zfsvfs->z_groupquota_obj);
962 }
963 return (0);
964 }
965
966 int
967 zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
968 uint64_t *cookiep, void *vbuf, uint64_t *bufsizep)
969 {
970 int error;
971 zap_cursor_t zc;
972 zap_attribute_t za;
973 zfs_useracct_t *buf = vbuf;
974 uint64_t obj;
975
976 if (!dmu_objset_userspace_present(zfsvfs->z_os))
977 return (SET_ERROR(ENOTSUP));
978
979 obj = zfs_userquota_prop_to_obj(zfsvfs, type);
980 if (obj == 0) {
981 *bufsizep = 0;
982 return (0);
983 }
984
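/*
 * Walk the used/quota ZAP object: each entry is keyed by a FUID
 * string and its value is the space used (or quota) in bytes.  The
 * cursor position is resumed from, and re-serialized into, *cookiep
 * so callers can iterate in chunks.
 */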
985 for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep);
986 (error = zap_cursor_retrieve(&zc, &za)) == 0;
987 zap_cursor_advance(&zc)) {
988 if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) >
989 *bufsizep)
990 break;
991
992 fuidstr_to_sid(zfsvfs, za.za_name,
993 buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid);
994
995 buf->zu_space = za.za_first_integer;
996 buf++;
997 }
998 if (error == ENOENT)
999 error = 0;
1000
1001 ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep);
1002 *bufsizep = (uintptr_t)buf - (uintptr_t)vbuf;
1003 *cookiep = zap_cursor_serialize(&zc);
1004 zap_cursor_fini(&zc);
1005 return (error);
1006 }
1007
1008 /*
1009  * buf must be big enough (e.g., 32 bytes)
1010 */
1011 static int
1012 id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid,
1013 char *buf, boolean_t addok)
1014 {
1015 uint64_t fuid;
1016 int domainid = 0;
1017
1018 if (domain && domain[0]) {
1019 domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok);
1020 if (domainid == -1)
1021 return (SET_ERROR(ENOENT));
1022 }
1023 fuid = FUID_ENCODE(domainid, rid);
1024 (void) sprintf(buf, "%llx", (longlong_t)fuid);
1025 return (0);
1026 }
1027
1028 int
1029 zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
1030 const char *domain, uint64_t rid, uint64_t *valp)
1031 {
1032 char buf[32];
1033 int err;
1034 uint64_t obj;
1035
1036 *valp = 0;
1037
1038 if (!dmu_objset_userspace_present(zfsvfs->z_os))
1039 return (SET_ERROR(ENOTSUP));
1040
1041 obj = zfs_userquota_prop_to_obj(zfsvfs, type);
1042 if (obj == 0)
1043 return (0);
1044
1045 err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE);
1046 if (err)
1047 return (err);
1048
1049 err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp);
1050 if (err == ENOENT)
1051 err = 0;
1052 return (err);
1053 }
1054
1055 int
1056 zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
1057 const char *domain, uint64_t rid, uint64_t quota)
1058 {
1059 char buf[32];
1060 int err;
1061 dmu_tx_t *tx;
1062 uint64_t *objp;
1063 boolean_t fuid_dirtied;
1064
1065 if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA)
1066 return (SET_ERROR(EINVAL));
1067
1068 if (zfsvfs->z_version < ZPL_VERSION_USERSPACE)
1069 return (SET_ERROR(ENOTSUP));
1070
1071 objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj :
1072 &zfsvfs->z_groupquota_obj;
1073
1074 err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE);
1075 if (err)
1076 return (err);
1077 fuid_dirtied = zfsvfs->z_fuid_dirty;
1078
1079 tx = dmu_tx_create(zfsvfs->z_os);
1080 dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL);
1081 if (*objp == 0) {
1082 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
1083 zfs_userquota_prop_prefixes[type]);
1084 }
1085 if (fuid_dirtied)
1086 zfs_fuid_txhold(zfsvfs, tx);
1087 err = dmu_tx_assign(tx, TXG_WAIT);
1088 if (err) {
1089 dmu_tx_abort(tx);
1090 return (err);
1091 }
1092
1093 mutex_enter(&zfsvfs->z_lock);
1094 if (*objp == 0) {
1095 *objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA,
1096 DMU_OT_NONE, 0, tx);
1097 VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
1098 zfs_userquota_prop_prefixes[type], 8, 1, objp, tx));
1099 }
1100 mutex_exit(&zfsvfs->z_lock);
1101
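/* A quota of zero means "remove any existing quota entry". */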
1102 if (quota == 0) {
1103 err = zap_remove(zfsvfs->z_os, *objp, buf, tx);
1104 if (err == ENOENT)
1105 err = 0;
1106 } else {
1107 		err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, &quota, tx);
1108 }
1109 ASSERT(err == 0);
1110 if (fuid_dirtied)
1111 zfs_fuid_sync(zfsvfs, tx);
1112 dmu_tx_commit(tx);
1113 return (err);
1114 }
1115
1116 boolean_t
1117 zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
1118 {
1119 char buf[32];
1120 uint64_t used, quota, usedobj, quotaobj;
1121 int err;
1122
1123 usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
1124 quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
1125
1126 if (quotaobj == 0 || zfsvfs->z_replay)
1127 return (B_FALSE);
1128
1129 (void) sprintf(buf, "%llx", (longlong_t)fuid);
1130 	err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, &quota);
1131 if (err != 0)
1132 return (B_FALSE);
1133
1134 err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used);
1135 if (err != 0)
1136 return (B_FALSE);
1137 return (used >= quota);
1138 }
1139
1140 boolean_t
1141 zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup)
1142 {
1143 uint64_t fuid;
1144 uint64_t quotaobj;
1145
1146 quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
1147
1148 fuid = isgroup ? zp->z_gid : zp->z_uid;
1149
1150 if (quotaobj == 0 || zfsvfs->z_replay)
1151 return (B_FALSE);
1152
1153 return (zfs_fuid_overquota(zfsvfs, isgroup, fuid));
1154 }
1155
1156 /*
1157 * Associate this zfsvfs with the given objset, which must be owned.
1158 * This will cache a bunch of on-disk state from the objset in the
1159 * zfsvfs.
1160 */
1161 static int
1162 zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
1163 {
1164 int error;
1165 uint64_t val;
1166
1167 zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;
1168 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
1169 zfsvfs->z_os = os;
1170
1171 error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
1172 if (error != 0)
1173 return (error);
1174 if (zfsvfs->z_version >
1175 zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
1176 (void) printf("Can't mount a version %lld file system "
1177 "on a version %lld pool\n. Pool must be upgraded to mount "
1178 "this file system.", (u_longlong_t)zfsvfs->z_version,
1179 (u_longlong_t)spa_version(dmu_objset_spa(os)));
1180 return (SET_ERROR(ENOTSUP));
1181 }
1182 error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val);
1183 if (error != 0)
1184 return (error);
1185 zfsvfs->z_norm = (int)val;
1186
1187 error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val);
1188 if (error != 0)
1189 return (error);
1190 zfsvfs->z_utf8 = (val != 0);
1191
1192 error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val);
1193 if (error != 0)
1194 return (error);
1195 zfsvfs->z_case = (uint_t)val;
1196
1197 /*
1198 * Fold case on file systems that are always or sometimes case
1199 * insensitive.
1200 */
1201 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
1202 zfsvfs->z_case == ZFS_CASE_MIXED)
1203 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
1204
1205 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
1206 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
1207
1208 uint64_t sa_obj = 0;
1209 if (zfsvfs->z_use_sa) {
1210 /* should either have both of these objects or none */
1211 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
1212 &sa_obj);
1213 if (error != 0)
1214 return (error);
1215 }
1216
1217 error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
1218 &zfsvfs->z_attr_table);
1219 if (error != 0)
1220 return (error);
1221
1222 if (zfsvfs->z_version >= ZPL_VERSION_SA)
1223 sa_register_update_callback(os, zfs_sa_upgrade);
1224
1225 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
1226 &zfsvfs->z_root);
1227 if (error != 0)
1228 return (error);
1229 ASSERT(zfsvfs->z_root != 0);
1230
1231 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
1232 &zfsvfs->z_unlinkedobj);
1233 if (error != 0)
1234 return (error);
1235
1236 error = zap_lookup(os, MASTER_NODE_OBJ,
1237 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
1238 8, 1, &zfsvfs->z_userquota_obj);
1239 if (error == ENOENT)
1240 zfsvfs->z_userquota_obj = 0;
1241 else if (error != 0)
1242 return (error);
1243
1244 error = zap_lookup(os, MASTER_NODE_OBJ,
1245 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
1246 8, 1, &zfsvfs->z_groupquota_obj);
1247 if (error == ENOENT)
1248 zfsvfs->z_groupquota_obj = 0;
1249 else if (error != 0)
1250 return (error);
1251
1252 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
1253 &zfsvfs->z_fuid_obj);
1254 if (error == ENOENT)
1255 zfsvfs->z_fuid_obj = 0;
1256 else if (error != 0)
1257 return (error);
1258
1259 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
1260 &zfsvfs->z_shares_dir);
1261 if (error == ENOENT)
1262 zfsvfs->z_shares_dir = 0;
1263 else if (error != 0)
1264 return (error);
1265
1266 /*
1267 * Only use the name cache if we are looking for a
1268 * name on a file system that does not require normalization
1269 * or case folding. We can also look there if we happen to be
1270 * on a non-normalizing, mixed sensitivity file system IF we
1271 * are looking for the exact name (which is always the case on
1272 * FreeBSD).
1273 */
1274 zfsvfs->z_use_namecache = !zfsvfs->z_norm ||
1275 ((zfsvfs->z_case == ZFS_CASE_MIXED) &&
1276 !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER));
1277
1278 return (0);
1279 }
1280
1281 int
1282 zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
1283 {
1284 objset_t *os;
1285 zfsvfs_t *zfsvfs;
1286 int error;
1287
1288 /*
1289 * XXX: Fix struct statfs so this isn't necessary!
1290 *
1291 * The 'osname' is used as the filesystem's special node, which means
1292 * it must fit in statfs.f_mntfromname, or else it can't be
1293 * enumerated, so libzfs_mnttab_find() returns NULL, which causes
1294 * 'zfs unmount' to think it's not mounted when it is.
1295 */
1296 if (strlen(osname) >= MNAMELEN)
1297 return (SET_ERROR(ENAMETOOLONG));
1298
1299 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
1300
1301 /*
1302 * We claim to always be readonly so we can open snapshots;
1303 * other ZPL code will prevent us from writing to snapshots.
1304 */
1305 error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
1306 if (error) {
1307 kmem_free(zfsvfs, sizeof (zfsvfs_t));
1308 return (error);
1309 }
1310
1311 zfsvfs->z_vfs = NULL;
1312 zfsvfs->z_parent = zfsvfs;
1313
1314 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
1315 mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
1316 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
1317 offsetof(znode_t, z_link_node));
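/*
 * On DIAGNOSTIC kernels, track individual holders of the teardown
 * lock to aid debugging, at some extra cost.
 */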
1318 #ifdef DIAGNOSTIC
1319 rrm_init(&zfsvfs->z_teardown_lock, B_TRUE);
1320 #else
1321 rrm_init(&zfsvfs->z_teardown_lock, B_FALSE);
1322 #endif
1323 rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
1324 rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
1325 for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++)
1326 mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
1327
1328 error = zfsvfs_init(zfsvfs, os);
1329 if (error != 0) {
1330 dmu_objset_disown(os, zfsvfs);
1331 *zfvp = NULL;
1332 kmem_free(zfsvfs, sizeof (zfsvfs_t));
1333 return (error);
1334 }
1335
1336 *zfvp = zfsvfs;
1337 return (0);
1338 }
1339
1340 static int
1341 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
1342 {
1343 int error;
1344
1345 error = zfs_register_callbacks(zfsvfs->z_vfs);
1346 if (error)
1347 return (error);
1348
1349 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
1350
1351 /*
1352 * If we are not mounting (ie: online recv), then we don't
1353 * have to worry about replaying the log as we blocked all
1354 * operations out since we closed the ZIL.
1355 */
1356 if (mounting) {
1357 boolean_t readonly;
1358
1359 /*
1360 * During replay we remove the read only flag to
1361 * allow replays to succeed.
1362 */
1363 readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
1364 if (readonly != 0)
1365 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
1366 else
1367 zfs_unlinked_drain(zfsvfs);
1368
1369 /*
1370 * Parse and replay the intent log.
1371 *
1372 * Because of ziltest, this must be done after
1373 * zfs_unlinked_drain(). (Further note: ziltest
1374 * doesn't use readonly mounts, where
1375 * zfs_unlinked_drain() isn't called.) This is because
1376 * ziltest causes spa_sync() to think it's committed,
1377 * but actually it is not, so the intent log contains
1378 * many txg's worth of changes.
1379 *
1380 * In particular, if object N is in the unlinked set in
1381 * the last txg to actually sync, then it could be
1382 * actually freed in a later txg and then reallocated
1383 * in a yet later txg. This would write a "create
1384 * object N" record to the intent log. Normally, this
1385 * would be fine because the spa_sync() would have
1386 * written out the fact that object N is free, before
1387 * we could write the "create object N" intent log
1388 * record.
1389 *
1390 * But when we are in ziltest mode, we advance the "open
1391 * txg" without actually spa_sync()-ing the changes to
1392 * disk. So we would see that object N is still
1393 * allocated and in the unlinked set, and there is an
1394 * intent log record saying to allocate it.
1395 */
1396 if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
1397 if (zil_replay_disable) {
1398 zil_destroy(zfsvfs->z_log, B_FALSE);
1399 } else {
1400 zfsvfs->z_replay = B_TRUE;
1401 zil_replay(zfsvfs->z_os, zfsvfs,
1402 zfs_replay_vector);
1403 zfsvfs->z_replay = B_FALSE;
1404 }
1405 }
1406 zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */
1407 }
1408
1409 /*
1410 * Set the objset user_ptr to track its zfsvfs.
1411 */
1412 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
1413 dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
1414 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
1415
1416 return (0);
1417 }
1418
1419 extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */
1420
1421 void
1422 zfsvfs_free(zfsvfs_t *zfsvfs)
1423 {
1424 int i;
1425
1426 /*
1427 * This is a barrier to prevent the filesystem from going away in
1428 * zfs_znode_move() until we can safely ensure that the filesystem is
1429 * not unmounted. We consider the filesystem valid before the barrier
1430 * and invalid after the barrier.
1431 */
1432 rw_enter(&zfsvfs_lock, RW_READER);
1433 rw_exit(&zfsvfs_lock);
1434
1435 zfs_fuid_destroy(zfsvfs);
1436
1437 mutex_destroy(&zfsvfs->z_znodes_lock);
1438 mutex_destroy(&zfsvfs->z_lock);
1439 list_destroy(&zfsvfs->z_all_znodes);
1440 rrm_destroy(&zfsvfs->z_teardown_lock);
1441 rw_destroy(&zfsvfs->z_teardown_inactive_lock);
1442 rw_destroy(&zfsvfs->z_fuid_lock);
1443 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
1444 mutex_destroy(&zfsvfs->z_hold_mtx[i]);
1445 kmem_free(zfsvfs, sizeof (zfsvfs_t));
1446 }
1447
1448 static void
1449 zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
1450 {
1451 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
1452 if (zfsvfs->z_vfs) {
1453 if (zfsvfs->z_use_fuids) {
1454 vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
1455 vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
1456 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
1457 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
1458 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
1459 vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
1460 } else {
1461 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
1462 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
1463 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
1464 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
1465 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
1466 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
1467 }
1468 }
1469 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
1470 }
1471
1472 #ifdef __NetBSD__
1473 int
1474 #else
1475 static int
1476 #endif
1477 zfs_domount(vfs_t *vfsp, char *osname)
1478 {
1479 uint64_t recordsize, fsid_guid;
1480 int error = 0;
1481 zfsvfs_t *zfsvfs;
1482 vnode_t *vp;
1483
1484 ASSERT(vfsp);
1485 ASSERT(osname);
1486
1487 error = zfsvfs_create(osname, &zfsvfs);
1488 if (error)
1489 return (error);
1490 zfsvfs->z_vfs = vfsp;
1491
1492 #ifdef illumos
1493 /* Initialize the generic filesystem structure. */
1494 vfsp->vfs_bcount = 0;
1495 vfsp->vfs_data = NULL;
1496
1497 if (zfs_create_unique_device(&mount_dev) == -1) {
1498 error = SET_ERROR(ENODEV);
1499 goto out;
1500 }
1501 ASSERT(vfs_devismounted(mount_dev) == 0);
1502 #endif
1503
1504 if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize,
1505 NULL))
1506 goto out;
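/*
 * Advertise the smallest ZFS block size as the fundamental block
 * size and the dataset's recordsize as the preferred I/O size.
 */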
1507 zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE;
1508 zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize;
1509
1510 vfsp->vfs_data = zfsvfs;
1511 #ifdef __FreeBSD_kernel__
1512 vfsp->mnt_flag |= MNT_LOCAL;
1513 vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
1514 vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
1515 vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
1516 vfsp->mnt_kern_flag |= MNTK_NO_IOPF; /* vn_io_fault can be used */
1517 #endif
1518 #ifdef __NetBSD__
1519 vfsp->mnt_flag |= MNT_LOCAL;
1520 vfsp->mnt_iflag |= IMNT_MPSAFE | IMNT_NCLOOKUP;
1521 #endif
1522
1523 /*
1524 * The fsid is 64 bits, composed of an 8-bit fs type, which
1525 * separates our fsid from any other filesystem types, and a
1526 * 56-bit objset unique ID. The objset unique ID is unique to
1527 * all objsets open on this system, provided by unique_create().
1528 * The 8-bit fs type must be put in the low bits of fsid[1]
1529 * because that's where other Solaris filesystems put it.
1530 */
1531 fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
1532 ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0);
1533 #ifdef __FreeBSD_kernel__
1534 vfsp->vfs_fsid.val[0] = fsid_guid;
1535 vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) |
1536 vfsp->mnt_vfc->vfc_typenum & 0xFF;
1537 #endif
1538 #ifdef __NetBSD__
1539 vfsp->mnt_stat.f_fsidx.__fsid_val[0] = fsid_guid;
1540 vfsp->mnt_stat.f_fsidx.__fsid_val[1] = ((fsid_guid>>32) << 8) |
1541 makefstype(vfsp->mnt_op->vfs_name) & 0xFF;
1542 vfsp->mnt_stat.f_fsid = fsid_guid;
1543 #endif
1544
1545 /*
1546 * Set features for file system.
1547 */
1548 zfs_set_fuid_feature(zfsvfs);
1549 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
1550 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
1551 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
1552 vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE);
1553 } else if (zfsvfs->z_case == ZFS_CASE_MIXED) {
1554 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
1555 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
1556 }
1557 vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);
1558
1559 if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
1560 uint64_t pval;
1561
1562 atime_changed_cb(zfsvfs, B_FALSE);
1563 readonly_changed_cb(zfsvfs, B_TRUE);
1564 if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL))
1565 goto out;
1566 xattr_changed_cb(zfsvfs, pval);
1567 zfsvfs->z_issnap = B_TRUE;
1568 zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;
1569
1570 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
1571 dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
1572 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
1573 } else {
1574 error = zfsvfs_setup(zfsvfs, B_TRUE);
1575 }
1576
1577 #ifdef __FreeBSD_kernel__
1578 vfs_mountedfrom(vfsp, osname);
1579 #endif
1580 #ifdef __NetBSD__
1581 set_statvfs_info("on-name", UIO_SYSSPACE, osname, UIO_SYSSPACE, "zfs", vfsp, curlwp);
1582 #endif
1583
1584 if (!zfsvfs->z_issnap)
1585 zfsctl_create(zfsvfs);
1586 out:
1587 if (error) {
1588 dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1589 zfsvfs_free(zfsvfs);
1590 } else {
1591 atomic_inc_32(&zfs_active_fs_count);
1592 }
1593
1594 return (error);
1595 }
1596
1597 void
1598 zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
1599 {
1600 objset_t *os = zfsvfs->z_os;
1601
1602 if (!dmu_objset_is_snapshot(os))
1603 dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs);
1604 }
1605
1606 #ifdef SECLABEL
1607 /*
1608 * Convert a decimal digit string to a uint64_t integer.
1609 */
1610 static int
1611 str_to_uint64(char *str, uint64_t *objnum)
1612 {
1613 uint64_t num = 0;
1614
1615 while (*str) {
1616 if (*str < '0' || *str > '9')
1617 return (SET_ERROR(EINVAL));
1618
1619 num = num*10 + *str++ - '0';
1620 }
1621
1622 *objnum = num;
1623 return (0);
1624 }
1625
1626 /*
1627 * The boot path passed from the boot loader is in the form of
1628 * "rootpool-name/root-filesystem-object-number'. Convert this
1629 * string to a dataset name: "rootpool-name/root-filesystem-name".
1630 */
1631 static int
1632 zfs_parse_bootfs(char *bpath, char *outpath)
1633 {
1634 char *slashp;
1635 uint64_t objnum;
1636 int error;
1637
1638 if (*bpath == 0 || *bpath == '/')
1639 return (SET_ERROR(EINVAL));
1640
1641 (void) strcpy(outpath, bpath);
1642
1643 slashp = strchr(bpath, '/');
1644
1645 /* if no '/', just return the pool name */
1646 if (slashp == NULL) {
1647 return (0);
1648 }
1649
1650 /* if not a number, just return the root dataset name */
1651 if (str_to_uint64(slashp+1, &objnum)) {
1652 return (0);
1653 }
1654
1655 *slashp = '\0';
1656 error = dsl_dsobj_to_dsname(bpath, objnum, outpath);
1657 *slashp = '/';
1658
1659 return (error);
1660 }
1661
1662 /*
1663 * Check that the hex label string is appropriate for the dataset being
1664 * mounted into the global_zone proper.
1665 *
1666 * Return an error if the hex label string is not default or
1667 * admin_low/admin_high. For admin_low labels, the corresponding
1668 * dataset must be readonly.
1669 */
1670 int
1671 zfs_check_global_label(const char *dsname, const char *hexsl)
1672 {
1673 if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
1674 return (0);
1675 if (strcasecmp(hexsl, ADMIN_HIGH) == 0)
1676 return (0);
1677 if (strcasecmp(hexsl, ADMIN_LOW) == 0) {
1678 /* must be readonly */
1679 uint64_t rdonly;
1680
1681 if (dsl_prop_get_integer(dsname,
1682 zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL))
1683 return (SET_ERROR(EACCES));
1684 return (rdonly ? 0 : EACCES);
1685 }
1686 return (SET_ERROR(EACCES));
1687 }
1688
1689 /*
1690 * Determine whether the mount is allowed according to MAC check.
1691 * by comparing (where appropriate) label of the dataset against
1692 * the label of the zone being mounted into. If the dataset has
1693 * no label, create one.
1694 *
1695 * Returns 0 if access allowed, error otherwise (e.g. EACCES)
1696 */
1697 static int
1698 zfs_mount_label_policy(vfs_t *vfsp, char *osname)
1699 {
1700 int error, retv;
1701 zone_t *mntzone = NULL;
1702 ts_label_t *mnt_tsl;
1703 bslabel_t *mnt_sl;
1704 bslabel_t ds_sl;
1705 char ds_hexsl[MAXNAMELEN];
1706
1707 retv = EACCES; /* assume the worst */
1708
1709 /*
1710 * Start by getting the dataset label if it exists.
1711 */
1712 error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
1713 1, sizeof (ds_hexsl), &ds_hexsl, NULL);
1714 if (error)
1715 return (SET_ERROR(EACCES));
1716
1717 /*
1718 * If labeling is NOT enabled, then disallow the mount of datasets
1719 * which have a non-default label already. No other label checks
1720 * are needed.
1721 */
1722 if (!is_system_labeled()) {
1723 if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
1724 return (0);
1725 return (SET_ERROR(EACCES));
1726 }
1727
1728 /*
1729 * Get the label of the mountpoint. If mounting into the global
1730 * zone (i.e. mountpoint is not within an active zone and the
1731 * zoned property is off), the label must be default or
1732 * admin_low/admin_high only; no other checks are needed.
1733 */
1734 mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE);
1735 if (mntzone->zone_id == GLOBAL_ZONEID) {
1736 uint64_t zoned;
1737
1738 zone_rele(mntzone);
1739
1740 if (dsl_prop_get_integer(osname,
1741 zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
1742 return (SET_ERROR(EACCES));
1743 if (!zoned)
1744 return (zfs_check_global_label(osname, ds_hexsl));
1745 else
1746 /*
1747 * This is the case of a zone dataset being mounted
1748 * initially, before the zone has been fully created;
1749 * allow this mount into global zone.
1750 */
1751 return (0);
1752 }
1753
1754 mnt_tsl = mntzone->zone_slabel;
1755 ASSERT(mnt_tsl != NULL);
1756 label_hold(mnt_tsl);
1757 mnt_sl = label2bslabel(mnt_tsl);
1758
1759 if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) {
1760 /*
1761 * The dataset doesn't have a real label, so fabricate one.
1762 */
1763 char *str = NULL;
1764
1765 if (l_to_str_internal(mnt_sl, &str) == 0 &&
1766 dsl_prop_set_string(osname,
1767 zfs_prop_to_name(ZFS_PROP_MLSLABEL),
1768 ZPROP_SRC_LOCAL, str) == 0)
1769 retv = 0;
1770 if (str != NULL)
1771 kmem_free(str, strlen(str) + 1);
1772 } else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) {
1773 /*
1774 * Now compare labels to complete the MAC check. If the
1775 * labels are equal then allow access. If the mountpoint
1776 * label dominates the dataset label, allow readonly access.
1777 * Otherwise, access is denied.
1778 */
1779 if (blequal(mnt_sl, &ds_sl))
1780 retv = 0;
1781 else if (bldominates(mnt_sl, &ds_sl)) {
1782 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
1783 retv = 0;
1784 }
1785 }
1786
1787 label_rele(mnt_tsl);
1788 zone_rele(mntzone);
1789 return (retv);
1790 }
1791 #endif /* SECLABEL */
1792
1793 #ifdef OPENSOLARIS_MOUNTROOT
1794 static int
1795 zfs_mountroot(vfs_t *vfsp, enum whymountroot why)
1796 {
1797 int error = 0;
1798 static int zfsrootdone = 0;
1799 zfsvfs_t *zfsvfs = NULL;
1800 znode_t *zp = NULL;
1801 vnode_t *vp = NULL;
1802 char *zfs_bootfs;
1803 char *zfs_devid;
1804
1805 ASSERT(vfsp);
1806
1807 /*
1808 * The filesystem that we mount as root is defined in the
1809 * boot property "zfs-bootfs" with a format of
1810 * "poolname/root-dataset-objnum".
1811 */
1812 if (why == ROOT_INIT) {
1813 if (zfsrootdone++)
1814 return (SET_ERROR(EBUSY));
1815 /*
1816 * the process of doing a spa_load will require the
1817 * clock to be set before we could (for example) do
1818 * something better by looking at the timestamp on
1819 * an uberblock, so just set it to -1.
1820 */
1821 clkset(-1);
1822
1823 if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) {
1824 cmn_err(CE_NOTE, "spa_get_bootfs: can not get "
1825 "bootfs name");
1826 return (SET_ERROR(EINVAL));
1827 }
1828 zfs_devid = spa_get_bootprop("diskdevid");
1829 error = spa_import_rootpool(rootfs.bo_name, zfs_devid);
1830 if (zfs_devid)
1831 spa_free_bootprop(zfs_devid);
1832 if (error) {
1833 spa_free_bootprop(zfs_bootfs);
1834 cmn_err(CE_NOTE, "spa_import_rootpool: error %d",
1835 error);
1836 return (error);
1837 }
1838 if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) {
1839 spa_free_bootprop(zfs_bootfs);
1840 cmn_err(CE_NOTE, "zfs_parse_bootfs: error %d",
1841 error);
1842 return (error);
1843 }
1844
1845 spa_free_bootprop(zfs_bootfs);
1846
1847 if (error = vfs_lock(vfsp))
1848 return (error);
1849
1850 if (error = zfs_domount(vfsp, rootfs.bo_name)) {
1851 cmn_err(CE_NOTE, "zfs_domount: error %d", error);
1852 goto out;
1853 }
1854
1855 zfsvfs = (zfsvfs_t *)vfsp->vfs_data;
1856 ASSERT(zfsvfs);
1857 if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) {
1858 cmn_err(CE_NOTE, "zfs_zget: error %d", error);
1859 goto out;
1860 }
1861
1862 vp = ZTOV(zp);
1863 mutex_enter(&vp->v_lock);
1864 vp->v_flag |= VROOT;
1865 mutex_exit(&vp->v_lock);
1866 rootvp = vp;
1867
1868 /*
1869 * Leave rootvp held. The root file system is never unmounted.
1870 */
1871
1872 vfs_add((struct vnode *)0, vfsp,
1873 (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0);
1874 out:
1875 vfs_unlock(vfsp);
1876 return (error);
1877 } else if (why == ROOT_REMOUNT) {
1878 readonly_changed_cb(vfsp->vfs_data, B_FALSE);
1879 vfsp->vfs_flag |= VFS_REMOUNT;
1880
1881 /* refresh mount options */
1882 zfs_unregister_callbacks(vfsp->vfs_data);
1883 return (zfs_register_callbacks(vfsp));
1884
1885 } else if (why == ROOT_UNMOUNT) {
1886 zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data);
1887 (void) zfs_sync(vfsp, 0, 0);
1888 return (0);
1889 }
1890
1891 /*
1892 * If "why" is anything other than ROOT_INIT, ROOT_REMOUNT, or
1893 * ROOT_UNMOUNT, we do not support it.
1894 */
1895 return (SET_ERROR(ENOTSUP));
1896 }
1897 #endif /* OPENSOLARIS_MOUNTROOT */
1898
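/*
 * Extract the pool name from a dataset name: copy everything up to the
 * first '/' (or the whole name if there is no '/') into poolname, which
 * must hold at least MAXNAMELEN bytes.  For example, "tank/ROOT/default"
 * yields "tank".  Returns ENAMETOOLONG if the component does not fit.
 */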
1899 static int
1900 getpoolname(const char *osname, char *poolname)
1901 {
1902 char *p;
1903
1904 p = strchr(osname, '/');
1905 if (p == NULL) {
1906 if (strlen(osname) >= MAXNAMELEN)
1907 return (ENAMETOOLONG);
1908 (void) strcpy(poolname, osname);
1909 } else {
1910 if (p - osname >= MAXNAMELEN)
1911 return (ENAMETOOLONG);
1912 (void) strncpy(poolname, osname, p - osname);
1913 poolname[p - osname] = '\0';
1914 }
1915 return (0);
1916 }
1917
1918 /*ARGSUSED*/
1919 #ifdef illumos
1920 static int
1921 zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
1922 #endif
1923 #ifdef __FreeBSD_kernel__
1924 static int
1925 zfs_mount(vfs_t *vfsp)
1926 #endif
1927 #ifdef __NetBSD__
1928 static int
1929 zfs_mount(vfs_t *vfsp, const char *path, void *data, size_t *data_len)
1930 #endif
1931 {
1932 vnode_t *mvp = vfsp->mnt_vnodecovered;
1933 char *osname;
1934 int error = 0;
1935 int canwrite;
1936
1937 #ifdef illumos
1938 if (mvp->v_type != VDIR)
1939 return (SET_ERROR(ENOTDIR));
1940
1941 mutex_enter(&mvp->v_lock);
1942 if ((uap->flags & MS_REMOUNT) == 0 &&
1943 (uap->flags & MS_OVERLAY) == 0 &&
1944 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
1945 mutex_exit(&mvp->v_lock);
1946 return (SET_ERROR(EBUSY));
1947 }
1948 mutex_exit(&mvp->v_lock);
1949
1950 /*
1951 * ZFS does not support passing unparsed data in via MS_DATA.
1952 * Users should use the MS_OPTIONSTR interface; this means
1953 * that all option parsing is already done and the options struct
1954 * can be interrogated.
1955 */
1956 if ((uap->flags & MS_DATA) && uap->datalen > 0)
1957 return (SET_ERROR(EINVAL));
1958 #endif /* illumos */
1959
1960 #ifdef __FreeBSD_kernel__
1961 kthread_t *td = curthread;
1962 cred_t *cr = td->td_ucred;
1963
1964 if (!prison_allow(td->td_ucred, PR_ALLOW_MOUNT_ZFS))
1965 return (SET_ERROR(EPERM));
1966
1967 if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL))
1968 return (SET_ERROR(EINVAL));
1969
1970 /*
1971 * If full-owner-access is enabled and delegated administration is
1972 * turned on, we must set nosuid.
1973 */
1974 if (zfs_super_owner &&
1975 dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) {
1976 secpolicy_fs_mount_clearopts(cr, vfsp);
1977 }
1978
1979 #endif /* __FreeBSD_kernel__ */
1980
1981 #ifdef __NetBSD__
1982 cred_t *cr = CRED();
1983 struct mounta *uap = data;
1984
1985 if (uap == NULL)
1986 return (SET_ERROR(EINVAL));
1987
1988 if (*data_len < sizeof *uap)
1989 return (SET_ERROR(EINVAL));
1990
1991 if (mvp->v_type != VDIR)
1992 return (SET_ERROR(ENOTDIR));
1993
1994 mutex_enter(mvp->v_interlock);
1995 if ((uap->flags & MS_REMOUNT) == 0 &&
1996 (uap->flags & MS_OVERLAY) == 0 &&
1997 (vrefcnt(mvp) != 1 || (mvp->v_flag & VROOT))) {
1998 mutex_exit(mvp->v_interlock);
1999 return (SET_ERROR(EBUSY));
2000 }
2001 mutex_exit(mvp->v_interlock);
2002
2003 osname = PNBUF_GET();
2004 strlcpy(osname, uap->fspec, strlen(uap->fspec) + 1);
2005 #endif /* __NetBSD__ */
2006
2007 /*
2008 * Check for mount privilege.
2009 *
2010 * If we don't have the privilege, then see if
2011 * we have local permission to allow it.
2012 */
2013 error = secpolicy_fs_mount(cr, mvp, vfsp);
2014 if (error) {
2015 if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0)
2016 goto out;
2017
2018 if (!(vfsp->vfs_flag & MS_REMOUNT)) {
2019 vattr_t vattr;
2020
2021 /*
2022 * Make sure user is the owner of the mount point
2023 * or has sufficient privileges.
2024 */
2025
2026 vattr.va_mask = AT_UID;
2027
2028 #ifdef __FreeBSD_kernel__
2029 vn_lock(mvp, LK_SHARED | LK_RETRY);
2030 if (VOP_GETATTR(mvp, &vattr, cr)) {
2031 VOP_UNLOCK(mvp, 0);
2032 goto out;
2033 }
2034
2035 if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 &&
2036 VOP_ACCESS(mvp, VWRITE, cr, td) != 0) {
2037 VOP_UNLOCK(mvp, 0);
2038 goto out;
2039 }
2040 VOP_UNLOCK(mvp, 0);
2041 #endif
2042 #ifdef __NetBSD__
2043 vn_lock(mvp, LK_SHARED | LK_RETRY);
2044 if (VOP_GETATTR(mvp, &vattr, 0, cr, NULL)) {
2045 VOP_UNLOCK(mvp, 0);
2046 goto out;
2047 }
2048
2049 if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 &&
2050 VOP_ACCESS(mvp, VWRITE, cr) != 0) {
2051 VOP_UNLOCK(mvp, 0);
2052 goto out;
2053 }
2054 VOP_UNLOCK(mvp, 0);
2055 #endif
2056 }
2057
2058 secpolicy_fs_mount_clearopts(cr, vfsp);
2059 }
2060
2061 /*
2062 * Refuse to mount a filesystem if we are in a local zone and the
2063 * dataset is not visible.
2064 */
2065 if (!INGLOBALZONE(curthread) &&
2066 (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
2067 error = SET_ERROR(EPERM);
2068 goto out;
2069 }
2070
2071 #ifdef SECLABEL
2072 error = zfs_mount_label_policy(vfsp, osname);
2073 if (error)
2074 goto out;
2075 #endif
2076
2077 #ifdef __FreeBSD_kernel__
2078 vfsp->vfs_flag |= MNT_NFS4ACLS;
2079 #endif
2080 #ifdef __NetBSD__
2081 vfsp->mnt_iflag |= IMNT_MPSAFE | IMNT_NCLOOKUP;
2082 #endif
2083
2084 /*
2085 * When doing a remount, we simply refresh our temporary properties
2086 * according to those options set in the current VFS options.
2087 */
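/*
 * This path is typically reached by a mount update on an already
 * mounted dataset (for example "mount -u -o ro ..." on FreeBSD);
 * persistent dataset properties are left untouched.
 */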
2088 if (vfsp->vfs_flag & MS_REMOUNT) {
2089 zfsvfs_t *zfsvfs = vfsp->vfs_data;
2090
2091 /*
2092 * Refresh mount options with z_teardown_lock blocking I/O while
2093 * the filesystem is in an inconsistent state.
2094 * The lock also serializes this code with filesystem
2095 * manipulations between entry to zfs_suspend_fs() and return
2096 * from zfs_resume_fs().
2097 */
2098 rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
2099 zfs_unregister_callbacks(zfsvfs);
2100 error = zfs_register_callbacks(vfsp);
2101 rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
2102 goto out;
2103 }
2104
2105 #ifdef __FreeBSD_kernel__
2106 /* Initial root mount: try hard to import the requested root pool. */
2107 if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 &&
2108 (vfsp->vfs_flag & MNT_UPDATE) == 0) {
2109 char pname[MAXNAMELEN];
2110
2111 error = getpoolname(osname, pname);
2112 if (error == 0)
2113 error = spa_import_rootpool(pname);
2114 if (error)
2115 goto out;
2116 }
2117 #endif
2118
2119 DROP_GIANT();
2120 error = zfs_domount(vfsp, osname);
2121 PICKUP_GIANT();
2122
2123 #ifdef illumos
2124 /*
2125 * Add an extra VFS_HOLD on our parent vfs so that it can't
2126 * disappear due to a forced unmount.
2127 */
2128 if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap)
2129 VFS_HOLD(mvp->v_vfsp);
2130 #endif
2131
2132 #ifdef __NetBSD__
2133 /* setup zfs mount info */
2134 strlcpy(vfsp->mnt_stat.f_mntfromname, osname,
2135 sizeof(vfsp->mnt_stat.f_mntfromname));
2136 set_statvfs_info(path, UIO_USERSPACE, vfsp->mnt_stat.f_mntfromname,
2137 UIO_SYSSPACE, vfsp->mnt_op->vfs_name, vfsp, curlwp);
2138 #endif
2139
2140 out:
2141 return (error);
2142 }
2143
2144 #ifdef __FreeBSD_kernel__
2145 static int
2146 zfs_statfs(vfs_t *vfsp, struct statfs *statp)
2147 #endif
2148 #ifdef __NetBSD__
2149 static int
2150 zfs_statvfs(vfs_t *vfsp, struct statvfs *statp)
2151 #endif
2152 {
2153 zfsvfs_t *zfsvfs = vfsp->vfs_data;
2154 uint64_t refdbytes, availbytes, usedobjs, availobjs;
2155
2156 #ifdef __FreeBSD_kernel__
2157 statp->f_version = STATFS_VERSION;
2158 #endif
2159
2160 ZFS_ENTER(zfsvfs);
2161
2162 dmu_objset_space(zfsvfs->z_os,
2163 &refdbytes, &availbytes, &usedobjs, &availobjs);
2164
2165 /*
2166 * The underlying storage pool actually uses multiple block sizes.
2167 * We report the fragsize as the smallest block size we support,
2168 * and we report our blocksize as the filesystem's maximum blocksize.
2169 */
2170 statp->f_bsize = SPA_MINBLOCKSIZE;
2171 #ifdef __NetBSD__
2172 statp->f_frsize = SPA_MINBLOCKSIZE;
2173 #endif
2174 statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize;
2175
2176 /*
2177 * The following report "total" blocks of various kinds in the
2178 * file system, but reported in terms of f_frsize - the
2179 * "fragment" size.
2180 */
2181
2182 statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
2183 statp->f_bfree = availbytes / statp->f_bsize;
2184 statp->f_bavail = statp->f_bfree; /* no root reservation */
2185 statp->f_bresvd = 0;
2186
2187 /*
2188 * statvfs() should really be called statufs(), because it assumes
2189 * static metadata. ZFS doesn't preallocate files, so the best
2190 * we can do is report the max that could possibly fit in f_files,
2191 * and that minus the number actually used in f_ffree.
2192 * For f_ffree, report the smaller of the number of objects available
2193 * and the number of blocks (each object will take at least a block).
2194 */
2195 statp->f_ffree = MIN(availobjs, statp->f_bfree);
2196 #ifndef __FreeBSD__
2197 statp->f_favail = statp->f_ffree; /* no "root reservation" */
2198 #endif
2199 statp->f_files = statp->f_ffree + usedobjs;
2200 statp->f_fresvd = 0;
2201
2202 #ifdef __FreeBSD__
2203 (void) cmpldev(&d32, vfsp->vfs_dev);
2204 statp->f_fsid = d32;
2205 #endif
2206 #ifdef __NetBSD__
2207 statp->f_fsid = vfsp->mnt_stat.f_fsid;
2208 statp->f_fsidx = vfsp->mnt_stat.f_fsidx;
2209 #endif
2210
2211 /*
2212 * We're a zfs filesystem.
2213 */
2214 (void) strlcpy(statp->f_fstypename, "zfs", sizeof(statp->f_fstypename));
2215
2216 strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
2217 sizeof(statp->f_mntfromname));
2218 strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
2219 sizeof(statp->f_mntonname));
2220
2221 #ifdef __FreeBSD_kernel__
2222 statp->f_namemax = MAXNAMELEN - 1;
2223 #endif
2224 #ifdef __NetBSD__
2225 statp->f_namemax = ZFS_MAXNAMELEN;
2226 #endif
2227
2228 ZFS_EXIT(zfsvfs);
2229 return (0);
2230 }
2231
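/*
 * VFS_ROOT: return a referenced vnode for the root znode of this file
 * system, locked with the requested flags.  On lock failure the
 * reference is dropped and *vpp is cleared.
 */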
2232 static int
2233 zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
2234 {
2235 zfsvfs_t *zfsvfs = vfsp->vfs_data;
2236 znode_t *rootzp;
2237 int error;
2238
2239 ZFS_ENTER(zfsvfs);
2240
2241 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
2242 if (error == 0)
2243 *vpp = ZTOV(rootzp);
2244
2245 ZFS_EXIT(zfsvfs);
2246
2247 if (error == 0) {
2248 error = vn_lock(*vpp, flags);
2249 if (error != 0) {
2250 VN_RELE(*vpp);
2251 *vpp = NULL;
2252 }
2253 }
2254 return (error);
2255 }
2256
2257 /*
2258 * Teardown the zfsvfs::z_os.
2259 *
2260 * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
2261 * and 'z_teardown_inactive_lock' held.
2262 */
2263 static int
2264 zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
2265 {
2266 znode_t *zp;
2267
2268 rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
2269
2270 if (!unmounting) {
2271 /*
2272 * We purge the parent filesystem's vfsp because the parent
2273 * filesystem and all of its snapshots have their vnodes'
2274 * v_vfsp set to the parent filesystem's vfsp. Note,
2275 * 'z_parent' is self-referential for non-snapshots.
2276 */
2277 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
2278 #ifdef FREEBSD_NAMECACHE
2279 cache_purgevfs(zfsvfs->z_parent->z_vfs, true);
2280 #endif
2281 }
2282
2283 /*
2284 * Close the zil. NB: Can't close the zil while zfs_inactive
2285 * threads are blocked as zil_close can call zfs_inactive.
2286 */
2287 if (zfsvfs->z_log) {
2288 zil_close(zfsvfs->z_log);
2289 zfsvfs->z_log = NULL;
2290 }
2291
2292 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);
2293
2294 /*
2295 * If we are not unmounting (i.e., online recv) and someone already
2296 * unmounted this file system while we were doing the switcheroo,
2297 * or a reopen of z_os failed then just bail out now.
2298 */
2299 if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
2300 rw_exit(&zfsvfs->z_teardown_inactive_lock);
2301 rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
2302 return (SET_ERROR(EIO));
2303 }
2304
2305 /*
2306 * At this point there are no vops active, and any new vops will
2307 * fail with EIO since we have z_teardown_lock for writer (only
2308 * relevant for forced unmount).
2309 *
2310 * Release all holds on dbufs.
2311 */
2312 mutex_enter(&zfsvfs->z_znodes_lock);
2313 for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
2314 zp = list_next(&zfsvfs->z_all_znodes, zp))
2315 if (zp->z_sa_hdl) {
2316 #ifdef __NetBSD__
2317 ASSERT(vrefcnt(ZTOV(zp)) >= 0);
2318 #else
2319 ASSERT(ZTOV(zp)->v_count >= 0);
2320 #endif
2321 zfs_znode_dmu_fini(zp);
2322 }
2323 mutex_exit(&zfsvfs->z_znodes_lock);
2324
2325 /*
2326 * If we are unmounting, set the unmounted flag and let new vops
2327 * unblock. zfs_inactive will have the unmounted behavior, and all
2328 * other vops will fail with EIO.
2329 */
2330 if (unmounting) {
2331 zfsvfs->z_unmounted = B_TRUE;
2332 rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
2333 rw_exit(&zfsvfs->z_teardown_inactive_lock);
2334 }
2335
2336 /*
2337 * z_os will be NULL if there was an error in attempting to reopen
2338 * zfsvfs, so just return as the properties had already been
2339 * unregistered and cached data had been evicted before.
2340 */
2341 if (zfsvfs->z_os == NULL)
2342 return (0);
2343
2344 /*
2345 * Unregister properties.
2346 */
2347 zfs_unregister_callbacks(zfsvfs);
2348
2349 /*
2350 * Evict cached data
2351 */
2352 if (dsl_dataset_is_dirty(dmu_objset_ds(zfsvfs->z_os)) &&
2353 !(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY))
2354 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
2355 dmu_objset_evict_dbufs(zfsvfs->z_os);
2356
2357 return (0);
2358 }
2359
2360 /*ARGSUSED*/
2361 static int
2362 zfs_umount(vfs_t *vfsp, int fflag)
2363 {
2364 zfsvfs_t *zfsvfs = vfsp->vfs_data;
2365 objset_t *os;
2366 int ret;
2367 #ifdef __FreeBSD_kernel__
2368 kthread_t *td = curthread;
2369 cred_t *cr = td->td_ucred;
2370 #endif
2371 #ifdef __NetBSD__
2372 cred_t *cr = CRED();
2373 #endif
2374
2375 ret = secpolicy_fs_unmount(cr, vfsp);
2376 if (ret) {
2377 if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
2378 ZFS_DELEG_PERM_MOUNT, cr))
2379 return (ret);
2380 }
2381
2382 /*
2383 * We purge the parent filesystem's vfsp because the parent filesystem
2384 * and all of its snapshots have their vnodes' v_vfsp set to the
2385 * parent filesystem's vfsp. Note, 'z_parent' is self-referential
2386 * for non-snapshots.
2387 */
2388 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
2389
2390 /*
2391 * Unmount any snapshots mounted under .zfs before unmounting the
2392 * dataset itself.
2393 */
2394 if (zfsvfs->z_ctldir != NULL) {
2395 if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
2396 return (ret);
2397 }
2398
2399 if (fflag & MS_FORCE) {
2400 /*
2401 * Mark file system as unmounted before calling
2402 * vflush(FORCECLOSE). This way we ensure no future vnops
2403 * will be called and risk operating on DOOMED vnodes.
2404 */
2405 rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
2406 zfsvfs->z_unmounted = B_TRUE;
2407 rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
2408 }
2409
2410 /*
2411 * Flush all the files.
2412 */
2413 #ifdef __FreeBSD_kernel__
2414 ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td);
2415 #endif
2416 #ifdef __NetBSD__
2417 ret = vflush(vfsp, NULL, (fflag & MS_FORCE) ? FORCECLOSE : 0);
2418 #endif
2419 if (ret != 0)
2420 return (ret);
2421
2422 #ifdef illumos
2423 if (!(fflag & MS_FORCE)) {
2424 /*
2425 * Check the number of active vnodes in the file system.
2426 * Our count is maintained in the vfs structure, but the
2427 * number is off by 1 to indicate a hold on the vfs
2428 * structure itself.
2429 *
2430 * The '.zfs' directory maintains a reference of its
2431 * own, and any active references underneath are
2432 * reflected in the vnode count.
2433 */
2434 if (zfsvfs->z_ctldir == NULL) {
2435 if (vfsp->vfs_count > 1)
2436 return (SET_ERROR(EBUSY));
2437 } else {
2438 if (vfsp->vfs_count > 2 ||
2439 zfsvfs->z_ctldir->v_count > 1)
2440 return (SET_ERROR(EBUSY));
2441 }
2442 }
2443 #endif
2444
2445 VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
2446 os = zfsvfs->z_os;
2447
2448 /*
2449 * z_os will be NULL if there was an error in
2450 * attempting to reopen zfsvfs.
2451 */
2452 if (os != NULL) {
2453 /*
2454 * Unset the objset user_ptr.
2455 */
2456 mutex_enter(&os->os_user_ptr_lock);
2457 dmu_objset_set_user(os, NULL);
2458 mutex_exit(&os->os_user_ptr_lock);
2459
2460 /*
2461 * Finally release the objset
2462 */
2463 dmu_objset_disown(os, zfsvfs);
2464 }
2465
2466 /*
2467 * We can now safely destroy the '.zfs' directory node.
2468 */
2469 if (zfsvfs->z_ctldir != NULL)
2470 zfsctl_destroy(zfsvfs);
2471 zfs_freevfs(vfsp);
2472
2473 return (0);
2474 }
2475
2476 static int
2477 zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
2478 {
2479 zfsvfs_t *zfsvfs = vfsp->vfs_data;
2480 znode_t *zp;
2481 int err;
2482
2483 /*
2484 * zfs_zget() can't operate on virtual entries like the .zfs/ or
2485 * .zfs/snapshot/ directories, which is why we return EOPNOTSUPP.
2486 * This makes NFS switch to LOOKUP instead of using VGET.
2487 */
2488 if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR ||
2489 (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir))
2490 return (EOPNOTSUPP);
2491
2492 ZFS_ENTER(zfsvfs);
2493 err = zfs_zget(zfsvfs, ino, &zp);
2494 if (err == 0 && zp->z_unlinked) {
2495 VN_RELE(ZTOV(zp));
2496 err = EINVAL;
2497 }
2498 if (err == 0)
2499 *vpp = ZTOV(zp);
2500 ZFS_EXIT(zfsvfs);
2501 if (err == 0)
2502 err = vn_lock(*vpp, flags);
2503 if (err != 0)
2504 *vpp = NULL;
2505
2506 return (err);
2507 }
2508
2509 #ifdef __FreeBSD_kernel__
2510 static int
2511 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
2512 struct ucred **credanonp, int *numsecflavors, int **secflavors)
2513 {
2514 zfsvfs_t *zfsvfs = vfsp->vfs_data;
2515
2516 /*
2517 * If this is a regular file system, vfsp is the same as
2518 * zfsvfs->z_parent->z_vfs; but if it is a snapshot,
2519 * zfsvfs->z_parent->z_vfs represents the parent file system,
2520 * which we have to use here because only that file system
2521 * has mnt_export configured.
2522 */
2523 return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp,
2524 credanonp, numsecflavors, secflavors));
2525 }
2526
2527 CTASSERT(SHORT_FID_LEN <= sizeof(struct fid));
2528 CTASSERT(LONG_FID_LEN <= sizeof(struct fid));
2529 #endif
2530
2531 #ifdef __FreeBSD_kernel__
2532 static int
2533 zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
2534 {
2535 struct componentname cn;
2536 zfsvfs_t *zfsvfs = vfsp->vfs_data;
2537 znode_t *zp;
2538 vnode_t *dvp;
2539 uint64_t object = 0;
2540 uint64_t fid_gen = 0;
2541 uint64_t gen_mask;
2542 uint64_t zp_gen;
2543 int i, err;
2544
2545 *vpp = NULL;
2546
2547 ZFS_ENTER(zfsvfs);
2548
2549 /*
2550 * On FreeBSD we can get either the snapshot's mount point or its parent
2551 * file system's mount point, depending on whether the snapshot is mounted.
2552 */
2553 if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) {
2554 zfid_long_t *zlfid = (zfid_long_t *)fidp;
2555 uint64_t objsetid = 0;
2556 uint64_t setgen = 0;
2557
2558 for (i = 0; i < sizeof (zlfid->zf_setid); i++)
2559 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
2560
2561 for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
2562 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
2563
2564 ZFS_EXIT(zfsvfs);
2565
2566 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
2567 if (err)
2568 return (SET_ERROR(EINVAL));
2569 ZFS_ENTER(zfsvfs);
2570 }
2571
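/*
 * Both fid flavours begin with a zfid_short_t that encodes the object
 * number and a truncated generation number as little-endian byte arrays;
 * the long form used for snapshots additionally carries the objset id
 * decoded above.
 */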
2572 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
2573 zfid_short_t *zfid = (zfid_short_t *)fidp;
2574
2575 for (i = 0; i < sizeof (zfid->zf_object); i++)
2576 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
2577
2578 for (i = 0; i < sizeof (zfid->zf_gen); i++)
2579 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
2580 } else {
2581 ZFS_EXIT(zfsvfs);
2582 return (SET_ERROR(EINVAL));
2583 }
2584
2585 /*
2586 * A zero fid_gen means we are in .zfs or the .zfs/snapshot
2587 * directory tree. If the object == zfsvfs->z_shares_dir, then
2588 * we are in the .zfs/shares directory tree.
2589 */
2590 if ((fid_gen == 0 &&
2591 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) ||
2592 (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) {
2593 ZFS_EXIT(zfsvfs);
2594 VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp));
2595 if (object == ZFSCTL_INO_SNAPDIR) {
2596 cn.cn_nameptr = "snapshot";
2597 cn.cn_namelen = strlen(cn.cn_nameptr);
2598 cn.cn_nameiop = LOOKUP;
2599 cn.cn_flags = ISLASTCN | LOCKLEAF;
2600 cn.cn_lkflags = flags;
2601 VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
2602 vput(dvp);
2603 } else if (object == zfsvfs->z_shares_dir) {
2604 /*
2605 * XXX This branch must not be taken;
2606 * if it is, the lookup below will
2607 * explode.
2608 */
2609 cn.cn_nameptr = "shares";
2610 cn.cn_namelen = strlen(cn.cn_nameptr);
2611 cn.cn_nameiop = LOOKUP;
2612 cn.cn_flags = ISLASTCN;
2613 cn.cn_lkflags = flags;
2614 VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
2615 vput(dvp);
2616 } else {
2617 *vpp = dvp;
2618 }
2619 return (err);
2620 }
2621
2622 gen_mask = -1ULL >> (64 - 8 * i);
2623
2624 dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
2625 if (err = zfs_zget(zfsvfs, object, &zp)) {
2626 ZFS_EXIT(zfsvfs);
2627 return (err);
2628 }
2629 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
2630 sizeof (uint64_t));
2631 zp_gen = zp_gen & gen_mask;
2632 if (zp_gen == 0)
2633 zp_gen = 1;
2634 if (zp->z_unlinked || zp_gen != fid_gen) {
2635 dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
2636 VN_RELE(ZTOV(zp));
2637 ZFS_EXIT(zfsvfs);
2638 return (SET_ERROR(EINVAL));
2639 }
2640
2641 *vpp = ZTOV(zp);
2642 ZFS_EXIT(zfsvfs);
2643 err = vn_lock(*vpp, flags);
2644 if (err == 0)
2645 vnode_create_vobject(*vpp, zp->z_size, curthread);
2646 else
2647 *vpp = NULL;
2648 return (err);
2649 }
2650 #endif /* __FreeBSD_kernel__ */
2651
2652 /*
2653 * Block out VOPs and close zfsvfs_t::z_os
2654 *
2655 * Note, if successful, then we return with the 'z_teardown_lock' and
2656 * 'z_teardown_inactive_lock' write held. We leave ownership of the underlying
2657 * dataset and objset intact so that they can be atomically handed off during
2658 * a subsequent rollback or recv operation and the resume thereafter.
2659 */
2660 int
2661 zfs_suspend_fs(zfsvfs_t *zfsvfs)
2662 {
2663 int error;
2664
2665 #ifdef __NetBSD__
2666 if ((error = vfs_suspend(zfsvfs->z_vfs, 0)) != 0)
2667 return error;
2668 if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) {
2669 vfs_resume(zfsvfs->z_vfs);
2670 return (error);
2671 }
2672 #else
2673 if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
2674 return (error);
2675 #endif
2676
2677 return (0);
2678 }
2679
2680 /*
2681 * Rebuild SA and release VOPs. Note that ownership of the underlying dataset
2682 * is an invariant across any of the operations that can be performed while the
2683 * filesystem was suspended. Whether it succeeded or failed, the preconditions
2684 * are the same: the relevant objset and associated dataset are owned by
2685 * zfsvfs, held, and long held on entry.
2686 */
2687 #ifdef __NetBSD__
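/*
 * Vnode iterator callback used during resume: select znodes whose SA
 * handle could not be re-established (so they can be reclaimed), while
 * skipping .zfs control nodes.
 */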
2688 static bool
2689 zfs_resume_selector(void *cl, struct vnode *vp)
2690 {
2691
2692 if (zfsctl_is_node(vp))
2693 return false;
2694 return (VTOZ(vp)->z_sa_hdl == NULL);
2695 }
2696 #endif
2697 int
2698 zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
2699 {
2700 int err;
2701 znode_t *zp;
2702
2703 ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
2704 ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
2705
2706 /*
2707 * We already own this, so just update the objset_t, as the one we
2708 * had before may have been evicted.
2709 */
2710 objset_t *os;
2711 VERIFY3P(ds->ds_owner, ==, zfsvfs);
2712 VERIFY(dsl_dataset_long_held(ds));
2713 VERIFY0(dmu_objset_from_ds(ds, &os));
2714
2715 err = zfsvfs_init(zfsvfs, os);
2716 if (err != 0)
2717 goto bail;
2718
2719 VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
2720
2721 zfs_set_fuid_feature(zfsvfs);
2722
2723 /*
2724 * Attempt to re-establish all the active znodes with
2725 * their dbufs. If a zfs_rezget() fails, then we'll let
2726 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
2727 * when they try to use their znode.
2728 */
2729 mutex_enter(&zfsvfs->z_znodes_lock);
2730 for (zp = list_head(&zfsvfs->z_all_znodes); zp;
2731 zp = list_next(&zfsvfs->z_all_znodes, zp)) {
2732 (void) zfs_rezget(zp);
2733 }
2734 mutex_exit(&zfsvfs->z_znodes_lock);
2735
2736 bail:
2737 /* release the VOPs */
2738 rw_exit(&zfsvfs->z_teardown_inactive_lock);
2739 rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
2740 #ifdef __NetBSD__
2741 struct vnode_iterator *marker;
2742 vnode_t *vp;
2743
2744 vfs_vnode_iterator_init(zfsvfs->z_vfs, &marker);
2745 while ((vp = vfs_vnode_iterator_next(marker,
2746 zfs_resume_selector, NULL))) {
2747 vgone(vp);
2748 }
2749 vfs_vnode_iterator_destroy(marker);
2750 vfs_resume(zfsvfs->z_vfs);
2751 #endif
2752
2753 if (err) {
2754 /*
2755 * Since we couldn't set up the SA framework, try to force
2756 * unmount this file system.
2757 */
2758 if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) {
2759 vfs_ref(zfsvfs->z_vfs);
2760 (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
2761 }
2762 }
2763 return (err);
2764 }
2765
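/*
 * Final teardown of the zfsvfs_t after the objset has been released;
 * frees the per-mount state and drops the active file system count.
 */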
2766 static void
2767 zfs_freevfs(vfs_t *vfsp)
2768 {
2769 zfsvfs_t *zfsvfs = vfsp->vfs_data;
2770
2771 #ifdef illumos
2772 /*
2773 * If this is a snapshot, we have an extra VFS_HOLD on our parent
2774 * from zfs_mount(). Release it here. If we came through
2775 * zfs_mountroot() instead, we didn't grab an extra hold, so
2776 * skip the VFS_RELE for rootvfs.
2777 */
2778 if (zfsvfs->z_issnap && (vfsp != rootvfs))
2779 VFS_RELE(zfsvfs->z_parent->z_vfs);
2780 #endif
2781
2782 zfsvfs_free(zfsvfs);
2783
2784 atomic_dec_32(&zfs_active_fs_count);
2785 }
2786
2787 #ifdef __FreeBSD_kernel__
2788 #ifdef __i386__
2789 static int desiredvnodes_backup;
2790 #endif
2791
2792 static void
2793 zfs_vnodes_adjust(void)
2794 {
2795 #ifdef __i386__
2796 int newdesiredvnodes;
2797
2798 desiredvnodes_backup = desiredvnodes;
2799
2800 /*
2801 * We calculate newdesiredvnodes the same way it is done in
2802 * vntblinit(). If it is equal to desiredvnodes, it means that
2803 * it wasn't tuned by the administrator and we can tune it down.
2804 */
2805 newdesiredvnodes = min(maxproc + vm_cnt.v_page_count / 4, 2 *
2806 vm_kmem_size / (5 * (sizeof(struct vm_object) +
2807 sizeof(struct vnode))));
2808 if (newdesiredvnodes == desiredvnodes)
2809 desiredvnodes = (3 * newdesiredvnodes) / 4;
2810 #endif
2811 }
2812
2813 static void
2814 zfs_vnodes_adjust_back(void)
2815 {
2816
2817 #ifdef __i386__
2818 desiredvnodes = desiredvnodes_backup;
2819 #endif
2820 }
2821 #endif /* __FreeBSD_kernel__ */
2822
2823 #ifdef __NetBSD__
2824 static void
2825 zfs_vnodes_adjust(void)
2826 {
2827 }
2828
2829 static void
2830 zfs_vnodes_adjust_back(void)
2831 {
2832 }
2833 #endif
2834
2835 void
2836 zfs_init(void)
2837 {
2838
2839 printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n");
2840
2841 /*
2842 * Initialize .zfs directory structures
2843 */
2844 zfsctl_init();
2845
2846 /*
2847 * Initialize znode cache, vnode ops, etc...
2848 */
2849 zfs_znode_init();
2850
2851 /*
2852 * Reduce the number of vnodes. The default is calculated with UFS
2853 * inodes in mind, which makes it too big for ZFS on i386, so we
2854 * reduce it here.
2855 */
2856 zfs_vnodes_adjust();
2857
2858 dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
2859 }
2860
2861 void
2862 zfs_fini(void)
2863 {
2864 zfsctl_fini();
2865 zfs_znode_fini();
2866 zfs_vnodes_adjust_back();
2867 }
2868
2869 int
2870 zfs_busy(void)
2871 {
2872 return (zfs_active_fs_count != 0);
2873 }
2874
2875 int
2876 zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
2877 {
2878 int error;
2879 objset_t *os = zfsvfs->z_os;
2880 dmu_tx_t *tx;
2881
2882 if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
2883 return (SET_ERROR(EINVAL));
2884
2885 if (newvers < zfsvfs->z_version)
2886 return (SET_ERROR(EINVAL));
2887
2888 if (zfs_spa_version_map(newvers) >
2889 spa_version(dmu_objset_spa(zfsvfs->z_os)))
2890 return (SET_ERROR(ENOTSUP));
2891
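/*
 * The ZPL version lives in a ZAP entry of the master node, so the
 * upgrade itself is a single zap_update().  Crossing ZPL_VERSION_SA also
 * creates the SA master node object, hence the extra ZAP holds reserved
 * in this transaction.
 */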
2892 tx = dmu_tx_create(os);
2893 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
2894 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
2895 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
2896 ZFS_SA_ATTRS);
2897 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
2898 }
2899 error = dmu_tx_assign(tx, TXG_WAIT);
2900 if (error) {
2901 dmu_tx_abort(tx);
2902 return (error);
2903 }
2904
2905 error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
2906 8, 1, &newvers, tx);
2907
2908 if (error) {
2909 dmu_tx_commit(tx);
2910 return (error);
2911 }
2912
2913 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
2914 uint64_t sa_obj;
2915
2916 ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
2917 SPA_VERSION_SA);
2918 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
2919 DMU_OT_NONE, 0, tx);
2920
2921 error = zap_add(os, MASTER_NODE_OBJ,
2922 ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
2923 ASSERT0(error);
2924
2925 VERIFY(0 == sa_set_sa_object(os, sa_obj));
2926 sa_register_update_callback(os, zfs_sa_upgrade);
2927 }
2928
2929 spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
2930 "from %llu to %llu", zfsvfs->z_version, newvers);
2931
2932 dmu_tx_commit(tx);
2933
2934 zfsvfs->z_version = newvers;
2935
2936 zfs_set_fuid_feature(zfsvfs);
2937
2938 return (0);
2939 }
2940
2941 /*
2942 * Read a property stored within the master node.
2943 */
2944 int
2945 zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
2946 {
2947 const char *pname;
2948 int error = ENOENT;
2949
2950 /*
2951 * Look up the file system's value for the property. For the
2952 * version property, we look up a slightly different string.
2953 */
2954 if (prop == ZFS_PROP_VERSION)
2955 pname = ZPL_VERSION_STR;
2956 else
2957 pname = zfs_prop_to_name(prop);
2958
2959 if (os != NULL)
2960 error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
2961
2962 if (error == ENOENT) {
2963 /* No value set, use the default value */
2964 switch (prop) {
2965 case ZFS_PROP_VERSION:
2966 *value = ZPL_VERSION;
2967 break;
2968 case ZFS_PROP_NORMALIZE:
2969 case ZFS_PROP_UTF8ONLY:
2970 *value = 0;
2971 break;
2972 case ZFS_PROP_CASE:
2973 *value = ZFS_CASE_SENSITIVE;
2974 break;
2975 default:
2976 return (error);
2977 }
2978 error = 0;
2979 }
2980 return (error);
2981 }
2982
2983 #if defined(__FreeBSD_kernel__) || defined(__NetBSD__)
2984 #ifdef _KERNEL
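/*
 * After a dataset rename, rewrite f_mntfromname for every mounted file
 * system whose source was the old name or one of its descendants or
 * snapshots ("oldname/..." or "oldname@...").
 */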
2985 void
2986 zfsvfs_update_fromname(const char *oldname, const char *newname)
2987 {
2988 char tmpbuf[MAXPATHLEN];
2989 struct mount *mp;
2990 char *fromname;
2991 size_t oldlen;
2992
2993 oldlen = strlen(oldname);
2994
2995 #ifdef __NetBSD__
2996 mount_iterator_t *iter;
2997 mountlist_iterator_init(&iter);
2998 while ((mp = mountlist_iterator_next(iter)) != NULL) {
2999 #else
3000 mtx_lock(&mountlist_mtx);
3001 TAILQ_FOREACH(mp, &mountlist, mnt_list) {
3002 #endif
3003 fromname = mp->mnt_stat.f_mntfromname;
3004 if (strcmp(fromname, oldname) == 0) {
3005 (void)strlcpy(fromname, newname,
3006 sizeof(mp->mnt_stat.f_mntfromname));
3007 continue;
3008 }
3009 if (strncmp(fromname, oldname, oldlen) == 0 &&
3010 (fromname[oldlen] == '/' || fromname[oldlen] == '@')) {
3011 (void)snprintf(tmpbuf, sizeof(tmpbuf), "%s%s",
3012 newname, fromname + oldlen);
3013 (void)strlcpy(fromname, tmpbuf,
3014 sizeof(mp->mnt_stat.f_mntfromname));
3015 continue;
3016 }
3017 }
3018 #ifdef __NetBSD__
3019 mountlist_iterator_destroy(iter);
3020 #else
3021 mtx_unlock(&mountlist_mtx);
3022 #endif
3023 }
3024 #endif
3025 #endif
3026