1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>. 24 * All rights reserved. 25 * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 26 * Copyright (c) 2014 Integros [integros.com] 27 * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 28 */ 29 30 /* Portions Copyright 2010 Robert Milkowski */ 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/kernel.h> 36 #include <sys/sysmacros.h> 37 #include <sys/kmem.h> 38 #include <sys/acl.h> 39 #include <sys/vnode.h> 40 #include <sys/vfs.h> 41 #include <sys/mntent.h> 42 #include <sys/mount.h> 43 #include <sys/cmn_err.h> 44 #include <sys/zfs_znode.h> 45 #include <sys/zfs_vnops.h> 46 #include <sys/zfs_dir.h> 47 #include <sys/zil.h> 48 #include <sys/fs/zfs.h> 49 #include <sys/dmu.h> 50 #include <sys/dsl_prop.h> 51 #include <sys/dsl_dataset.h> 52 #include <sys/dsl_deleg.h> 53 #include <sys/spa.h> 54 #include <sys/zap.h> 55 #include <sys/sa.h> 56 #include <sys/sa_impl.h> 57 #include <sys/policy.h> 58 #include <sys/atomic.h> 59 #include <sys/zfs_ioctl.h> 60 #include <sys/zfs_ctldir.h> 61 #include <sys/zfs_fuid.h> 62 #include <sys/sunddi.h> 63 #include <sys/dmu_objset.h> 64 #include <sys/dsl_dir.h> 65 #include <sys/jail.h> 66 #include <sys/osd.h> 67 #include <ufs/ufs/quota.h> 68 #include <sys/zfs_quota.h> 69 70 #include "zfs_comutil.h" 71 72 #ifndef MNTK_VMSETSIZE_BUG 73 #define MNTK_VMSETSIZE_BUG 0 74 #endif 75 #ifndef MNTK_NOMSYNC 76 #define MNTK_NOMSYNC 8 77 #endif 78 79 struct mtx zfs_debug_mtx; 80 MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 81 82 SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 83 84 int zfs_super_owner; 85 SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, 86 "File system owners can perform privileged operation on file systems"); 87 88 int zfs_debug_level; 89 SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0, 90 "Debug level"); 91 92 struct zfs_jailparam { 93 int mount_snapshot; 94 }; 95 96 static struct zfs_jailparam zfs_jailparam0 = { 97 .mount_snapshot = 0, 98 }; 99 100 static int zfs_jailparam_slot; 101 102 SYSCTL_JAIL_PARAM_SYS_NODE(zfs, CTLFLAG_RW, "Jail ZFS parameters"); 103 SYSCTL_JAIL_PARAM(_zfs, mount_snapshot, CTLTYPE_INT | CTLFLAG_RW, "I", 104 "Allow mounting snapshots in the .zfs directory for unjailed datasets"); 105 106 SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); 107 static int zfs_version_acl = ZFS_ACL_VERSION; 108 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, 109 "ZFS_ACL_VERSION"); 110 static int zfs_version_spa = SPA_VERSION; 111 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, 112 "SPA_VERSION"); 113 static int zfs_version_zpl = ZPL_VERSION; 114 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, 115 "ZPL_VERSION"); 116 117 #if __FreeBSD_version >= 1400018 118 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, 119 bool *mp_busy); 120 #else 121 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg); 122 #endif 123 static int zfs_mount(vfs_t *vfsp); 124 static int zfs_umount(vfs_t *vfsp, int fflag); 125 static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); 126 static int zfs_statfs(vfs_t *vfsp, struct statfs *statp); 127 static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 128 static int zfs_sync(vfs_t *vfsp, int waitfor); 129 #if __FreeBSD_version >= 1300098 130 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp, 131 struct ucred **credanonp, int *numsecflavors, int *secflavors); 132 #else 133 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 134 struct ucred **credanonp, int *numsecflavors, int **secflavors); 135 #endif 136 static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp); 137 static void zfs_freevfs(vfs_t *vfsp); 138 139 struct vfsops zfs_vfsops = { 140 .vfs_mount = zfs_mount, 141 .vfs_unmount = zfs_umount, 142 #if __FreeBSD_version >= 1300049 143 .vfs_root = vfs_cache_root, 144 .vfs_cachedroot = zfs_root, 145 #else 146 .vfs_root = zfs_root, 147 #endif 148 .vfs_statfs = zfs_statfs, 149 .vfs_vget = zfs_vget, 150 .vfs_sync = zfs_sync, 151 .vfs_checkexp = zfs_checkexp, 152 .vfs_fhtovp = zfs_fhtovp, 153 .vfs_quotactl = zfs_quotactl, 154 }; 155 156 VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); 157 158 /* 159 * We need to keep a count of active fs's. 160 * This is necessary to prevent our module 161 * from being unloaded after a umount -f 162 */ 163 static uint32_t zfs_active_fs_count = 0; 164 165 int 166 zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val, 167 char *setpoint) 168 { 169 int error; 170 zfsvfs_t *zfvp; 171 vfs_t *vfsp; 172 objset_t *os; 173 uint64_t tmp = *val; 174 175 error = dmu_objset_from_ds(ds, &os); 176 if (error != 0) 177 return (error); 178 179 error = getzfsvfs_impl(os, &zfvp); 180 if (error != 0) 181 return (error); 182 if (zfvp == NULL) 183 return (ENOENT); 184 vfsp = zfvp->z_vfs; 185 switch (zfs_prop) { 186 case ZFS_PROP_ATIME: 187 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) 188 tmp = 0; 189 if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) 190 tmp = 1; 191 break; 192 case ZFS_PROP_DEVICES: 193 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 194 tmp = 0; 195 if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) 196 tmp = 1; 197 break; 198 case ZFS_PROP_EXEC: 199 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) 200 tmp = 0; 201 if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) 202 tmp = 1; 203 break; 204 case ZFS_PROP_SETUID: 205 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 206 tmp = 0; 207 if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) 208 tmp = 1; 209 break; 210 case ZFS_PROP_READONLY: 211 if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) 212 tmp = 0; 213 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) 214 tmp = 1; 215 break; 216 case ZFS_PROP_XATTR: 217 if (zfvp->z_flags & ZSB_XATTR) 218 tmp = zfvp->z_xattr; 219 break; 220 case ZFS_PROP_NBMAND: 221 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) 222 tmp = 0; 223 if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) 224 tmp = 1; 225 break; 226 default: 227 vfs_unbusy(vfsp); 228 return (ENOENT); 229 } 230 231 vfs_unbusy(vfsp); 232 if (tmp != *val) { 233 (void) strcpy(setpoint, "temporary"); 234 *val = tmp; 235 } 236 return (0); 237 } 238 239 static int 240 zfs_getquota(zfsvfs_t *zfsvfs, uid_t id, int isgroup, struct dqblk64 *dqp) 241 { 242 int error = 0; 243 char buf[32]; 244 uint64_t usedobj, quotaobj; 245 uint64_t quota, used = 0; 246 timespec_t now; 247 248 usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 249 quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; 250 251 if (quotaobj == 0 || zfsvfs->z_replay) { 252 error = ENOENT; 253 goto done; 254 } 255 (void) sprintf(buf, "%llx", (longlong_t)id); 256 if ((error = zap_lookup(zfsvfs->z_os, quotaobj, 257 buf, sizeof (quota), 1, "a)) != 0) { 258 dprintf("%s(%d): quotaobj lookup failed\n", 259 __FUNCTION__, __LINE__); 260 goto done; 261 } 262 /* 263 * quota(8) uses bsoftlimit as "quoota", and hardlimit as "limit". 264 * So we set them to be the same. 265 */ 266 dqp->dqb_bsoftlimit = dqp->dqb_bhardlimit = btodb(quota); 267 error = zap_lookup(zfsvfs->z_os, usedobj, buf, sizeof (used), 1, &used); 268 if (error && error != ENOENT) { 269 dprintf("%s(%d): usedobj failed; %d\n", 270 __FUNCTION__, __LINE__, error); 271 goto done; 272 } 273 dqp->dqb_curblocks = btodb(used); 274 dqp->dqb_ihardlimit = dqp->dqb_isoftlimit = 0; 275 vfs_timestamp(&now); 276 /* 277 * Setting this to 0 causes FreeBSD quota(8) to print 278 * the number of days since the epoch, which isn't 279 * particularly useful. 280 */ 281 dqp->dqb_btime = dqp->dqb_itime = now.tv_sec; 282 done: 283 return (error); 284 } 285 286 static int 287 #if __FreeBSD_version >= 1400018 288 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, bool *mp_busy) 289 #else 290 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg) 291 #endif 292 { 293 zfsvfs_t *zfsvfs = vfsp->vfs_data; 294 struct thread *td; 295 int cmd, type, error = 0; 296 int bitsize; 297 zfs_userquota_prop_t quota_type; 298 struct dqblk64 dqblk = { 0 }; 299 300 td = curthread; 301 cmd = cmds >> SUBCMDSHIFT; 302 type = cmds & SUBCMDMASK; 303 304 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 305 return (error); 306 if (id == -1) { 307 switch (type) { 308 case USRQUOTA: 309 id = td->td_ucred->cr_ruid; 310 break; 311 case GRPQUOTA: 312 id = td->td_ucred->cr_rgid; 313 break; 314 default: 315 error = EINVAL; 316 #if __FreeBSD_version < 1400018 317 if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF) 318 vfs_unbusy(vfsp); 319 #endif 320 goto done; 321 } 322 } 323 /* 324 * Map BSD type to: 325 * ZFS_PROP_USERUSED, 326 * ZFS_PROP_USERQUOTA, 327 * ZFS_PROP_GROUPUSED, 328 * ZFS_PROP_GROUPQUOTA 329 */ 330 switch (cmd) { 331 case Q_SETQUOTA: 332 case Q_SETQUOTA32: 333 if (type == USRQUOTA) 334 quota_type = ZFS_PROP_USERQUOTA; 335 else if (type == GRPQUOTA) 336 quota_type = ZFS_PROP_GROUPQUOTA; 337 else 338 error = EINVAL; 339 break; 340 case Q_GETQUOTA: 341 case Q_GETQUOTA32: 342 if (type == USRQUOTA) 343 quota_type = ZFS_PROP_USERUSED; 344 else if (type == GRPQUOTA) 345 quota_type = ZFS_PROP_GROUPUSED; 346 else 347 error = EINVAL; 348 break; 349 } 350 351 /* 352 * Depending on the cmd, we may need to get 353 * the ruid and domain (see fuidstr_to_sid?), 354 * the fuid (how?), or other information. 355 * Create fuid using zfs_fuid_create(zfsvfs, id, 356 * ZFS_OWNER or ZFS_GROUP, cr, &fuidp)? 357 * I think I can use just the id? 358 * 359 * Look at zfs_id_overquota() to look up a quota. 360 * zap_lookup(something, quotaobj, fuidstring, 361 * sizeof (long long), 1, "a) 362 * 363 * See zfs_set_userquota() to set a quota. 364 */ 365 if ((uint32_t)type >= MAXQUOTAS) { 366 error = EINVAL; 367 goto done; 368 } 369 370 switch (cmd) { 371 case Q_GETQUOTASIZE: 372 bitsize = 64; 373 error = copyout(&bitsize, arg, sizeof (int)); 374 break; 375 case Q_QUOTAON: 376 // As far as I can tell, you can't turn quotas on or off on zfs 377 error = 0; 378 #if __FreeBSD_version < 1400018 379 vfs_unbusy(vfsp); 380 #endif 381 break; 382 case Q_QUOTAOFF: 383 error = ENOTSUP; 384 #if __FreeBSD_version < 1400018 385 vfs_unbusy(vfsp); 386 #endif 387 break; 388 case Q_SETQUOTA: 389 error = copyin(arg, &dqblk, sizeof (dqblk)); 390 if (error == 0) 391 error = zfs_set_userquota(zfsvfs, quota_type, 392 "", id, dbtob(dqblk.dqb_bhardlimit)); 393 break; 394 case Q_GETQUOTA: 395 error = zfs_getquota(zfsvfs, id, type == GRPQUOTA, &dqblk); 396 if (error == 0) 397 error = copyout(&dqblk, arg, sizeof (dqblk)); 398 break; 399 default: 400 error = EINVAL; 401 break; 402 } 403 done: 404 zfs_exit(zfsvfs, FTAG); 405 return (error); 406 } 407 408 409 boolean_t 410 zfs_is_readonly(zfsvfs_t *zfsvfs) 411 { 412 return (!!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY)); 413 } 414 415 static int 416 zfs_sync(vfs_t *vfsp, int waitfor) 417 { 418 419 /* 420 * Data integrity is job one. We don't want a compromised kernel 421 * writing to the storage pool, so we never sync during panic. 422 */ 423 if (panicstr) 424 return (0); 425 426 /* 427 * Ignore the system syncher. ZFS already commits async data 428 * at zfs_txg_timeout intervals. 429 */ 430 if (waitfor == MNT_LAZY) 431 return (0); 432 433 if (vfsp != NULL) { 434 /* 435 * Sync a specific filesystem. 436 */ 437 zfsvfs_t *zfsvfs = vfsp->vfs_data; 438 dsl_pool_t *dp; 439 int error; 440 441 error = vfs_stdsync(vfsp, waitfor); 442 if (error != 0) 443 return (error); 444 445 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 446 return (error); 447 dp = dmu_objset_pool(zfsvfs->z_os); 448 449 /* 450 * If the system is shutting down, then skip any 451 * filesystems which may exist on a suspended pool. 452 */ 453 if (rebooting && spa_suspended(dp->dp_spa)) { 454 zfs_exit(zfsvfs, FTAG); 455 return (0); 456 } 457 458 if (zfsvfs->z_log != NULL) 459 zil_commit(zfsvfs->z_log, 0); 460 461 zfs_exit(zfsvfs, FTAG); 462 } else { 463 /* 464 * Sync all ZFS filesystems. This is what happens when you 465 * run sync(8). Unlike other filesystems, ZFS honors the 466 * request by waiting for all pools to commit all dirty data. 467 */ 468 spa_sync_allpools(); 469 } 470 471 return (0); 472 } 473 474 static void 475 atime_changed_cb(void *arg, uint64_t newval) 476 { 477 zfsvfs_t *zfsvfs = arg; 478 479 if (newval == TRUE) { 480 zfsvfs->z_atime = TRUE; 481 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 482 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 483 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 484 } else { 485 zfsvfs->z_atime = FALSE; 486 zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 487 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 488 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 489 } 490 } 491 492 static void 493 xattr_changed_cb(void *arg, uint64_t newval) 494 { 495 zfsvfs_t *zfsvfs = arg; 496 497 if (newval == ZFS_XATTR_OFF) { 498 zfsvfs->z_flags &= ~ZSB_XATTR; 499 } else { 500 zfsvfs->z_flags |= ZSB_XATTR; 501 502 if (newval == ZFS_XATTR_SA) 503 zfsvfs->z_xattr_sa = B_TRUE; 504 else 505 zfsvfs->z_xattr_sa = B_FALSE; 506 } 507 } 508 509 static void 510 blksz_changed_cb(void *arg, uint64_t newval) 511 { 512 zfsvfs_t *zfsvfs = arg; 513 ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os))); 514 ASSERT3U(newval, >=, SPA_MINBLOCKSIZE); 515 ASSERT(ISP2(newval)); 516 517 zfsvfs->z_max_blksz = newval; 518 zfsvfs->z_vfs->mnt_stat.f_iosize = newval; 519 } 520 521 static void 522 readonly_changed_cb(void *arg, uint64_t newval) 523 { 524 zfsvfs_t *zfsvfs = arg; 525 526 if (newval) { 527 /* XXX locking on vfs_flag? */ 528 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 529 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 530 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 531 } else { 532 /* XXX locking on vfs_flag? */ 533 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 534 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 535 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 536 } 537 } 538 539 static void 540 setuid_changed_cb(void *arg, uint64_t newval) 541 { 542 zfsvfs_t *zfsvfs = arg; 543 544 if (newval == FALSE) { 545 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 546 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 547 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 548 } else { 549 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 550 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 551 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 552 } 553 } 554 555 static void 556 exec_changed_cb(void *arg, uint64_t newval) 557 { 558 zfsvfs_t *zfsvfs = arg; 559 560 if (newval == FALSE) { 561 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 562 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 563 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 564 } else { 565 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 566 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 567 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 568 } 569 } 570 571 /* 572 * The nbmand mount option can be changed at mount time. 573 * We can't allow it to be toggled on live file systems or incorrect 574 * behavior may be seen from cifs clients 575 * 576 * This property isn't registered via dsl_prop_register(), but this callback 577 * will be called when a file system is first mounted 578 */ 579 static void 580 nbmand_changed_cb(void *arg, uint64_t newval) 581 { 582 zfsvfs_t *zfsvfs = arg; 583 if (newval == FALSE) { 584 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 585 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 586 } else { 587 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 588 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 589 } 590 } 591 592 static void 593 snapdir_changed_cb(void *arg, uint64_t newval) 594 { 595 zfsvfs_t *zfsvfs = arg; 596 597 zfsvfs->z_show_ctldir = newval; 598 } 599 600 static void 601 acl_mode_changed_cb(void *arg, uint64_t newval) 602 { 603 zfsvfs_t *zfsvfs = arg; 604 605 zfsvfs->z_acl_mode = newval; 606 } 607 608 static void 609 acl_inherit_changed_cb(void *arg, uint64_t newval) 610 { 611 zfsvfs_t *zfsvfs = arg; 612 613 zfsvfs->z_acl_inherit = newval; 614 } 615 616 static void 617 acl_type_changed_cb(void *arg, uint64_t newval) 618 { 619 zfsvfs_t *zfsvfs = arg; 620 621 zfsvfs->z_acl_type = newval; 622 } 623 624 static int 625 zfs_register_callbacks(vfs_t *vfsp) 626 { 627 struct dsl_dataset *ds = NULL; 628 objset_t *os = NULL; 629 zfsvfs_t *zfsvfs = NULL; 630 uint64_t nbmand; 631 boolean_t readonly = B_FALSE; 632 boolean_t do_readonly = B_FALSE; 633 boolean_t setuid = B_FALSE; 634 boolean_t do_setuid = B_FALSE; 635 boolean_t exec = B_FALSE; 636 boolean_t do_exec = B_FALSE; 637 boolean_t xattr = B_FALSE; 638 boolean_t atime = B_FALSE; 639 boolean_t do_atime = B_FALSE; 640 boolean_t do_xattr = B_FALSE; 641 int error = 0; 642 643 ASSERT3P(vfsp, !=, NULL); 644 zfsvfs = vfsp->vfs_data; 645 ASSERT3P(zfsvfs, !=, NULL); 646 os = zfsvfs->z_os; 647 648 /* 649 * This function can be called for a snapshot when we update snapshot's 650 * mount point, which isn't really supported. 651 */ 652 if (dmu_objset_is_snapshot(os)) 653 return (EOPNOTSUPP); 654 655 /* 656 * The act of registering our callbacks will destroy any mount 657 * options we may have. In order to enable temporary overrides 658 * of mount options, we stash away the current values and 659 * restore them after we register the callbacks. 660 */ 661 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) || 662 !spa_writeable(dmu_objset_spa(os))) { 663 readonly = B_TRUE; 664 do_readonly = B_TRUE; 665 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 666 readonly = B_FALSE; 667 do_readonly = B_TRUE; 668 } 669 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 670 setuid = B_FALSE; 671 do_setuid = B_TRUE; 672 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 673 setuid = B_TRUE; 674 do_setuid = B_TRUE; 675 } 676 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 677 exec = B_FALSE; 678 do_exec = B_TRUE; 679 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 680 exec = B_TRUE; 681 do_exec = B_TRUE; 682 } 683 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 684 zfsvfs->z_xattr = xattr = ZFS_XATTR_OFF; 685 do_xattr = B_TRUE; 686 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 687 zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR; 688 do_xattr = B_TRUE; 689 } else if (vfs_optionisset(vfsp, MNTOPT_DIRXATTR, NULL)) { 690 zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR; 691 do_xattr = B_TRUE; 692 } else if (vfs_optionisset(vfsp, MNTOPT_SAXATTR, NULL)) { 693 zfsvfs->z_xattr = xattr = ZFS_XATTR_SA; 694 do_xattr = B_TRUE; 695 } 696 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 697 atime = B_FALSE; 698 do_atime = B_TRUE; 699 } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 700 atime = B_TRUE; 701 do_atime = B_TRUE; 702 } 703 704 /* 705 * We need to enter pool configuration here, so that we can use 706 * dsl_prop_get_int_ds() to handle the special nbmand property below. 707 * dsl_prop_get_integer() can not be used, because it has to acquire 708 * spa_namespace_lock and we can not do that because we already hold 709 * z_teardown_lock. The problem is that spa_write_cachefile() is called 710 * with spa_namespace_lock held and the function calls ZFS vnode 711 * operations to write the cache file and thus z_teardown_lock is 712 * acquired after spa_namespace_lock. 713 */ 714 ds = dmu_objset_ds(os); 715 dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 716 717 /* 718 * nbmand is a special property. It can only be changed at 719 * mount time. 720 * 721 * This is weird, but it is documented to only be changeable 722 * at mount time. 723 */ 724 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 725 nbmand = B_FALSE; 726 } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 727 nbmand = B_TRUE; 728 } else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand) != 0)) { 729 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 730 return (error); 731 } 732 733 /* 734 * Register property callbacks. 735 * 736 * It would probably be fine to just check for i/o error from 737 * the first prop_register(), but I guess I like to go 738 * overboard... 739 */ 740 error = dsl_prop_register(ds, 741 zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs); 742 error = error ? error : dsl_prop_register(ds, 743 zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs); 744 error = error ? error : dsl_prop_register(ds, 745 zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs); 746 error = error ? error : dsl_prop_register(ds, 747 zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs); 748 error = error ? error : dsl_prop_register(ds, 749 zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs); 750 error = error ? error : dsl_prop_register(ds, 751 zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs); 752 error = error ? error : dsl_prop_register(ds, 753 zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs); 754 error = error ? error : dsl_prop_register(ds, 755 zfs_prop_to_name(ZFS_PROP_ACLTYPE), acl_type_changed_cb, zfsvfs); 756 error = error ? error : dsl_prop_register(ds, 757 zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs); 758 error = error ? error : dsl_prop_register(ds, 759 zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, 760 zfsvfs); 761 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 762 if (error) 763 goto unregister; 764 765 /* 766 * Invoke our callbacks to restore temporary mount options. 767 */ 768 if (do_readonly) 769 readonly_changed_cb(zfsvfs, readonly); 770 if (do_setuid) 771 setuid_changed_cb(zfsvfs, setuid); 772 if (do_exec) 773 exec_changed_cb(zfsvfs, exec); 774 if (do_xattr) 775 xattr_changed_cb(zfsvfs, xattr); 776 if (do_atime) 777 atime_changed_cb(zfsvfs, atime); 778 779 nbmand_changed_cb(zfsvfs, nbmand); 780 781 return (0); 782 783 unregister: 784 dsl_prop_unregister_all(ds, zfsvfs); 785 return (error); 786 } 787 788 /* 789 * Associate this zfsvfs with the given objset, which must be owned. 790 * This will cache a bunch of on-disk state from the objset in the 791 * zfsvfs. 792 */ 793 static int 794 zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os) 795 { 796 int error; 797 uint64_t val; 798 799 zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE; 800 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 801 zfsvfs->z_os = os; 802 803 error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); 804 if (error != 0) 805 return (error); 806 if (zfsvfs->z_version > 807 zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { 808 (void) printf("Can't mount a version %lld file system " 809 "on a version %lld pool\n. Pool must be upgraded to mount " 810 "this file system.", (u_longlong_t)zfsvfs->z_version, 811 (u_longlong_t)spa_version(dmu_objset_spa(os))); 812 return (SET_ERROR(ENOTSUP)); 813 } 814 error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val); 815 if (error != 0) 816 return (error); 817 zfsvfs->z_norm = (int)val; 818 819 error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val); 820 if (error != 0) 821 return (error); 822 zfsvfs->z_utf8 = (val != 0); 823 824 error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val); 825 if (error != 0) 826 return (error); 827 zfsvfs->z_case = (uint_t)val; 828 829 error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val); 830 if (error != 0) 831 return (error); 832 zfsvfs->z_acl_type = (uint_t)val; 833 834 /* 835 * Fold case on file systems that are always or sometimes case 836 * insensitive. 837 */ 838 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 839 zfsvfs->z_case == ZFS_CASE_MIXED) 840 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 841 842 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 843 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); 844 845 uint64_t sa_obj = 0; 846 if (zfsvfs->z_use_sa) { 847 /* should either have both of these objects or none */ 848 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, 849 &sa_obj); 850 if (error != 0) 851 return (error); 852 853 error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val); 854 if (error == 0 && val == ZFS_XATTR_SA) 855 zfsvfs->z_xattr_sa = B_TRUE; 856 } 857 858 error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, 859 &zfsvfs->z_attr_table); 860 if (error != 0) 861 return (error); 862 863 if (zfsvfs->z_version >= ZPL_VERSION_SA) 864 sa_register_update_callback(os, zfs_sa_upgrade); 865 866 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, 867 &zfsvfs->z_root); 868 if (error != 0) 869 return (error); 870 ASSERT3U(zfsvfs->z_root, !=, 0); 871 872 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, 873 &zfsvfs->z_unlinkedobj); 874 if (error != 0) 875 return (error); 876 877 error = zap_lookup(os, MASTER_NODE_OBJ, 878 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], 879 8, 1, &zfsvfs->z_userquota_obj); 880 if (error == ENOENT) 881 zfsvfs->z_userquota_obj = 0; 882 else if (error != 0) 883 return (error); 884 885 error = zap_lookup(os, MASTER_NODE_OBJ, 886 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], 887 8, 1, &zfsvfs->z_groupquota_obj); 888 if (error == ENOENT) 889 zfsvfs->z_groupquota_obj = 0; 890 else if (error != 0) 891 return (error); 892 893 error = zap_lookup(os, MASTER_NODE_OBJ, 894 zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA], 895 8, 1, &zfsvfs->z_projectquota_obj); 896 if (error == ENOENT) 897 zfsvfs->z_projectquota_obj = 0; 898 else if (error != 0) 899 return (error); 900 901 error = zap_lookup(os, MASTER_NODE_OBJ, 902 zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA], 903 8, 1, &zfsvfs->z_userobjquota_obj); 904 if (error == ENOENT) 905 zfsvfs->z_userobjquota_obj = 0; 906 else if (error != 0) 907 return (error); 908 909 error = zap_lookup(os, MASTER_NODE_OBJ, 910 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA], 911 8, 1, &zfsvfs->z_groupobjquota_obj); 912 if (error == ENOENT) 913 zfsvfs->z_groupobjquota_obj = 0; 914 else if (error != 0) 915 return (error); 916 917 error = zap_lookup(os, MASTER_NODE_OBJ, 918 zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA], 919 8, 1, &zfsvfs->z_projectobjquota_obj); 920 if (error == ENOENT) 921 zfsvfs->z_projectobjquota_obj = 0; 922 else if (error != 0) 923 return (error); 924 925 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, 926 &zfsvfs->z_fuid_obj); 927 if (error == ENOENT) 928 zfsvfs->z_fuid_obj = 0; 929 else if (error != 0) 930 return (error); 931 932 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, 933 &zfsvfs->z_shares_dir); 934 if (error == ENOENT) 935 zfsvfs->z_shares_dir = 0; 936 else if (error != 0) 937 return (error); 938 939 /* 940 * Only use the name cache if we are looking for a 941 * name on a file system that does not require normalization 942 * or case folding. We can also look there if we happen to be 943 * on a non-normalizing, mixed sensitivity file system IF we 944 * are looking for the exact name (which is always the case on 945 * FreeBSD). 946 */ 947 zfsvfs->z_use_namecache = !zfsvfs->z_norm || 948 ((zfsvfs->z_case == ZFS_CASE_MIXED) && 949 !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER)); 950 951 return (0); 952 } 953 954 taskq_t *zfsvfs_taskq; 955 956 static void 957 zfsvfs_task_unlinked_drain(void *context, int pending __unused) 958 { 959 960 zfs_unlinked_drain((zfsvfs_t *)context); 961 } 962 963 int 964 zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp) 965 { 966 objset_t *os; 967 zfsvfs_t *zfsvfs; 968 int error; 969 boolean_t ro = (readonly || (strchr(osname, '@') != NULL)); 970 971 /* 972 * XXX: Fix struct statfs so this isn't necessary! 973 * 974 * The 'osname' is used as the filesystem's special node, which means 975 * it must fit in statfs.f_mntfromname, or else it can't be 976 * enumerated, so libzfs_mnttab_find() returns NULL, which causes 977 * 'zfs unmount' to think it's not mounted when it is. 978 */ 979 if (strlen(osname) >= MNAMELEN) 980 return (SET_ERROR(ENAMETOOLONG)); 981 982 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 983 984 error = dmu_objset_own(osname, DMU_OST_ZFS, ro, B_TRUE, zfsvfs, 985 &os); 986 if (error != 0) { 987 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 988 return (error); 989 } 990 991 error = zfsvfs_create_impl(zfvp, zfsvfs, os); 992 993 return (error); 994 } 995 996 997 int 998 zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os) 999 { 1000 int error; 1001 1002 zfsvfs->z_vfs = NULL; 1003 zfsvfs->z_parent = zfsvfs; 1004 1005 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 1006 mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); 1007 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 1008 offsetof(znode_t, z_link_node)); 1009 TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0, 1010 zfsvfs_task_unlinked_drain, zfsvfs); 1011 ZFS_TEARDOWN_INIT(zfsvfs); 1012 ZFS_TEARDOWN_INACTIVE_INIT(zfsvfs); 1013 rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 1014 for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1015 mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 1016 1017 error = zfsvfs_init(zfsvfs, os); 1018 if (error != 0) { 1019 dmu_objset_disown(os, B_TRUE, zfsvfs); 1020 *zfvp = NULL; 1021 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1022 return (error); 1023 } 1024 1025 *zfvp = zfsvfs; 1026 return (0); 1027 } 1028 1029 static int 1030 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 1031 { 1032 int error; 1033 1034 /* 1035 * Check for a bad on-disk format version now since we 1036 * lied about owning the dataset readonly before. 1037 */ 1038 if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) && 1039 dmu_objset_incompatible_encryption_version(zfsvfs->z_os)) 1040 return (SET_ERROR(EROFS)); 1041 1042 error = zfs_register_callbacks(zfsvfs->z_vfs); 1043 if (error) 1044 return (error); 1045 1046 /* 1047 * If we are not mounting (ie: online recv), then we don't 1048 * have to worry about replaying the log as we blocked all 1049 * operations out since we closed the ZIL. 1050 */ 1051 if (mounting) { 1052 boolean_t readonly; 1053 1054 ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL); 1055 error = dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os); 1056 if (error) 1057 return (error); 1058 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data, 1059 &zfsvfs->z_kstat.dk_zil_sums); 1060 1061 /* 1062 * During replay we remove the read only flag to 1063 * allow replays to succeed. 1064 */ 1065 readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 1066 if (readonly != 0) { 1067 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 1068 } else { 1069 dsl_dir_t *dd; 1070 zap_stats_t zs; 1071 1072 if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj, 1073 &zs) == 0) { 1074 dataset_kstats_update_nunlinks_kstat( 1075 &zfsvfs->z_kstat, zs.zs_num_entries); 1076 dprintf_ds(zfsvfs->z_os->os_dsl_dataset, 1077 "num_entries in unlinked set: %llu", 1078 (u_longlong_t)zs.zs_num_entries); 1079 } 1080 1081 zfs_unlinked_drain(zfsvfs); 1082 dd = zfsvfs->z_os->os_dsl_dataset->ds_dir; 1083 dd->dd_activity_cancelled = B_FALSE; 1084 } 1085 1086 /* 1087 * Parse and replay the intent log. 1088 * 1089 * Because of ziltest, this must be done after 1090 * zfs_unlinked_drain(). (Further note: ziltest 1091 * doesn't use readonly mounts, where 1092 * zfs_unlinked_drain() isn't called.) This is because 1093 * ziltest causes spa_sync() to think it's committed, 1094 * but actually it is not, so the intent log contains 1095 * many txg's worth of changes. 1096 * 1097 * In particular, if object N is in the unlinked set in 1098 * the last txg to actually sync, then it could be 1099 * actually freed in a later txg and then reallocated 1100 * in a yet later txg. This would write a "create 1101 * object N" record to the intent log. Normally, this 1102 * would be fine because the spa_sync() would have 1103 * written out the fact that object N is free, before 1104 * we could write the "create object N" intent log 1105 * record. 1106 * 1107 * But when we are in ziltest mode, we advance the "open 1108 * txg" without actually spa_sync()-ing the changes to 1109 * disk. So we would see that object N is still 1110 * allocated and in the unlinked set, and there is an 1111 * intent log record saying to allocate it. 1112 */ 1113 if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) { 1114 if (zil_replay_disable) { 1115 zil_destroy(zfsvfs->z_log, B_FALSE); 1116 } else { 1117 boolean_t use_nc = zfsvfs->z_use_namecache; 1118 zfsvfs->z_use_namecache = B_FALSE; 1119 zfsvfs->z_replay = B_TRUE; 1120 zil_replay(zfsvfs->z_os, zfsvfs, 1121 zfs_replay_vector); 1122 zfsvfs->z_replay = B_FALSE; 1123 zfsvfs->z_use_namecache = use_nc; 1124 } 1125 } 1126 1127 /* restore readonly bit */ 1128 if (readonly != 0) 1129 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 1130 } else { 1131 ASSERT3P(zfsvfs->z_kstat.dk_kstats, !=, NULL); 1132 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data, 1133 &zfsvfs->z_kstat.dk_zil_sums); 1134 } 1135 1136 /* 1137 * Set the objset user_ptr to track its zfsvfs. 1138 */ 1139 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 1140 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1141 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 1142 1143 return (0); 1144 } 1145 1146 void 1147 zfsvfs_free(zfsvfs_t *zfsvfs) 1148 { 1149 int i; 1150 1151 zfs_fuid_destroy(zfsvfs); 1152 1153 mutex_destroy(&zfsvfs->z_znodes_lock); 1154 mutex_destroy(&zfsvfs->z_lock); 1155 ASSERT3U(zfsvfs->z_nr_znodes, ==, 0); 1156 list_destroy(&zfsvfs->z_all_znodes); 1157 ZFS_TEARDOWN_DESTROY(zfsvfs); 1158 ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs); 1159 rw_destroy(&zfsvfs->z_fuid_lock); 1160 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1161 mutex_destroy(&zfsvfs->z_hold_mtx[i]); 1162 dataset_kstats_destroy(&zfsvfs->z_kstat); 1163 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1164 } 1165 1166 static void 1167 zfs_set_fuid_feature(zfsvfs_t *zfsvfs) 1168 { 1169 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 1170 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); 1171 } 1172 1173 static int 1174 zfs_domount(vfs_t *vfsp, char *osname) 1175 { 1176 uint64_t recordsize, fsid_guid; 1177 int error = 0; 1178 zfsvfs_t *zfsvfs; 1179 1180 ASSERT3P(vfsp, !=, NULL); 1181 ASSERT3P(osname, !=, NULL); 1182 1183 error = zfsvfs_create(osname, vfsp->mnt_flag & MNT_RDONLY, &zfsvfs); 1184 if (error) 1185 return (error); 1186 zfsvfs->z_vfs = vfsp; 1187 1188 if ((error = dsl_prop_get_integer(osname, 1189 "recordsize", &recordsize, NULL))) 1190 goto out; 1191 zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE; 1192 zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize; 1193 1194 vfsp->vfs_data = zfsvfs; 1195 vfsp->mnt_flag |= MNT_LOCAL; 1196 vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 1197 vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; 1198 vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED; 1199 /* 1200 * This can cause a loss of coherence between ARC and page cache 1201 * on ZoF - unclear if the problem is in FreeBSD or ZoF 1202 */ 1203 vfsp->mnt_kern_flag |= MNTK_NO_IOPF; /* vn_io_fault can be used */ 1204 vfsp->mnt_kern_flag |= MNTK_NOMSYNC; 1205 vfsp->mnt_kern_flag |= MNTK_VMSETSIZE_BUG; 1206 1207 #if defined(_KERNEL) && !defined(KMEM_DEBUG) 1208 vfsp->mnt_kern_flag |= MNTK_FPLOOKUP; 1209 #endif 1210 /* 1211 * The fsid is 64 bits, composed of an 8-bit fs type, which 1212 * separates our fsid from any other filesystem types, and a 1213 * 56-bit objset unique ID. The objset unique ID is unique to 1214 * all objsets open on this system, provided by unique_create(). 1215 * The 8-bit fs type must be put in the low bits of fsid[1] 1216 * because that's where other Solaris filesystems put it. 1217 */ 1218 fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os); 1219 ASSERT3U((fsid_guid & ~((1ULL << 56) - 1)), ==, 0); 1220 vfsp->vfs_fsid.val[0] = fsid_guid; 1221 vfsp->vfs_fsid.val[1] = ((fsid_guid >> 32) << 8) | 1222 (vfsp->mnt_vfc->vfc_typenum & 0xFF); 1223 1224 /* 1225 * Set features for file system. 1226 */ 1227 zfs_set_fuid_feature(zfsvfs); 1228 1229 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 1230 uint64_t pval; 1231 1232 atime_changed_cb(zfsvfs, B_FALSE); 1233 readonly_changed_cb(zfsvfs, B_TRUE); 1234 if ((error = dsl_prop_get_integer(osname, 1235 "xattr", &pval, NULL))) 1236 goto out; 1237 xattr_changed_cb(zfsvfs, pval); 1238 if ((error = dsl_prop_get_integer(osname, 1239 "acltype", &pval, NULL))) 1240 goto out; 1241 acl_type_changed_cb(zfsvfs, pval); 1242 zfsvfs->z_issnap = B_TRUE; 1243 zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED; 1244 1245 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 1246 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1247 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 1248 } else { 1249 if ((error = zfsvfs_setup(zfsvfs, B_TRUE))) 1250 goto out; 1251 } 1252 1253 vfs_mountedfrom(vfsp, osname); 1254 1255 if (!zfsvfs->z_issnap) 1256 zfsctl_create(zfsvfs); 1257 out: 1258 if (error) { 1259 dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs); 1260 zfsvfs_free(zfsvfs); 1261 } else { 1262 atomic_inc_32(&zfs_active_fs_count); 1263 } 1264 1265 return (error); 1266 } 1267 1268 static void 1269 zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 1270 { 1271 objset_t *os = zfsvfs->z_os; 1272 1273 if (!dmu_objset_is_snapshot(os)) 1274 dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs); 1275 } 1276 1277 static int 1278 getpoolname(const char *osname, char *poolname) 1279 { 1280 char *p; 1281 1282 p = strchr(osname, '/'); 1283 if (p == NULL) { 1284 if (strlen(osname) >= MAXNAMELEN) 1285 return (ENAMETOOLONG); 1286 (void) strcpy(poolname, osname); 1287 } else { 1288 if (p - osname >= MAXNAMELEN) 1289 return (ENAMETOOLONG); 1290 (void) strlcpy(poolname, osname, p - osname + 1); 1291 } 1292 return (0); 1293 } 1294 1295 static void 1296 fetch_osname_options(char *name, bool *checkpointrewind) 1297 { 1298 1299 if (name[0] == '!') { 1300 *checkpointrewind = true; 1301 memmove(name, name + 1, strlen(name)); 1302 } else { 1303 *checkpointrewind = false; 1304 } 1305 } 1306 1307 static int 1308 zfs_mount(vfs_t *vfsp) 1309 { 1310 kthread_t *td = curthread; 1311 vnode_t *mvp = vfsp->mnt_vnodecovered; 1312 cred_t *cr = td->td_ucred; 1313 char *osname; 1314 int error = 0; 1315 int canwrite; 1316 bool checkpointrewind, isctlsnap = false; 1317 1318 if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) 1319 return (SET_ERROR(EINVAL)); 1320 1321 /* 1322 * If full-owner-access is enabled and delegated administration is 1323 * turned on, we must set nosuid. 1324 */ 1325 if (zfs_super_owner && 1326 dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) { 1327 secpolicy_fs_mount_clearopts(cr, vfsp); 1328 } 1329 1330 fetch_osname_options(osname, &checkpointrewind); 1331 isctlsnap = (zfsctl_is_node(mvp) && strchr(osname, '@') != NULL); 1332 1333 /* 1334 * Check for mount privilege? 1335 * 1336 * If we don't have privilege then see if 1337 * we have local permission to allow it 1338 */ 1339 error = secpolicy_fs_mount(cr, mvp, vfsp); 1340 if (error && isctlsnap) { 1341 secpolicy_fs_mount_clearopts(cr, vfsp); 1342 } else if (error) { 1343 if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0) 1344 goto out; 1345 1346 if (!(vfsp->vfs_flag & MS_REMOUNT)) { 1347 vattr_t vattr; 1348 1349 /* 1350 * Make sure user is the owner of the mount point 1351 * or has sufficient privileges. 1352 */ 1353 1354 vattr.va_mask = AT_UID; 1355 1356 vn_lock(mvp, LK_SHARED | LK_RETRY); 1357 if (VOP_GETATTR(mvp, &vattr, cr)) { 1358 VOP_UNLOCK1(mvp); 1359 goto out; 1360 } 1361 1362 if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 && 1363 VOP_ACCESS(mvp, VWRITE, cr, td) != 0) { 1364 VOP_UNLOCK1(mvp); 1365 goto out; 1366 } 1367 VOP_UNLOCK1(mvp); 1368 } 1369 1370 secpolicy_fs_mount_clearopts(cr, vfsp); 1371 } 1372 1373 /* 1374 * Refuse to mount a filesystem if we are in a local zone and the 1375 * dataset is not visible. 1376 */ 1377 if (!INGLOBALZONE(curproc) && 1378 (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 1379 boolean_t mount_snapshot = B_FALSE; 1380 1381 /* 1382 * Snapshots may be mounted in .zfs for unjailed datasets 1383 * if allowed by the jail param zfs.mount_snapshot. 1384 */ 1385 if (isctlsnap) { 1386 struct prison *pr; 1387 struct zfs_jailparam *zjp; 1388 1389 pr = curthread->td_ucred->cr_prison; 1390 mtx_lock(&pr->pr_mtx); 1391 zjp = osd_jail_get(pr, zfs_jailparam_slot); 1392 mtx_unlock(&pr->pr_mtx); 1393 if (zjp && zjp->mount_snapshot) 1394 mount_snapshot = B_TRUE; 1395 } 1396 if (!mount_snapshot) { 1397 error = SET_ERROR(EPERM); 1398 goto out; 1399 } 1400 } 1401 1402 vfsp->vfs_flag |= MNT_NFS4ACLS; 1403 1404 /* 1405 * When doing a remount, we simply refresh our temporary properties 1406 * according to those options set in the current VFS options. 1407 */ 1408 if (vfsp->vfs_flag & MS_REMOUNT) { 1409 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1410 1411 /* 1412 * Refresh mount options with z_teardown_lock blocking I/O while 1413 * the filesystem is in an inconsistent state. 1414 * The lock also serializes this code with filesystem 1415 * manipulations between entry to zfs_suspend_fs() and return 1416 * from zfs_resume_fs(). 1417 */ 1418 ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG); 1419 zfs_unregister_callbacks(zfsvfs); 1420 error = zfs_register_callbacks(vfsp); 1421 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 1422 goto out; 1423 } 1424 1425 /* Initial root mount: try hard to import the requested root pool. */ 1426 if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 && 1427 (vfsp->vfs_flag & MNT_UPDATE) == 0) { 1428 char pname[MAXNAMELEN]; 1429 1430 error = getpoolname(osname, pname); 1431 if (error == 0) 1432 error = spa_import_rootpool(pname, checkpointrewind); 1433 if (error) 1434 goto out; 1435 } 1436 DROP_GIANT(); 1437 error = zfs_domount(vfsp, osname); 1438 PICKUP_GIANT(); 1439 1440 out: 1441 return (error); 1442 } 1443 1444 static int 1445 zfs_statfs(vfs_t *vfsp, struct statfs *statp) 1446 { 1447 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1448 uint64_t refdbytes, availbytes, usedobjs, availobjs; 1449 int error; 1450 1451 statp->f_version = STATFS_VERSION; 1452 1453 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 1454 return (error); 1455 1456 dmu_objset_space(zfsvfs->z_os, 1457 &refdbytes, &availbytes, &usedobjs, &availobjs); 1458 1459 /* 1460 * The underlying storage pool actually uses multiple block sizes. 1461 * We report the fragsize as the smallest block size we support, 1462 * and we report our blocksize as the filesystem's maximum blocksize. 1463 */ 1464 statp->f_bsize = SPA_MINBLOCKSIZE; 1465 statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize; 1466 1467 /* 1468 * The following report "total" blocks of various kinds in the 1469 * file system, but reported in terms of f_frsize - the 1470 * "fragment" size. 1471 */ 1472 1473 statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 1474 statp->f_bfree = availbytes / statp->f_bsize; 1475 statp->f_bavail = statp->f_bfree; /* no root reservation */ 1476 1477 /* 1478 * statvfs() should really be called statufs(), because it assumes 1479 * static metadata. ZFS doesn't preallocate files, so the best 1480 * we can do is report the max that could possibly fit in f_files, 1481 * and that minus the number actually used in f_ffree. 1482 * For f_ffree, report the smaller of the number of object available 1483 * and the number of blocks (each object will take at least a block). 1484 */ 1485 statp->f_ffree = MIN(availobjs, statp->f_bfree); 1486 statp->f_files = statp->f_ffree + usedobjs; 1487 1488 /* 1489 * We're a zfs filesystem. 1490 */ 1491 strlcpy(statp->f_fstypename, "zfs", 1492 sizeof (statp->f_fstypename)); 1493 1494 strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 1495 sizeof (statp->f_mntfromname)); 1496 strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 1497 sizeof (statp->f_mntonname)); 1498 1499 statp->f_namemax = MAXNAMELEN - 1; 1500 1501 zfs_exit(zfsvfs, FTAG); 1502 return (0); 1503 } 1504 1505 static int 1506 zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) 1507 { 1508 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1509 znode_t *rootzp; 1510 int error; 1511 1512 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 1513 return (error); 1514 1515 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 1516 if (error == 0) 1517 *vpp = ZTOV(rootzp); 1518 1519 zfs_exit(zfsvfs, FTAG); 1520 1521 if (error == 0) { 1522 error = vn_lock(*vpp, flags); 1523 if (error != 0) { 1524 VN_RELE(*vpp); 1525 *vpp = NULL; 1526 } 1527 } 1528 return (error); 1529 } 1530 1531 /* 1532 * Teardown the zfsvfs::z_os. 1533 * 1534 * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock' 1535 * and 'z_teardown_inactive_lock' held. 1536 */ 1537 static int 1538 zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) 1539 { 1540 znode_t *zp; 1541 dsl_dir_t *dd; 1542 1543 /* 1544 * If someone has not already unmounted this file system, 1545 * drain the zrele_taskq to ensure all active references to the 1546 * zfsvfs_t have been handled only then can it be safely destroyed. 1547 */ 1548 if (zfsvfs->z_os) { 1549 /* 1550 * If we're unmounting we have to wait for the list to 1551 * drain completely. 1552 * 1553 * If we're not unmounting there's no guarantee the list 1554 * will drain completely, but zreles run from the taskq 1555 * may add the parents of dir-based xattrs to the taskq 1556 * so we want to wait for these. 1557 * 1558 * We can safely read z_nr_znodes without locking because the 1559 * VFS has already blocked operations which add to the 1560 * z_all_znodes list and thus increment z_nr_znodes. 1561 */ 1562 int round = 0; 1563 while (zfsvfs->z_nr_znodes > 0) { 1564 taskq_wait_outstanding(dsl_pool_zrele_taskq( 1565 dmu_objset_pool(zfsvfs->z_os)), 0); 1566 if (++round > 1 && !unmounting) 1567 break; 1568 } 1569 } 1570 ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG); 1571 1572 if (!unmounting) { 1573 /* 1574 * We purge the parent filesystem's vfsp as the parent 1575 * filesystem and all of its snapshots have their vnode's 1576 * v_vfsp set to the parent's filesystem's vfsp. Note, 1577 * 'z_parent' is self referential for non-snapshots. 1578 */ 1579 #ifdef FREEBSD_NAMECACHE 1580 #if __FreeBSD_version >= 1300117 1581 cache_purgevfs(zfsvfs->z_parent->z_vfs); 1582 #else 1583 cache_purgevfs(zfsvfs->z_parent->z_vfs, true); 1584 #endif 1585 #endif 1586 } 1587 1588 /* 1589 * Close the zil. NB: Can't close the zil while zfs_inactive 1590 * threads are blocked as zil_close can call zfs_inactive. 1591 */ 1592 if (zfsvfs->z_log) { 1593 zil_close(zfsvfs->z_log); 1594 zfsvfs->z_log = NULL; 1595 } 1596 1597 ZFS_TEARDOWN_INACTIVE_ENTER_WRITE(zfsvfs); 1598 1599 /* 1600 * If we are not unmounting (ie: online recv) and someone already 1601 * unmounted this file system while we were doing the switcheroo, 1602 * or a reopen of z_os failed then just bail out now. 1603 */ 1604 if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { 1605 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); 1606 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 1607 return (SET_ERROR(EIO)); 1608 } 1609 1610 /* 1611 * At this point there are no vops active, and any new vops will 1612 * fail with EIO since we have z_teardown_lock for writer (only 1613 * relevant for forced unmount). 1614 * 1615 * Release all holds on dbufs. 1616 */ 1617 mutex_enter(&zfsvfs->z_znodes_lock); 1618 for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; 1619 zp = list_next(&zfsvfs->z_all_znodes, zp)) { 1620 if (zp->z_sa_hdl != NULL) { 1621 zfs_znode_dmu_fini(zp); 1622 } 1623 } 1624 mutex_exit(&zfsvfs->z_znodes_lock); 1625 1626 /* 1627 * If we are unmounting, set the unmounted flag and let new vops 1628 * unblock. zfs_inactive will have the unmounted behavior, and all 1629 * other vops will fail with EIO. 1630 */ 1631 if (unmounting) { 1632 zfsvfs->z_unmounted = B_TRUE; 1633 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); 1634 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 1635 } 1636 1637 /* 1638 * z_os will be NULL if there was an error in attempting to reopen 1639 * zfsvfs, so just return as the properties had already been 1640 * unregistered and cached data had been evicted before. 1641 */ 1642 if (zfsvfs->z_os == NULL) 1643 return (0); 1644 1645 /* 1646 * Unregister properties. 1647 */ 1648 zfs_unregister_callbacks(zfsvfs); 1649 1650 /* 1651 * Evict cached data 1652 */ 1653 if (!zfs_is_readonly(zfsvfs)) 1654 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 1655 dmu_objset_evict_dbufs(zfsvfs->z_os); 1656 dd = zfsvfs->z_os->os_dsl_dataset->ds_dir; 1657 dsl_dir_cancel_waiters(dd); 1658 1659 return (0); 1660 } 1661 1662 static int 1663 zfs_umount(vfs_t *vfsp, int fflag) 1664 { 1665 kthread_t *td = curthread; 1666 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1667 objset_t *os; 1668 cred_t *cr = td->td_ucred; 1669 int ret; 1670 1671 ret = secpolicy_fs_unmount(cr, vfsp); 1672 if (ret) { 1673 if (dsl_deleg_access((char *)vfsp->vfs_resource, 1674 ZFS_DELEG_PERM_MOUNT, cr)) 1675 return (ret); 1676 } 1677 1678 /* 1679 * Unmount any snapshots mounted under .zfs before unmounting the 1680 * dataset itself. 1681 */ 1682 if (zfsvfs->z_ctldir != NULL) { 1683 if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 1684 return (ret); 1685 } 1686 1687 if (fflag & MS_FORCE) { 1688 /* 1689 * Mark file system as unmounted before calling 1690 * vflush(FORCECLOSE). This way we ensure no future vnops 1691 * will be called and risk operating on DOOMED vnodes. 1692 */ 1693 ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG); 1694 zfsvfs->z_unmounted = B_TRUE; 1695 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 1696 } 1697 1698 /* 1699 * Flush all the files. 1700 */ 1701 ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td); 1702 if (ret != 0) 1703 return (ret); 1704 while (taskqueue_cancel(zfsvfs_taskq->tq_queue, 1705 &zfsvfs->z_unlinked_drain_task, NULL) != 0) 1706 taskqueue_drain(zfsvfs_taskq->tq_queue, 1707 &zfsvfs->z_unlinked_drain_task); 1708 1709 VERIFY0(zfsvfs_teardown(zfsvfs, B_TRUE)); 1710 os = zfsvfs->z_os; 1711 1712 /* 1713 * z_os will be NULL if there was an error in 1714 * attempting to reopen zfsvfs. 1715 */ 1716 if (os != NULL) { 1717 /* 1718 * Unset the objset user_ptr. 1719 */ 1720 mutex_enter(&os->os_user_ptr_lock); 1721 dmu_objset_set_user(os, NULL); 1722 mutex_exit(&os->os_user_ptr_lock); 1723 1724 /* 1725 * Finally release the objset 1726 */ 1727 dmu_objset_disown(os, B_TRUE, zfsvfs); 1728 } 1729 1730 /* 1731 * We can now safely destroy the '.zfs' directory node. 1732 */ 1733 if (zfsvfs->z_ctldir != NULL) 1734 zfsctl_destroy(zfsvfs); 1735 zfs_freevfs(vfsp); 1736 1737 return (0); 1738 } 1739 1740 static int 1741 zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) 1742 { 1743 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1744 znode_t *zp; 1745 int err; 1746 1747 /* 1748 * zfs_zget() can't operate on virtual entries like .zfs/ or 1749 * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP. 1750 * This will make NFS to switch to LOOKUP instead of using VGET. 1751 */ 1752 if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR || 1753 (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir)) 1754 return (EOPNOTSUPP); 1755 1756 if ((err = zfs_enter(zfsvfs, FTAG)) != 0) 1757 return (err); 1758 err = zfs_zget(zfsvfs, ino, &zp); 1759 if (err == 0 && zp->z_unlinked) { 1760 vrele(ZTOV(zp)); 1761 err = EINVAL; 1762 } 1763 if (err == 0) 1764 *vpp = ZTOV(zp); 1765 zfs_exit(zfsvfs, FTAG); 1766 if (err == 0) { 1767 err = vn_lock(*vpp, flags); 1768 if (err != 0) 1769 vrele(*vpp); 1770 } 1771 if (err != 0) 1772 *vpp = NULL; 1773 return (err); 1774 } 1775 1776 static int 1777 #if __FreeBSD_version >= 1300098 1778 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp, 1779 struct ucred **credanonp, int *numsecflavors, int *secflavors) 1780 #else 1781 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 1782 struct ucred **credanonp, int *numsecflavors, int **secflavors) 1783 #endif 1784 { 1785 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1786 1787 /* 1788 * If this is regular file system vfsp is the same as 1789 * zfsvfs->z_parent->z_vfs, but if it is snapshot, 1790 * zfsvfs->z_parent->z_vfs represents parent file system 1791 * which we have to use here, because only this file system 1792 * has mnt_export configured. 1793 */ 1794 return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp, 1795 credanonp, numsecflavors, secflavors)); 1796 } 1797 1798 _Static_assert(sizeof (struct fid) >= SHORT_FID_LEN, 1799 "struct fid bigger than SHORT_FID_LEN"); 1800 _Static_assert(sizeof (struct fid) >= LONG_FID_LEN, 1801 "struct fid bigger than LONG_FID_LEN"); 1802 1803 static int 1804 zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) 1805 { 1806 struct componentname cn; 1807 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1808 znode_t *zp; 1809 vnode_t *dvp; 1810 uint64_t object = 0; 1811 uint64_t fid_gen = 0; 1812 uint64_t setgen = 0; 1813 uint64_t gen_mask; 1814 uint64_t zp_gen; 1815 int i, err; 1816 1817 *vpp = NULL; 1818 1819 if ((err = zfs_enter(zfsvfs, FTAG)) != 0) 1820 return (err); 1821 1822 /* 1823 * On FreeBSD we can get snapshot's mount point or its parent file 1824 * system mount point depending if snapshot is already mounted or not. 1825 */ 1826 if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) { 1827 zfid_long_t *zlfid = (zfid_long_t *)fidp; 1828 uint64_t objsetid = 0; 1829 1830 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1831 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1832 1833 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1834 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1835 1836 zfs_exit(zfsvfs, FTAG); 1837 1838 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1839 if (err) 1840 return (SET_ERROR(EINVAL)); 1841 if ((err = zfs_enter(zfsvfs, FTAG)) != 0) 1842 return (err); 1843 } 1844 1845 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1846 zfid_short_t *zfid = (zfid_short_t *)fidp; 1847 1848 for (i = 0; i < sizeof (zfid->zf_object); i++) 1849 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1850 1851 for (i = 0; i < sizeof (zfid->zf_gen); i++) 1852 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1853 } else { 1854 zfs_exit(zfsvfs, FTAG); 1855 return (SET_ERROR(EINVAL)); 1856 } 1857 1858 if (fidp->fid_len == LONG_FID_LEN && setgen != 0) { 1859 zfs_exit(zfsvfs, FTAG); 1860 dprintf("snapdir fid: fid_gen (%llu) and setgen (%llu)\n", 1861 (u_longlong_t)fid_gen, (u_longlong_t)setgen); 1862 return (SET_ERROR(EINVAL)); 1863 } 1864 1865 /* 1866 * A zero fid_gen means we are in .zfs or the .zfs/snapshot 1867 * directory tree. If the object == zfsvfs->z_shares_dir, then 1868 * we are in the .zfs/shares directory tree. 1869 */ 1870 if ((fid_gen == 0 && 1871 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) || 1872 (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) { 1873 zfs_exit(zfsvfs, FTAG); 1874 VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp)); 1875 if (object == ZFSCTL_INO_SNAPDIR) { 1876 cn.cn_nameptr = "snapshot"; 1877 cn.cn_namelen = strlen(cn.cn_nameptr); 1878 cn.cn_nameiop = LOOKUP; 1879 cn.cn_flags = ISLASTCN | LOCKLEAF; 1880 cn.cn_lkflags = flags; 1881 VERIFY0(VOP_LOOKUP(dvp, vpp, &cn)); 1882 vput(dvp); 1883 } else if (object == zfsvfs->z_shares_dir) { 1884 /* 1885 * XXX This branch must not be taken, 1886 * if it is, then the lookup below will 1887 * explode. 1888 */ 1889 cn.cn_nameptr = "shares"; 1890 cn.cn_namelen = strlen(cn.cn_nameptr); 1891 cn.cn_nameiop = LOOKUP; 1892 cn.cn_flags = ISLASTCN; 1893 cn.cn_lkflags = flags; 1894 VERIFY0(VOP_LOOKUP(dvp, vpp, &cn)); 1895 vput(dvp); 1896 } else { 1897 *vpp = dvp; 1898 } 1899 return (err); 1900 } 1901 1902 gen_mask = -1ULL >> (64 - 8 * i); 1903 1904 dprintf("getting %llu [%llu mask %llx]\n", (u_longlong_t)object, 1905 (u_longlong_t)fid_gen, 1906 (u_longlong_t)gen_mask); 1907 if ((err = zfs_zget(zfsvfs, object, &zp))) { 1908 zfs_exit(zfsvfs, FTAG); 1909 return (err); 1910 } 1911 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen, 1912 sizeof (uint64_t)); 1913 zp_gen = zp_gen & gen_mask; 1914 if (zp_gen == 0) 1915 zp_gen = 1; 1916 if (zp->z_unlinked || zp_gen != fid_gen) { 1917 dprintf("znode gen (%llu) != fid gen (%llu)\n", 1918 (u_longlong_t)zp_gen, (u_longlong_t)fid_gen); 1919 vrele(ZTOV(zp)); 1920 zfs_exit(zfsvfs, FTAG); 1921 return (SET_ERROR(EINVAL)); 1922 } 1923 1924 *vpp = ZTOV(zp); 1925 zfs_exit(zfsvfs, FTAG); 1926 err = vn_lock(*vpp, flags); 1927 if (err == 0) 1928 vnode_create_vobject(*vpp, zp->z_size, curthread); 1929 else 1930 *vpp = NULL; 1931 return (err); 1932 } 1933 1934 /* 1935 * Block out VOPs and close zfsvfs_t::z_os 1936 * 1937 * Note, if successful, then we return with the 'z_teardown_lock' and 1938 * 'z_teardown_inactive_lock' write held. We leave ownership of the underlying 1939 * dataset and objset intact so that they can be atomically handed off during 1940 * a subsequent rollback or recv operation and the resume thereafter. 1941 */ 1942 int 1943 zfs_suspend_fs(zfsvfs_t *zfsvfs) 1944 { 1945 int error; 1946 1947 if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) 1948 return (error); 1949 1950 return (0); 1951 } 1952 1953 /* 1954 * Rebuild SA and release VOPs. Note that ownership of the underlying dataset 1955 * is an invariant across any of the operations that can be performed while the 1956 * filesystem was suspended. Whether it succeeded or failed, the preconditions 1957 * are the same: the relevant objset and associated dataset are owned by 1958 * zfsvfs, held, and long held on entry. 1959 */ 1960 int 1961 zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) 1962 { 1963 int err; 1964 znode_t *zp; 1965 1966 ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs)); 1967 ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs)); 1968 1969 /* 1970 * We already own this, so just update the objset_t, as the one we 1971 * had before may have been evicted. 1972 */ 1973 objset_t *os; 1974 VERIFY3P(ds->ds_owner, ==, zfsvfs); 1975 VERIFY(dsl_dataset_long_held(ds)); 1976 dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds)); 1977 dsl_pool_config_enter(dp, FTAG); 1978 VERIFY0(dmu_objset_from_ds(ds, &os)); 1979 dsl_pool_config_exit(dp, FTAG); 1980 1981 err = zfsvfs_init(zfsvfs, os); 1982 if (err != 0) 1983 goto bail; 1984 1985 ds->ds_dir->dd_activity_cancelled = B_FALSE; 1986 VERIFY0(zfsvfs_setup(zfsvfs, B_FALSE)); 1987 1988 zfs_set_fuid_feature(zfsvfs); 1989 1990 /* 1991 * Attempt to re-establish all the active znodes with 1992 * their dbufs. If a zfs_rezget() fails, then we'll let 1993 * any potential callers discover that via zfs_enter_verify_zp 1994 * when they try to use their znode. 1995 */ 1996 mutex_enter(&zfsvfs->z_znodes_lock); 1997 for (zp = list_head(&zfsvfs->z_all_znodes); zp; 1998 zp = list_next(&zfsvfs->z_all_znodes, zp)) { 1999 (void) zfs_rezget(zp); 2000 } 2001 mutex_exit(&zfsvfs->z_znodes_lock); 2002 2003 bail: 2004 /* release the VOPs */ 2005 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); 2006 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 2007 2008 if (err) { 2009 /* 2010 * Since we couldn't setup the sa framework, try to force 2011 * unmount this file system. 2012 */ 2013 if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) { 2014 vfs_ref(zfsvfs->z_vfs); 2015 (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread); 2016 } 2017 } 2018 return (err); 2019 } 2020 2021 static void 2022 zfs_freevfs(vfs_t *vfsp) 2023 { 2024 zfsvfs_t *zfsvfs = vfsp->vfs_data; 2025 2026 zfsvfs_free(zfsvfs); 2027 2028 atomic_dec_32(&zfs_active_fs_count); 2029 } 2030 2031 #ifdef __i386__ 2032 static int desiredvnodes_backup; 2033 #include <sys/vmmeter.h> 2034 2035 2036 #include <vm/vm_page.h> 2037 #include <vm/vm_object.h> 2038 #include <vm/vm_kern.h> 2039 #include <vm/vm_map.h> 2040 #endif 2041 2042 static void 2043 zfs_vnodes_adjust(void) 2044 { 2045 #ifdef __i386__ 2046 int newdesiredvnodes; 2047 2048 desiredvnodes_backup = desiredvnodes; 2049 2050 /* 2051 * We calculate newdesiredvnodes the same way it is done in 2052 * vntblinit(). If it is equal to desiredvnodes, it means that 2053 * it wasn't tuned by the administrator and we can tune it down. 2054 */ 2055 newdesiredvnodes = min(maxproc + vm_cnt.v_page_count / 4, 2 * 2056 vm_kmem_size / (5 * (sizeof (struct vm_object) + 2057 sizeof (struct vnode)))); 2058 if (newdesiredvnodes == desiredvnodes) 2059 desiredvnodes = (3 * newdesiredvnodes) / 4; 2060 #endif 2061 } 2062 2063 static void 2064 zfs_vnodes_adjust_back(void) 2065 { 2066 2067 #ifdef __i386__ 2068 desiredvnodes = desiredvnodes_backup; 2069 #endif 2070 } 2071 2072 void 2073 zfs_init(void) 2074 { 2075 2076 printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n"); 2077 2078 /* 2079 * Initialize .zfs directory structures 2080 */ 2081 zfsctl_init(); 2082 2083 /* 2084 * Initialize znode cache, vnode ops, etc... 2085 */ 2086 zfs_znode_init(); 2087 2088 /* 2089 * Reduce number of vnodes. Originally number of vnodes is calculated 2090 * with UFS inode in mind. We reduce it here, because it's too big for 2091 * ZFS/i386. 2092 */ 2093 zfs_vnodes_adjust(); 2094 2095 dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info); 2096 2097 zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0); 2098 } 2099 2100 void 2101 zfs_fini(void) 2102 { 2103 taskq_destroy(zfsvfs_taskq); 2104 zfsctl_fini(); 2105 zfs_znode_fini(); 2106 zfs_vnodes_adjust_back(); 2107 } 2108 2109 int 2110 zfs_busy(void) 2111 { 2112 return (zfs_active_fs_count != 0); 2113 } 2114 2115 /* 2116 * Release VOPs and unmount a suspended filesystem. 2117 */ 2118 int 2119 zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) 2120 { 2121 ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs)); 2122 ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs)); 2123 2124 /* 2125 * We already own this, so just hold and rele it to update the 2126 * objset_t, as the one we had before may have been evicted. 2127 */ 2128 objset_t *os; 2129 VERIFY3P(ds->ds_owner, ==, zfsvfs); 2130 VERIFY(dsl_dataset_long_held(ds)); 2131 dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds)); 2132 dsl_pool_config_enter(dp, FTAG); 2133 VERIFY0(dmu_objset_from_ds(ds, &os)); 2134 dsl_pool_config_exit(dp, FTAG); 2135 zfsvfs->z_os = os; 2136 2137 /* release the VOPs */ 2138 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); 2139 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 2140 2141 /* 2142 * Try to force unmount this file system. 2143 */ 2144 (void) zfs_umount(zfsvfs->z_vfs, 0); 2145 zfsvfs->z_unmounted = B_TRUE; 2146 return (0); 2147 } 2148 2149 int 2150 zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) 2151 { 2152 int error; 2153 objset_t *os = zfsvfs->z_os; 2154 dmu_tx_t *tx; 2155 2156 if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 2157 return (SET_ERROR(EINVAL)); 2158 2159 if (newvers < zfsvfs->z_version) 2160 return (SET_ERROR(EINVAL)); 2161 2162 if (zfs_spa_version_map(newvers) > 2163 spa_version(dmu_objset_spa(zfsvfs->z_os))) 2164 return (SET_ERROR(ENOTSUP)); 2165 2166 tx = dmu_tx_create(os); 2167 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); 2168 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { 2169 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, 2170 ZFS_SA_ATTRS); 2171 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2172 } 2173 error = dmu_tx_assign(tx, TXG_WAIT); 2174 if (error) { 2175 dmu_tx_abort(tx); 2176 return (error); 2177 } 2178 2179 error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 2180 8, 1, &newvers, tx); 2181 2182 if (error) { 2183 dmu_tx_commit(tx); 2184 return (error); 2185 } 2186 2187 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { 2188 uint64_t sa_obj; 2189 2190 ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=, 2191 SPA_VERSION_SA); 2192 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, 2193 DMU_OT_NONE, 0, tx); 2194 2195 error = zap_add(os, MASTER_NODE_OBJ, 2196 ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); 2197 ASSERT0(error); 2198 2199 VERIFY0(sa_set_sa_object(os, sa_obj)); 2200 sa_register_update_callback(os, zfs_sa_upgrade); 2201 } 2202 2203 spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx, 2204 "from %ju to %ju", (uintmax_t)zfsvfs->z_version, 2205 (uintmax_t)newvers); 2206 dmu_tx_commit(tx); 2207 2208 zfsvfs->z_version = newvers; 2209 os->os_version = newvers; 2210 2211 zfs_set_fuid_feature(zfsvfs); 2212 2213 return (0); 2214 } 2215 2216 /* 2217 * Read a property stored within the master node. 2218 */ 2219 int 2220 zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) 2221 { 2222 uint64_t *cached_copy = NULL; 2223 2224 /* 2225 * Figure out where in the objset_t the cached copy would live, if it 2226 * is available for the requested property. 2227 */ 2228 if (os != NULL) { 2229 switch (prop) { 2230 case ZFS_PROP_VERSION: 2231 cached_copy = &os->os_version; 2232 break; 2233 case ZFS_PROP_NORMALIZE: 2234 cached_copy = &os->os_normalization; 2235 break; 2236 case ZFS_PROP_UTF8ONLY: 2237 cached_copy = &os->os_utf8only; 2238 break; 2239 case ZFS_PROP_CASE: 2240 cached_copy = &os->os_casesensitivity; 2241 break; 2242 default: 2243 break; 2244 } 2245 } 2246 if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) { 2247 *value = *cached_copy; 2248 return (0); 2249 } 2250 2251 /* 2252 * If the property wasn't cached, look up the file system's value for 2253 * the property. For the version property, we look up a slightly 2254 * different string. 2255 */ 2256 const char *pname; 2257 int error = ENOENT; 2258 if (prop == ZFS_PROP_VERSION) { 2259 pname = ZPL_VERSION_STR; 2260 } else { 2261 pname = zfs_prop_to_name(prop); 2262 } 2263 2264 if (os != NULL) { 2265 ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS); 2266 error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); 2267 } 2268 2269 if (error == ENOENT) { 2270 /* No value set, use the default value */ 2271 switch (prop) { 2272 case ZFS_PROP_VERSION: 2273 *value = ZPL_VERSION; 2274 break; 2275 case ZFS_PROP_NORMALIZE: 2276 case ZFS_PROP_UTF8ONLY: 2277 *value = 0; 2278 break; 2279 case ZFS_PROP_CASE: 2280 *value = ZFS_CASE_SENSITIVE; 2281 break; 2282 case ZFS_PROP_ACLTYPE: 2283 *value = ZFS_ACLTYPE_NFSV4; 2284 break; 2285 default: 2286 return (error); 2287 } 2288 error = 0; 2289 } 2290 2291 /* 2292 * If one of the methods for getting the property value above worked, 2293 * copy it into the objset_t's cache. 2294 */ 2295 if (error == 0 && cached_copy != NULL) { 2296 *cached_copy = *value; 2297 } 2298 2299 return (error); 2300 } 2301 2302 /* 2303 * Return true if the corresponding vfs's unmounted flag is set. 2304 * Otherwise return false. 2305 * If this function returns true we know VFS unmount has been initiated. 2306 */ 2307 boolean_t 2308 zfs_get_vfs_flag_unmounted(objset_t *os) 2309 { 2310 zfsvfs_t *zfvp; 2311 boolean_t unmounted = B_FALSE; 2312 2313 ASSERT3U(dmu_objset_type(os), ==, DMU_OST_ZFS); 2314 2315 mutex_enter(&os->os_user_ptr_lock); 2316 zfvp = dmu_objset_get_user(os); 2317 if (zfvp != NULL && zfvp->z_vfs != NULL && 2318 (zfvp->z_vfs->mnt_kern_flag & MNTK_UNMOUNT)) 2319 unmounted = B_TRUE; 2320 mutex_exit(&os->os_user_ptr_lock); 2321 2322 return (unmounted); 2323 } 2324 2325 #ifdef _KERNEL 2326 void 2327 zfsvfs_update_fromname(const char *oldname, const char *newname) 2328 { 2329 char tmpbuf[MAXPATHLEN]; 2330 struct mount *mp; 2331 char *fromname; 2332 size_t oldlen; 2333 2334 oldlen = strlen(oldname); 2335 2336 mtx_lock(&mountlist_mtx); 2337 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 2338 fromname = mp->mnt_stat.f_mntfromname; 2339 if (strcmp(fromname, oldname) == 0) { 2340 (void) strlcpy(fromname, newname, 2341 sizeof (mp->mnt_stat.f_mntfromname)); 2342 continue; 2343 } 2344 if (strncmp(fromname, oldname, oldlen) == 0 && 2345 (fromname[oldlen] == '/' || fromname[oldlen] == '@')) { 2346 (void) snprintf(tmpbuf, sizeof (tmpbuf), "%s%s", 2347 newname, fromname + oldlen); 2348 (void) strlcpy(fromname, tmpbuf, 2349 sizeof (mp->mnt_stat.f_mntfromname)); 2350 continue; 2351 } 2352 } 2353 mtx_unlock(&mountlist_mtx); 2354 } 2355 #endif 2356 2357 /* 2358 * Find a prison with ZFS info. 2359 * Return the ZFS info and the (locked) prison. 2360 */ 2361 static struct zfs_jailparam * 2362 zfs_jailparam_find(struct prison *spr, struct prison **prp) 2363 { 2364 struct prison *pr; 2365 struct zfs_jailparam *zjp; 2366 2367 for (pr = spr; ; pr = pr->pr_parent) { 2368 mtx_lock(&pr->pr_mtx); 2369 if (pr == &prison0) { 2370 zjp = &zfs_jailparam0; 2371 break; 2372 } 2373 zjp = osd_jail_get(pr, zfs_jailparam_slot); 2374 if (zjp != NULL) 2375 break; 2376 mtx_unlock(&pr->pr_mtx); 2377 } 2378 *prp = pr; 2379 2380 return (zjp); 2381 } 2382 2383 /* 2384 * Ensure a prison has its own ZFS info. If zjpp is non-null, point it to the 2385 * ZFS info and lock the prison. 2386 */ 2387 static void 2388 zfs_jailparam_alloc(struct prison *pr, struct zfs_jailparam **zjpp) 2389 { 2390 struct prison *ppr; 2391 struct zfs_jailparam *zjp, *nzjp; 2392 void **rsv; 2393 2394 /* If this prison already has ZFS info, return that. */ 2395 zjp = zfs_jailparam_find(pr, &ppr); 2396 if (ppr == pr) 2397 goto done; 2398 2399 /* 2400 * Allocate a new info record. Then check again, in case something 2401 * changed during the allocation. 2402 */ 2403 mtx_unlock(&ppr->pr_mtx); 2404 nzjp = malloc(sizeof (struct zfs_jailparam), M_PRISON, M_WAITOK); 2405 rsv = osd_reserve(zfs_jailparam_slot); 2406 zjp = zfs_jailparam_find(pr, &ppr); 2407 if (ppr == pr) { 2408 free(nzjp, M_PRISON); 2409 osd_free_reserved(rsv); 2410 goto done; 2411 } 2412 /* Inherit the initial values from the ancestor. */ 2413 mtx_lock(&pr->pr_mtx); 2414 (void) osd_jail_set_reserved(pr, zfs_jailparam_slot, rsv, nzjp); 2415 (void) memcpy(nzjp, zjp, sizeof (*zjp)); 2416 zjp = nzjp; 2417 mtx_unlock(&ppr->pr_mtx); 2418 done: 2419 if (zjpp != NULL) 2420 *zjpp = zjp; 2421 else 2422 mtx_unlock(&pr->pr_mtx); 2423 } 2424 2425 /* 2426 * Jail OSD methods for ZFS VFS info. 2427 */ 2428 static int 2429 zfs_jailparam_create(void *obj, void *data) 2430 { 2431 struct prison *pr = obj; 2432 struct vfsoptlist *opts = data; 2433 int jsys; 2434 2435 if (vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys)) == 0 && 2436 jsys == JAIL_SYS_INHERIT) 2437 return (0); 2438 /* 2439 * Inherit a prison's initial values from its parent 2440 * (different from JAIL_SYS_INHERIT which also inherits changes). 2441 */ 2442 zfs_jailparam_alloc(pr, NULL); 2443 return (0); 2444 } 2445 2446 static int 2447 zfs_jailparam_get(void *obj, void *data) 2448 { 2449 struct prison *ppr, *pr = obj; 2450 struct vfsoptlist *opts = data; 2451 struct zfs_jailparam *zjp; 2452 int jsys, error; 2453 2454 zjp = zfs_jailparam_find(pr, &ppr); 2455 jsys = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; 2456 error = vfs_setopt(opts, "zfs", &jsys, sizeof (jsys)); 2457 if (error != 0 && error != ENOENT) 2458 goto done; 2459 if (jsys == JAIL_SYS_NEW) { 2460 error = vfs_setopt(opts, "zfs.mount_snapshot", 2461 &zjp->mount_snapshot, sizeof (zjp->mount_snapshot)); 2462 if (error != 0 && error != ENOENT) 2463 goto done; 2464 } else { 2465 /* 2466 * If this prison is inheriting its ZFS info, report 2467 * empty/zero parameters. 2468 */ 2469 static int mount_snapshot = 0; 2470 2471 error = vfs_setopt(opts, "zfs.mount_snapshot", 2472 &mount_snapshot, sizeof (mount_snapshot)); 2473 if (error != 0 && error != ENOENT) 2474 goto done; 2475 } 2476 error = 0; 2477 done: 2478 mtx_unlock(&ppr->pr_mtx); 2479 return (error); 2480 } 2481 2482 static int 2483 zfs_jailparam_set(void *obj, void *data) 2484 { 2485 struct prison *pr = obj; 2486 struct prison *ppr; 2487 struct vfsoptlist *opts = data; 2488 int error, jsys, mount_snapshot; 2489 2490 /* Set the parameters, which should be correct. */ 2491 error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys)); 2492 if (error == ENOENT) 2493 jsys = -1; 2494 error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot, 2495 sizeof (mount_snapshot)); 2496 if (error == ENOENT) 2497 mount_snapshot = -1; 2498 else 2499 jsys = JAIL_SYS_NEW; 2500 if (jsys == JAIL_SYS_NEW) { 2501 /* "zfs=new" or "zfs.*": the prison gets its own ZFS info. */ 2502 struct zfs_jailparam *zjp; 2503 2504 /* 2505 * A child jail cannot have more permissions than its parent 2506 */ 2507 if (pr->pr_parent != &prison0) { 2508 zjp = zfs_jailparam_find(pr->pr_parent, &ppr); 2509 mtx_unlock(&ppr->pr_mtx); 2510 if (zjp->mount_snapshot < mount_snapshot) { 2511 return (EPERM); 2512 } 2513 } 2514 zfs_jailparam_alloc(pr, &zjp); 2515 if (mount_snapshot != -1) 2516 zjp->mount_snapshot = mount_snapshot; 2517 mtx_unlock(&pr->pr_mtx); 2518 } else { 2519 /* "zfs=inherit": inherit the parent's ZFS info. */ 2520 mtx_lock(&pr->pr_mtx); 2521 osd_jail_del(pr, zfs_jailparam_slot); 2522 mtx_unlock(&pr->pr_mtx); 2523 } 2524 return (0); 2525 } 2526 2527 static int 2528 zfs_jailparam_check(void *obj __unused, void *data) 2529 { 2530 struct vfsoptlist *opts = data; 2531 int error, jsys, mount_snapshot; 2532 2533 /* Check that the parameters are correct. */ 2534 error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys)); 2535 if (error != ENOENT) { 2536 if (error != 0) 2537 return (error); 2538 if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT) 2539 return (EINVAL); 2540 } 2541 error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot, 2542 sizeof (mount_snapshot)); 2543 if (error != ENOENT) { 2544 if (error != 0) 2545 return (error); 2546 if (mount_snapshot != 0 && mount_snapshot != 1) 2547 return (EINVAL); 2548 } 2549 return (0); 2550 } 2551 2552 static void 2553 zfs_jailparam_destroy(void *data) 2554 { 2555 2556 free(data, M_PRISON); 2557 } 2558 2559 static void 2560 zfs_jailparam_sysinit(void *arg __unused) 2561 { 2562 struct prison *pr; 2563 osd_method_t methods[PR_MAXMETHOD] = { 2564 [PR_METHOD_CREATE] = zfs_jailparam_create, 2565 [PR_METHOD_GET] = zfs_jailparam_get, 2566 [PR_METHOD_SET] = zfs_jailparam_set, 2567 [PR_METHOD_CHECK] = zfs_jailparam_check, 2568 }; 2569 2570 zfs_jailparam_slot = osd_jail_register(zfs_jailparam_destroy, methods); 2571 /* Copy the defaults to any existing prisons. */ 2572 sx_slock(&allprison_lock); 2573 TAILQ_FOREACH(pr, &allprison, pr_list) 2574 zfs_jailparam_alloc(pr, NULL); 2575 sx_sunlock(&allprison_lock); 2576 } 2577 2578 static void 2579 zfs_jailparam_sysuninit(void *arg __unused) 2580 { 2581 2582 osd_jail_deregister(zfs_jailparam_slot); 2583 } 2584 2585 SYSINIT(zfs_jailparam_sysinit, SI_SUB_DRIVERS, SI_ORDER_ANY, 2586 zfs_jailparam_sysinit, NULL); 2587 SYSUNINIT(zfs_jailparam_sysuninit, SI_SUB_DRIVERS, SI_ORDER_ANY, 2588 zfs_jailparam_sysuninit, NULL); 2589