1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>. 24 * All rights reserved. 25 * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 26 * Copyright (c) 2014 Integros [integros.com] 27 * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 
 */

/* Portions Copyright 2010 Robert Milkowski */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <sys/acl.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/mntent.h>
#include <sys/mount.h>
#include <sys/cmn_err.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_vnops.h>
#include <sys/zfs_dir.h>
#include <sys/zil.h>
#include <sys/fs/zfs.h>
#include <sys/dmu.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_deleg.h>
#include <sys/spa.h>
#include <sys/zap.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
#include <sys/policy.h>
#include <sys/atomic.h>
#include <sys/zfs_ioctl.h>
#include <sys/zfs_ctldir.h>
#include <sys/zfs_fuid.h>
#include <sys/sunddi.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_dir.h>
#include <sys/jail.h>
#include <sys/osd.h>
#include <ufs/ufs/quota.h>
#include <sys/zfs_quota.h>

#include "zfs_comutil.h"

/* Compatibility shims for older FreeBSD kernels lacking these mnt flags. */
#ifndef MNTK_VMSETSIZE_BUG
#define	MNTK_VMSETSIZE_BUG	0
#endif
#ifndef MNTK_NOMSYNC
#define	MNTK_NOMSYNC	8
#endif

struct mtx zfs_debug_mtx;
MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF);

SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system");

int zfs_super_owner;
SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0,
	"File system owners can perform privileged operation on file systems");

int zfs_debug_level;
SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
	"Debug level");

/* Per-jail ZFS tunables, attached to the jail via the OSD slot below. */
struct zfs_jailparam {
	int mount_snapshot;
};

/* Defaults used for jails that never set the parameter explicitly. */
static struct zfs_jailparam zfs_jailparam0 = {
	.mount_snapshot = 0,
};

static int zfs_jailparam_slot;

SYSCTL_JAIL_PARAM_SYS_NODE(zfs, CTLFLAG_RW, "Jail ZFS parameters");
SYSCTL_JAIL_PARAM(_zfs, mount_snapshot, CTLTYPE_INT | CTLFLAG_RW, "I",
	"Allow mounting snapshots in the .zfs directory for unjailed datasets");

SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions");
static int zfs_version_acl = ZFS_ACL_VERSION;
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0,
	"ZFS_ACL_VERSION");
static int zfs_version_spa = SPA_VERSION;
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0,
	"SPA_VERSION");
static int zfs_version_zpl = ZPL_VERSION;
SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0,
	"ZPL_VERSION");

/*
 * VFS operation prototypes.  The quotactl and checkexp signatures changed
 * across FreeBSD versions, hence the __FreeBSD_version conditionals.
 */
#if __FreeBSD_version >= 1400018
static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg,
    bool *mp_busy);
#else
static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg);
#endif
static int zfs_mount(vfs_t *vfsp);
static int zfs_umount(vfs_t *vfsp, int fflag);
static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp);
static int zfs_statfs(vfs_t *vfsp, struct statfs *statp);
static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp);
static int zfs_sync(vfs_t *vfsp, int waitfor);
#if __FreeBSD_version >= 1300098
static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int *secflavors);
#else
static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int **secflavors);
#endif
static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp);
static void zfs_freevfs(vfs_t *vfsp);

struct vfsops zfs_vfsops = {
	.vfs_mount = zfs_mount,
	.vfs_unmount = zfs_umount,
#if __FreeBSD_version >= 1300049
	.vfs_root = vfs_cache_root,
	.vfs_cachedroot = zfs_root,
#else
	.vfs_root = zfs_root,
#endif
	.vfs_statfs = zfs_statfs,
	.vfs_vget = zfs_vget,
	.vfs_sync = zfs_sync,
	.vfs_checkexp = zfs_checkexp,
	.vfs_fhtovp = zfs_fhtovp,
	.vfs_quotactl = zfs_quotactl,
};

VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN);

/*
 * We need to keep a count of active fs's.
 * This is necessary to prevent our module
 * from being unloaded after a umount -f
 */
static uint32_t zfs_active_fs_count = 0;

/*
 * Look up the in-effect value of a property that may have been overridden
 * by a temporary mount option (e.g. "mount -o noatime").  If a mount-time
 * override is in effect, *val is replaced with the override and setpoint
 * (if non-NULL) is set to "temporary".  Returns 0 on success, ENOENT if
 * the dataset is not mounted or the property has no temporary form.
 */
int
zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val,
    char *setpoint)
{
	int error;
	zfsvfs_t *zfvp;
	vfs_t *vfsp;
	objset_t *os;
	uint64_t tmp = *val;

	error = dmu_objset_from_ds(ds, &os);
	if (error != 0)
		return (error);

	/* getzfsvfs_impl busies the vfs on success; unbusy before return. */
	error = getzfsvfs_impl(os, &zfvp);
	if (error != 0)
		return (error);
	if (zfvp == NULL)
		return (ENOENT);
	vfsp = zfvp->z_vfs;
	switch (zfs_prop) {
	case ZFS_PROP_ATIME:
		if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL))
			tmp = 0;
		if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL))
			tmp = 1;
		break;
	case ZFS_PROP_DEVICES:
		if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
			tmp = 0;
		if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL))
			tmp = 1;
		break;
	case ZFS_PROP_EXEC:
		if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL))
			tmp = 0;
		if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL))
			tmp = 1;
		break;
	case ZFS_PROP_SETUID:
		if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
			tmp = 0;
		if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL))
			tmp = 1;
		break;
	case ZFS_PROP_READONLY:
		if (vfs_optionisset(vfsp, MNTOPT_RW, NULL))
			tmp = 0;
		if (vfs_optionisset(vfsp, MNTOPT_RO, NULL))
			tmp = 1;
		break;
	case ZFS_PROP_XATTR:
		if (zfvp->z_flags & ZSB_XATTR)
			tmp = zfvp->z_xattr;
		break;
	case ZFS_PROP_NBMAND:
		if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL))
			tmp = 0;
		if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL))
			tmp = 1;
		break;
	default:
		vfs_unbusy(vfsp);
		return (ENOENT);
	}

	vfs_unbusy(vfsp);
	if (tmp != *val) {
		if (setpoint)
			(void) strcpy(setpoint, "temporary");
		*val = tmp;
	}
	return (0);
}

/*
 * Fill in a FreeBSD dqblk64 for the given user/group id from the dataset's
 * quota and space-used ZAP objects.  Returns ENOENT if no quota object
 * exists (quotas never set) or if the id has no quota entry.
 */
static int
zfs_getquota(zfsvfs_t *zfsvfs, uid_t id, int isgroup, struct dqblk64 *dqp)
{
	int error = 0;
	char buf[32];
	uint64_t usedobj, quotaobj;
	uint64_t quota, used = 0;
	timespec_t now;

	usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
	quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;

	if (quotaobj == 0 || zfsvfs->z_replay) {
		error = ENOENT;
		goto done;
	}
	/* Quota ZAP entries are keyed by the id rendered in hex. */
	(void) sprintf(buf, "%llx", (longlong_t)id);
	if ((error = zap_lookup(zfsvfs->z_os, quotaobj,
	    buf, sizeof (quota), 1, &quota)) != 0) {
		dprintf("%s(%d): quotaobj lookup failed\n",
		    __FUNCTION__, __LINE__);
		goto done;
	}
	/*
	 * quota(8) uses bsoftlimit as "quota", and hardlimit as "limit".
	 * So we set them to be the same.
	 */
	dqp->dqb_bsoftlimit = dqp->dqb_bhardlimit = btodb(quota);
	error = zap_lookup(zfsvfs->z_os, usedobj, buf, sizeof (used), 1, &used);
	if (error && error != ENOENT) {
		dprintf("%s(%d): usedobj failed; %d\n",
		    __FUNCTION__, __LINE__, error);
		goto done;
	}
	dqp->dqb_curblocks = btodb(used);
	dqp->dqb_ihardlimit = dqp->dqb_isoftlimit = 0;
	vfs_timestamp(&now);
	/*
	 * Setting this to 0 causes FreeBSD quota(8) to print
	 * the number of days since the epoch, which isn't
	 * particularly useful.
	 */
	dqp->dqb_btime = dqp->dqb_itime = now.tv_sec;
done:
	return (error);
}

/*
 * VFS_QUOTACTL entry point: translate BSD quotactl(2) sub-commands onto
 * ZFS user/group quota properties.  On FreeBSD < 1400018 the caller busied
 * the mount and Q_QUOTAON/Q_QUOTAOFF are expected to unbusy it here.
 */
static int
#if __FreeBSD_version >= 1400018
zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, bool *mp_busy)
#else
zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg)
#endif
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	struct thread *td;
	int cmd, type, error = 0;
	int bitsize;
	zfs_userquota_prop_t quota_type;
	struct dqblk64 dqblk = { 0 };

	td = curthread;
	cmd = cmds >> SUBCMDSHIFT;
	type = cmds & SUBCMDMASK;

	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
		return (error);
	/* id == -1 means "the calling thread's own uid/gid". */
	if (id == -1) {
		switch (type) {
		case USRQUOTA:
			id = td->td_ucred->cr_ruid;
			break;
		case GRPQUOTA:
			id = td->td_ucred->cr_rgid;
			break;
		default:
			error = EINVAL;
#if __FreeBSD_version < 1400018
			if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF)
				vfs_unbusy(vfsp);
#endif
			goto done;
		}
	}
	/*
	 * Map BSD type to:
	 * ZFS_PROP_USERUSED,
	 * ZFS_PROP_USERQUOTA,
	 * ZFS_PROP_GROUPUSED,
	 * ZFS_PROP_GROUPQUOTA
	 */
	switch (cmd) {
	case Q_SETQUOTA:
	case Q_SETQUOTA32:
		if (type == USRQUOTA)
			quota_type = ZFS_PROP_USERQUOTA;
		else if (type == GRPQUOTA)
			quota_type = ZFS_PROP_GROUPQUOTA;
		else
			error = EINVAL;
		break;
	case Q_GETQUOTA:
	case Q_GETQUOTA32:
		if (type == USRQUOTA)
			quota_type = ZFS_PROP_USERUSED;
		else if (type == GRPQUOTA)
			quota_type = ZFS_PROP_GROUPUSED;
		else
			error = EINVAL;
		break;
	}

	/*
	 * Depending on the cmd, we may need to get
	 * the ruid and domain (see fuidstr_to_sid?),
	 * the fuid (how?), or other information.
	 * Create fuid using zfs_fuid_create(zfsvfs, id,
	 * ZFS_OWNER or ZFS_GROUP, cr, &fuidp)?
	 * I think I can use just the id?
	 *
	 * Look at zfs_id_overquota() to look up a quota.
	 * zap_lookup(something, quotaobj, fuidstring,
	 * sizeof (long long), 1, &quota)
	 *
	 * See zfs_set_userquota() to set a quota.
	 */
	if ((uint32_t)type >= MAXQUOTAS) {
		error = EINVAL;
		goto done;
	}

	switch (cmd) {
	case Q_GETQUOTASIZE:
		bitsize = 64;
		error = copyout(&bitsize, arg, sizeof (int));
		break;
	case Q_QUOTAON:
		// As far as I can tell, you can't turn quotas on or off on zfs
		error = 0;
#if __FreeBSD_version < 1400018
		vfs_unbusy(vfsp);
#endif
		break;
	case Q_QUOTAOFF:
		error = ENOTSUP;
#if __FreeBSD_version < 1400018
		vfs_unbusy(vfsp);
#endif
		break;
	case Q_SETQUOTA:
		error = copyin(arg, &dqblk, sizeof (dqblk));
		if (error == 0)
			error = zfs_set_userquota(zfsvfs, quota_type,
			    "", id, dbtob(dqblk.dqb_bhardlimit));
		break;
	case Q_GETQUOTA:
		error = zfs_getquota(zfsvfs, id, type == GRPQUOTA, &dqblk);
		if (error == 0)
			error = copyout(&dqblk, arg, sizeof (dqblk));
		break;
	default:
		error = EINVAL;
		break;
	}
done:
	zfs_exit(zfsvfs, FTAG);
	return (error);
}


/* Is this mount read-only, as seen by the FreeBSD VFS layer? */
boolean_t
zfs_is_readonly(zfsvfs_t *zfsvfs)
{
	return (!!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY));
}

/*
 * VFS_SYNC entry point.  With a non-NULL vfsp, commit the ZIL for that
 * one filesystem; with NULL, force all pools to sync out dirty data.
 */
static int
zfs_sync(vfs_t *vfsp, int waitfor)
{

	/*
	 * Data integrity is job one. We don't want a compromised kernel
	 * writing to the storage pool, so we never sync during panic.
	 */
	if (panicstr)
		return (0);

	/*
	 * Ignore the system syncher. ZFS already commits async data
	 * at zfs_txg_timeout intervals.
	 */
	if (waitfor == MNT_LAZY)
		return (0);

	if (vfsp != NULL) {
		/*
		 * Sync a specific filesystem.
		 */
		zfsvfs_t *zfsvfs = vfsp->vfs_data;
		dsl_pool_t *dp;
		int error;

		if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
			return (error);
		dp = dmu_objset_pool(zfsvfs->z_os);

		/*
		 * If the system is shutting down, then skip any
		 * filesystems which may exist on a suspended pool.
		 */
		if (rebooting && spa_suspended(dp->dp_spa)) {
			zfs_exit(zfsvfs, FTAG);
			return (0);
		}

		if (zfsvfs->z_log != NULL)
			zil_commit(zfsvfs->z_log, 0);

		zfs_exit(zfsvfs, FTAG);
	} else {
		/*
		 * Sync all ZFS filesystems. This is what happens when you
		 * run sync(8). Unlike other filesystems, ZFS honors the
		 * request by waiting for all pools to commit all dirty data.
		 */
		spa_sync_allpools();
	}

	return (0);
}

/*
 * dsl_prop callback: keep z_atime, the VFS MNT_NOATIME flag and the
 * atime/noatime mount options in sync with the "atime" property.
 */
static void
atime_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	if (newval == TRUE) {
		zfsvfs->z_atime = TRUE;
		zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
	} else {
		zfsvfs->z_atime = FALSE;
		zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
	}
}

/*
 * dsl_prop callback: track the "xattr" property (off / dir-style / SA-style)
 * in z_flags and z_xattr_sa.
 */
static void
xattr_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	if (newval == ZFS_XATTR_OFF) {
		zfsvfs->z_flags &= ~ZSB_XATTR;
	} else {
		zfsvfs->z_flags |= ZSB_XATTR;

		if (newval == ZFS_XATTR_SA)
			zfsvfs->z_xattr_sa = B_TRUE;
		else
			zfsvfs->z_xattr_sa = B_FALSE;
	}
}

/*
 * dsl_prop callback: propagate the "recordsize" property to the max block
 * size and the statfs f_iosize reported for this mount.
 */
static void
blksz_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;
	ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os)));
	ASSERT3U(newval, >=, SPA_MINBLOCKSIZE);
	ASSERT(ISP2(newval));

	zfsvfs->z_max_blksz = newval;
	zfsvfs->z_vfs->mnt_stat.f_iosize = newval;
}

/*
 * dsl_prop callback: mirror the "readonly" property into VFS_RDONLY and
 * the ro/rw mount options.
 */
static void
readonly_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	if (newval) {
		/* XXX locking on vfs_flag? */
		zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
	} else {
		/* XXX locking on vfs_flag? */
		zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
	}
}

/*
 * dsl_prop callback: mirror the "setuid" property into VFS_NOSETUID and
 * the setuid/nosetuid mount options.
 */
static void
setuid_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	if (newval == FALSE) {
		zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
	} else {
		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
	}
}

/*
 * dsl_prop callback: mirror the "exec" property into VFS_NOEXEC and the
 * exec/noexec mount options.
 */
static void
exec_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	if (newval == FALSE) {
		zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
	} else {
		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
	}
}

/*
 * The nbmand mount option can be changed at mount time.
 * We can't allow it to be toggled on live file systems or incorrect
 * behavior may be seen from cifs clients
 *
 * This property isn't registered via dsl_prop_register(), but this callback
 * will be called when a file system is first mounted
 */
static void
nbmand_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;
	if (newval == FALSE) {
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0);
	} else {
		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND);
		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0);
	}
}

/* dsl_prop callback: "snapdir" property controls .zfs visibility. */
static void
snapdir_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	zfsvfs->z_show_ctldir = newval;
}

/* dsl_prop callback: cache the "aclmode" property. */
static void
acl_mode_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	zfsvfs->z_acl_mode = newval;
}

/* dsl_prop callback: cache the "aclinherit" property. */
static void
acl_inherit_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	zfsvfs->z_acl_inherit = newval;
}

/* dsl_prop callback: cache the "acltype" property. */
static void
acl_type_changed_cb(void *arg, uint64_t newval)
{
	zfsvfs_t *zfsvfs = arg;

	zfsvfs->z_acl_type = newval;
}

/*
 * Register the property-change callbacks above with the DSL for this
 * mount, then re-apply any temporary ("-o opt") mount overrides that
 * registration clobbered.  Returns 0 on success; on callback-registration
 * failure all callbacks are unregistered before returning the error.
 */
static int
zfs_register_callbacks(vfs_t *vfsp)
{
	struct dsl_dataset *ds = NULL;
	objset_t *os = NULL;
	zfsvfs_t *zfsvfs = NULL;
	uint64_t nbmand;
	boolean_t readonly = B_FALSE;
	boolean_t do_readonly = B_FALSE;
	boolean_t setuid = B_FALSE;
	boolean_t do_setuid = B_FALSE;
	boolean_t exec = B_FALSE;
	boolean_t do_exec = B_FALSE;
	boolean_t xattr = B_FALSE;
	boolean_t atime = B_FALSE;
	boolean_t do_atime = B_FALSE;
	boolean_t do_xattr = B_FALSE;
	int error = 0;

	ASSERT3P(vfsp, !=, NULL);
	zfsvfs = vfsp->vfs_data;
	ASSERT3P(zfsvfs, !=, NULL);
	os = zfsvfs->z_os;

	/*
	 * This function can be called for a snapshot when we update snapshot's
	 * mount point, which isn't really supported.
	 */
	if (dmu_objset_is_snapshot(os))
		return (EOPNOTSUPP);

	/*
	 * The act of registering our callbacks will destroy any mount
	 * options we may have. In order to enable temporary overrides
	 * of mount options, we stash away the current values and
	 * restore them after we register the callbacks.
	 */
	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) ||
	    !spa_writeable(dmu_objset_spa(os))) {
		readonly = B_TRUE;
		do_readonly = B_TRUE;
	} else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
		readonly = B_FALSE;
		do_readonly = B_TRUE;
	}
	if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
		setuid = B_FALSE;
		do_setuid = B_TRUE;
	} else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
		setuid = B_TRUE;
		do_setuid = B_TRUE;
	}
	if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
		exec = B_FALSE;
		do_exec = B_TRUE;
	} else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
		exec = B_TRUE;
		do_exec = B_TRUE;
	}
	if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
		zfsvfs->z_xattr = xattr = ZFS_XATTR_OFF;
		do_xattr = B_TRUE;
	} else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
		zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR;
		do_xattr = B_TRUE;
	} else if (vfs_optionisset(vfsp, MNTOPT_DIRXATTR, NULL)) {
		zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR;
		do_xattr = B_TRUE;
	} else if (vfs_optionisset(vfsp, MNTOPT_SAXATTR, NULL)) {
		zfsvfs->z_xattr = xattr = ZFS_XATTR_SA;
		do_xattr = B_TRUE;
	}
	if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
		atime = B_FALSE;
		do_atime = B_TRUE;
	} else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
		atime = B_TRUE;
		do_atime = B_TRUE;
	}

	/*
	 * We need to enter pool configuration here, so that we can use
	 * dsl_prop_get_int_ds() to handle the special nbmand property below.
	 * dsl_prop_get_integer() can not be used, because it has to acquire
	 * spa_namespace_lock and we can not do that because we already hold
	 * z_teardown_lock. The problem is that spa_write_cachefile() is called
	 * with spa_namespace_lock held and the function calls ZFS vnode
	 * operations to write the cache file and thus z_teardown_lock is
	 * acquired after spa_namespace_lock.
	 */
	ds = dmu_objset_ds(os);
	dsl_pool_config_enter(dmu_objset_pool(os), FTAG);

	/*
	 * nbmand is a special property. It can only be changed at
	 * mount time.
	 *
	 * This is weird, but it is documented to only be changeable
	 * at mount time.
	 */
	if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
		nbmand = B_FALSE;
	} else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
		nbmand = B_TRUE;
	} else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand)) != 0) {
		dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
		return (error);
	}

	/*
	 * Register property callbacks.
	 *
	 * It would probably be fine to just check for i/o error from
	 * the first prop_register(), but I guess I like to go
	 * overboard...
	 */
	error = dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_ACLTYPE), acl_type_changed_cb, zfsvfs);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs);
	error = error ? error : dsl_prop_register(ds,
	    zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
	    zfsvfs);
	dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
	if (error)
		goto unregister;

	/*
	 * Invoke our callbacks to restore temporary mount options.
	 */
	if (do_readonly)
		readonly_changed_cb(zfsvfs, readonly);
	if (do_setuid)
		setuid_changed_cb(zfsvfs, setuid);
	if (do_exec)
		exec_changed_cb(zfsvfs, exec);
	if (do_xattr)
		xattr_changed_cb(zfsvfs, xattr);
	if (do_atime)
		atime_changed_cb(zfsvfs, atime);

	nbmand_changed_cb(zfsvfs, nbmand);

	return (0);

unregister:
	dsl_prop_unregister_all(ds, zfsvfs);
	return (error);
}

/*
 * Associate this zfsvfs with the given objset, which must be owned.
 * This will cache a bunch of on-disk state from the objset in the
 * zfsvfs.
 */
static int
zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
{
	int error;
	uint64_t val;

	zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;
	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
	zfsvfs->z_os = os;

	error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
	if (error != 0)
		return (error);
	if (zfsvfs->z_version >
	    zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
		/*
		 * NOTE(review): the "\n." in the format string below looks
		 * transposed (newline prints mid-sentence); left as-is since
		 * only comments may change here.
		 */
		(void) printf("Can't mount a version %lld file system "
		    "on a version %lld pool\n. Pool must be upgraded to mount "
		    "this file system.", (u_longlong_t)zfsvfs->z_version,
		    (u_longlong_t)spa_version(dmu_objset_spa(os)));
		return (SET_ERROR(ENOTSUP));
	}
	error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val);
	if (error != 0)
		return (error);
	zfsvfs->z_norm = (int)val;

	error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val);
	if (error != 0)
		return (error);
	zfsvfs->z_utf8 = (val != 0);

	error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val);
	if (error != 0)
		return (error);
	zfsvfs->z_case = (uint_t)val;

	error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val);
	if (error != 0)
		return (error);
	zfsvfs->z_acl_type = (uint_t)val;

	/*
	 * Fold case on file systems that are always or sometimes case
	 * insensitive.
	 */
	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
	    zfsvfs->z_case == ZFS_CASE_MIXED)
		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;

	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);

	uint64_t sa_obj = 0;
	if (zfsvfs->z_use_sa) {
		/* should either have both of these objects or none */
		error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
		    &sa_obj);
		if (error != 0)
			return (error);

		error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val);
		if (error == 0 && val == ZFS_XATTR_SA)
			zfsvfs->z_xattr_sa = B_TRUE;
	}

	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
	    &zfsvfs->z_attr_table);
	if (error != 0)
		return (error);

	if (zfsvfs->z_version >= ZPL_VERSION_SA)
		sa_register_update_callback(os, zfs_sa_upgrade);

	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
	    &zfsvfs->z_root);
	if (error != 0)
		return (error);
	ASSERT3U(zfsvfs->z_root, !=, 0);

	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
	    &zfsvfs->z_unlinkedobj);
	if (error != 0)
		return (error);

	/*
	 * Each quota object below is optional: ENOENT simply means the
	 * corresponding quota type has never been set on this dataset.
	 */
	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
	    8, 1, &zfsvfs->z_userquota_obj);
	if (error == ENOENT)
		zfsvfs->z_userquota_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
	    8, 1, &zfsvfs->z_groupquota_obj);
	if (error == ENOENT)
		zfsvfs->z_groupquota_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA],
	    8, 1, &zfsvfs->z_projectquota_obj);
	if (error == ENOENT)
		zfsvfs->z_projectquota_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA],
	    8, 1, &zfsvfs->z_userobjquota_obj);
	if (error == ENOENT)
		zfsvfs->z_userobjquota_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA],
	    8, 1, &zfsvfs->z_groupobjquota_obj);
	if (error == ENOENT)
		zfsvfs->z_groupobjquota_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ,
	    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA],
	    8, 1, &zfsvfs->z_projectobjquota_obj);
	if (error == ENOENT)
		zfsvfs->z_projectobjquota_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
	    &zfsvfs->z_fuid_obj);
	if (error == ENOENT)
		zfsvfs->z_fuid_obj = 0;
	else if (error != 0)
		return (error);

	error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
	    &zfsvfs->z_shares_dir);
	if (error == ENOENT)
		zfsvfs->z_shares_dir = 0;
	else if (error != 0)
		return (error);

	/*
	 * Only use the name cache if we are looking for a
	 * name on a file system that does not require normalization
	 * or case folding. We can also look there if we happen to be
	 * on a non-normalizing, mixed sensitivity file system IF we
	 * are looking for the exact name (which is always the case on
	 * FreeBSD).
	 */
	zfsvfs->z_use_namecache = !zfsvfs->z_norm ||
	    ((zfsvfs->z_case == ZFS_CASE_MIXED) &&
	    !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER));

	return (0);
}

taskq_t *zfsvfs_taskq;

/* Taskq wrapper so zfs_unlinked_drain() can run asynchronously at mount. */
static void
zfsvfs_task_unlinked_drain(void *context, int pending __unused)
{

	zfs_unlinked_drain((zfsvfs_t *)context);
}

/*
 * Allocate a zfsvfs, own the named objset (read-only if requested or if
 * osname names a snapshot), and initialize the zfsvfs from it.  On success
 * *zfvp holds the new zfsvfs; on failure everything is cleaned up here.
 */
int
zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp)
{
	objset_t *os;
	zfsvfs_t *zfsvfs;
	int error;
	boolean_t ro = (readonly || (strchr(osname, '@') != NULL));

	/*
	 * XXX: Fix struct statfs so this isn't necessary!
	 *
	 * The 'osname' is used as the filesystem's special node, which means
	 * it must fit in statfs.f_mntfromname, or else it can't be
	 * enumerated, so libzfs_mnttab_find() returns NULL, which causes
	 * 'zfs unmount' to think it's not mounted when it is.
	 */
	if (strlen(osname) >= MNAMELEN)
		return (SET_ERROR(ENAMETOOLONG));

	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);

	error = dmu_objset_own(osname, DMU_OST_ZFS, ro, B_TRUE, zfsvfs,
	    &os);
	if (error != 0) {
		kmem_free(zfsvfs, sizeof (zfsvfs_t));
		return (error);
	}

	error = zfsvfs_create_impl(zfvp, zfsvfs, os);

	return (error);
}


/*
 * Second half of zfsvfs creation: set up locks, lists and per-object
 * mutexes, then cache objset state via zfsvfs_init().  On failure the
 * objset is disowned and the zfsvfs freed (caller gets *zfvp == NULL).
 */
int
zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os)
{
	int error;

	zfsvfs->z_vfs = NULL;
	zfsvfs->z_parent = zfsvfs;

	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
	    offsetof(znode_t, z_link_node));
	TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0,
	    zfsvfs_task_unlinked_drain, zfsvfs);
	ZFS_TEARDOWN_INIT(zfsvfs);
	ZFS_TEARDOWN_INACTIVE_INIT(zfsvfs);
	rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
	for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++)
		mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);

	error = zfsvfs_init(zfsvfs, os);
	if (error != 0) {
		dmu_objset_disown(os, B_TRUE, zfsvfs);
		*zfvp = NULL;
		kmem_free(zfsvfs, sizeof (zfsvfs_t));
		return (error);
	}

	*zfvp = zfsvfs;
	return (0);
}

/*
 * Finish wiring a zfsvfs to its vfs: register property callbacks, open the
 * ZIL, and (when mounting, as opposed to resuming after an online recv)
 * drain the unlinked set and replay the intent log.
 */
static int
zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
{
	int error;

	/*
	 * Check for a bad on-disk format version now since we
	 * lied about owning the dataset readonly before.
	 */
	if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) &&
	    dmu_objset_incompatible_encryption_version(zfsvfs->z_os))
		return (SET_ERROR(EROFS));

	error = zfs_register_callbacks(zfsvfs->z_vfs);
	if (error)
		return (error);

	/*
	 * If we are not mounting (ie: online recv), then we don't
	 * have to worry about replaying the log as we blocked all
	 * operations out since we closed the ZIL.
	 */
	if (mounting) {
		boolean_t readonly;

		ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL);
		error = dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os);
		if (error)
			return (error);
		zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data,
		    &zfsvfs->z_kstat.dk_zil_sums);

		/*
		 * During replay we remove the read only flag to
		 * allow replays to succeed.
		 */
		readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
		if (readonly != 0) {
			zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
		} else {
			dsl_dir_t *dd;
			zap_stats_t zs;

			if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj,
			    &zs) == 0) {
				dataset_kstats_update_nunlinks_kstat(
				    &zfsvfs->z_kstat, zs.zs_num_entries);
				dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
				    "num_entries in unlinked set: %llu",
				    (u_longlong_t)zs.zs_num_entries);
			}

			zfs_unlinked_drain(zfsvfs);
			dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
			dd->dd_activity_cancelled = B_FALSE;
		}

		/*
		 * Parse and replay the intent log.
		 *
		 * Because of ziltest, this must be done after
		 * zfs_unlinked_drain(). (Further note: ziltest
		 * doesn't use readonly mounts, where
		 * zfs_unlinked_drain() isn't called.) This is because
		 * ziltest causes spa_sync() to think it's committed,
		 * but actually it is not, so the intent log contains
		 * many txg's worth of changes.
		 *
		 * In particular, if object N is in the unlinked set in
		 * the last txg to actually sync, then it could be
		 * actually freed in a later txg and then reallocated
		 * in a yet later txg. This would write a "create
		 * object N" record to the intent log. Normally, this
		 * would be fine because the spa_sync() would have
		 * written out the fact that object N is free, before
		 * we could write the "create object N" intent log
		 * record.
		 *
		 * But when we are in ziltest mode, we advance the "open
		 * txg" without actually spa_sync()-ing the changes to
		 * disk. So we would see that object N is still
		 * allocated and in the unlinked set, and there is an
		 * intent log record saying to allocate it.
		 */
		if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
			if (zil_replay_disable) {
				zil_destroy(zfsvfs->z_log, B_FALSE);
			} else {
				boolean_t use_nc = zfsvfs->z_use_namecache;
				zfsvfs->z_use_namecache = B_FALSE;
				zfsvfs->z_replay = B_TRUE;
				zil_replay(zfsvfs->z_os, zfsvfs,
				    zfs_replay_vector);
				zfsvfs->z_replay = B_FALSE;
				zfsvfs->z_use_namecache = use_nc;
			}
		}

		/* restore readonly bit */
		if (readonly != 0)
			zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
	} else {
		ASSERT3P(zfsvfs->z_kstat.dk_kstats, !=, NULL);
		zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data,
		    &zfsvfs->z_kstat.dk_zil_sums);
	}

	/*
	 * Set the objset user_ptr to track its zfsvfs.
	 */
	mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
	mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);

	return (0);
}

/*
 * Tear down and free a zfsvfs.  The znode list must already be empty
 * (asserted below); the objset must have been disowned by the caller.
 */
void
zfsvfs_free(zfsvfs_t *zfsvfs)
{
	int i;

	zfs_fuid_destroy(zfsvfs);

	mutex_destroy(&zfsvfs->z_znodes_lock);
	mutex_destroy(&zfsvfs->z_lock);
	ASSERT3U(zfsvfs->z_nr_znodes, ==, 0);
	list_destroy(&zfsvfs->z_all_znodes);
	ZFS_TEARDOWN_DESTROY(zfsvfs);
	ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs);
	rw_destroy(&zfsvfs->z_fuid_lock);
	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
	dataset_kstats_destroy(&zfsvfs->z_kstat);
	kmem_free(zfsvfs, sizeof (zfsvfs_t));
}

/* Recompute FUID/SA eligibility after a ZPL version change. */
static void
zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
{
	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
}

/*
 * Mount helper: create the zfsvfs for osname and attach it to vfsp.
 * NOTE(review): this function continues past the end of this chunk;
 * only the visible prefix is documented here.
 */
static int
zfs_domount(vfs_t *vfsp, char *osname)
{
	uint64_t recordsize, fsid_guid;
	int error = 0;
	zfsvfs_t *zfsvfs;

	ASSERT3P(vfsp, !=, NULL);
	ASSERT3P(osname, !=, NULL);

	error = zfsvfs_create(osname, vfsp->mnt_flag & MNT_RDONLY, &zfsvfs);
	if (error)
		return (error);
	zfsvfs->z_vfs = vfsp;

	if ((error = dsl_prop_get_integer(osname,
	    "recordsize", &recordsize, NULL)))
		goto out;
	zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE;
	zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize;

	vfsp->vfs_data = zfsvfs;
	vfsp->mnt_flag |= MNT_LOCAL;
	vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
	vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
	vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
	/*
	 * This can cause a loss of coherence between ARC and page cache
	 * on ZoF - unclear if the problem is in FreeBSD or ZoF
	 */
	vfsp->mnt_kern_flag |= MNTK_NO_IOPF;	/* vn_io_fault can be used */
vfsp->mnt_kern_flag |= MNTK_NOMSYNC; 1202 vfsp->mnt_kern_flag |= MNTK_VMSETSIZE_BUG; 1203 1204 #if defined(_KERNEL) && !defined(KMEM_DEBUG) 1205 vfsp->mnt_kern_flag |= MNTK_FPLOOKUP; 1206 #endif 1207 /* 1208 * The fsid is 64 bits, composed of an 8-bit fs type, which 1209 * separates our fsid from any other filesystem types, and a 1210 * 56-bit objset unique ID. The objset unique ID is unique to 1211 * all objsets open on this system, provided by unique_create(). 1212 * The 8-bit fs type must be put in the low bits of fsid[1] 1213 * because that's where other Solaris filesystems put it. 1214 */ 1215 fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os); 1216 ASSERT3U((fsid_guid & ~((1ULL << 56) - 1)), ==, 0); 1217 vfsp->vfs_fsid.val[0] = fsid_guid; 1218 vfsp->vfs_fsid.val[1] = ((fsid_guid >> 32) << 8) | 1219 (vfsp->mnt_vfc->vfc_typenum & 0xFF); 1220 1221 /* 1222 * Set features for file system. 1223 */ 1224 zfs_set_fuid_feature(zfsvfs); 1225 1226 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 1227 uint64_t pval; 1228 1229 atime_changed_cb(zfsvfs, B_FALSE); 1230 readonly_changed_cb(zfsvfs, B_TRUE); 1231 if ((error = dsl_prop_get_integer(osname, 1232 "xattr", &pval, NULL))) 1233 goto out; 1234 xattr_changed_cb(zfsvfs, pval); 1235 if ((error = dsl_prop_get_integer(osname, 1236 "acltype", &pval, NULL))) 1237 goto out; 1238 acl_type_changed_cb(zfsvfs, pval); 1239 zfsvfs->z_issnap = B_TRUE; 1240 zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED; 1241 1242 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 1243 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1244 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 1245 } else { 1246 if ((error = zfsvfs_setup(zfsvfs, B_TRUE))) 1247 goto out; 1248 } 1249 1250 vfs_mountedfrom(vfsp, osname); 1251 1252 if (!zfsvfs->z_issnap) 1253 zfsctl_create(zfsvfs); 1254 out: 1255 if (error) { 1256 dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs); 1257 zfsvfs_free(zfsvfs); 1258 } else { 1259 atomic_inc_32(&zfs_active_fs_count); 1260 } 1261 1262 return (error); 1263 } 1264 1265 
/*
 * Unregister all property-change callbacks previously registered by
 * zfs_register_callbacks().  Snapshots never register callbacks, so
 * they are skipped.
 */
static void
zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
{
	objset_t *os = zfsvfs->z_os;

	if (!dmu_objset_is_snapshot(os))
		dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs);
}

/*
 * Extract the pool component (everything before the first '/') of
 * 'osname' into 'poolname'.  The destination must hold at least
 * MAXNAMELEN bytes; returns ENAMETOOLONG if the pool name would not
 * fit.
 */
static int
getpoolname(const char *osname, char *poolname)
{
	char *p;

	p = strchr(osname, '/');
	if (p == NULL) {
		if (strlen(osname) >= MAXNAMELEN)
			return (ENAMETOOLONG);
		(void) strcpy(poolname, osname);
	} else {
		if (p - osname >= MAXNAMELEN)
			return (ENAMETOOLONG);
		/* Copy only the pool prefix; strlcpy NUL-terminates. */
		(void) strlcpy(poolname, osname, p - osname + 1);
	}
	return (0);
}

/*
 * Parse and strip a leading '!' from the dataset name, which requests
 * a checkpoint-rewind import of the root pool (see zfs_mount()).
 * 'name' is modified in place.
 */
static void
fetch_osname_options(char *name, bool *checkpointrewind)
{

	if (name[0] == '!') {
		*checkpointrewind = true;
		/* Shift the string left over the '!', including the NUL. */
		memmove(name, name + 1, strlen(name));
	} else {
		*checkpointrewind = false;
	}
}

/*
 * VFS_MOUNT entry point: validate privileges (including delegated
 * ZFS 'mount' permission and jailed-snapshot policy), handle the
 * remount case by re-registering property callbacks, import the root
 * pool when this is the initial root mount, and finally call
 * zfs_domount().
 */
static int
zfs_mount(vfs_t *vfsp)
{
	kthread_t *td = curthread;
	vnode_t *mvp = vfsp->mnt_vnodecovered;
	cred_t *cr = td->td_ucred;
	char *osname;
	int error = 0;
	int canwrite;
	bool checkpointrewind, isctlsnap = false;

	if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL))
		return (SET_ERROR(EINVAL));

	/*
	 * If full-owner-access is enabled and delegated administration is
	 * turned on, we must set nosuid.
	 */
	if (zfs_super_owner &&
	    dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) {
		secpolicy_fs_mount_clearopts(cr, vfsp);
	}

	fetch_osname_options(osname, &checkpointrewind);
	/* Mounting a snapshot under its .zfs control directory? */
	isctlsnap = (mvp != NULL && zfsctl_is_node(mvp) &&
	    strchr(osname, '@') != NULL);

	/*
	 * Check for mount privilege?
	 *
	 * If we don't have privilege then see if
	 * we have local permission to allow it
	 */
	error = secpolicy_fs_mount(cr, mvp, vfsp);
	if (error && isctlsnap) {
		secpolicy_fs_mount_clearopts(cr, vfsp);
	} else if (error) {
		if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0)
			goto out;

		if (!(vfsp->vfs_flag & MS_REMOUNT)) {
			vattr_t vattr;

			/*
			 * Make sure user is the owner of the mount point
			 * or has sufficient privileges.
			 */

			vattr.va_mask = AT_UID;

			vn_lock(mvp, LK_SHARED | LK_RETRY);
			if (VOP_GETATTR(mvp, &vattr, cr)) {
				VOP_UNLOCK1(mvp);
				goto out;
			}

			if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 &&
			    VOP_ACCESS(mvp, VWRITE, cr, td) != 0) {
				VOP_UNLOCK1(mvp);
				goto out;
			}
			VOP_UNLOCK1(mvp);
		}

		secpolicy_fs_mount_clearopts(cr, vfsp);
	}

	/*
	 * Refuse to mount a filesystem if we are in a local zone and the
	 * dataset is not visible.
	 */
	if (!INGLOBALZONE(curproc) &&
	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
		boolean_t mount_snapshot = B_FALSE;

		/*
		 * Snapshots may be mounted in .zfs for unjailed datasets
		 * if allowed by the jail param zfs.mount_snapshot.
		 */
		if (isctlsnap) {
			struct prison *pr;
			struct zfs_jailparam *zjp;

			pr = curthread->td_ucred->cr_prison;
			mtx_lock(&pr->pr_mtx);
			zjp = osd_jail_get(pr, zfs_jailparam_slot);
			mtx_unlock(&pr->pr_mtx);
			if (zjp && zjp->mount_snapshot)
				mount_snapshot = B_TRUE;
		}
		if (!mount_snapshot) {
			error = SET_ERROR(EPERM);
			goto out;
		}
	}

	vfsp->vfs_flag |= MNT_NFS4ACLS;

	/*
	 * When doing a remount, we simply refresh our temporary properties
	 * according to those options set in the current VFS options.
	 */
	if (vfsp->vfs_flag & MS_REMOUNT) {
		zfsvfs_t *zfsvfs = vfsp->vfs_data;

		/*
		 * Refresh mount options with z_teardown_lock blocking I/O while
		 * the filesystem is in an inconsistent state.
		 * The lock also serializes this code with filesystem
		 * manipulations between entry to zfs_suspend_fs() and return
		 * from zfs_resume_fs().
		 */
		ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
		zfs_unregister_callbacks(zfsvfs);
		error = zfs_register_callbacks(vfsp);
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
		goto out;
	}

	/* Initial root mount: try hard to import the requested root pool. */
	if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 &&
	    (vfsp->vfs_flag & MNT_UPDATE) == 0) {
		char pname[MAXNAMELEN];

		error = getpoolname(osname, pname);
		if (error == 0)
			error = spa_import_rootpool(pname, checkpointrewind);
		if (error)
			goto out;
	}
	DROP_GIANT();
	error = zfs_domount(vfsp, osname);
	PICKUP_GIANT();

out:
	return (error);
}

/*
 * VFS_STATFS entry point: fill in 'statp' from the objset's space
 * accounting.  Block counts are reported in units of f_bsize
 * (SPA_MINBLOCKSIZE); inode counts are synthesized since ZFS does not
 * preallocate them.
 */
static int
zfs_statfs(vfs_t *vfsp, struct statfs *statp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	uint64_t refdbytes, availbytes, usedobjs, availobjs;
	int error;

	statp->f_version = STATFS_VERSION;

	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
		return (error);

	dmu_objset_space(zfsvfs->z_os,
	    &refdbytes, &availbytes, &usedobjs, &availobjs);

	/*
	 * The underlying storage pool actually uses multiple block sizes.
	 * We report the fragsize as the smallest block size we support,
	 * and we report our blocksize as the filesystem's maximum blocksize.
	 */
	statp->f_bsize = SPA_MINBLOCKSIZE;
	statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize;

	/*
	 * The following report "total" blocks of various kinds in the
	 * file system, but reported in terms of f_frsize - the
	 * "fragment" size.
	 */

	statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
	statp->f_bfree = availbytes / statp->f_bsize;
	statp->f_bavail = statp->f_bfree; /* no root reservation */

	/*
	 * statvfs() should really be called statufs(), because it assumes
	 * static metadata.  ZFS doesn't preallocate files, so the best
	 * we can do is report the max that could possibly fit in f_files,
	 * and that minus the number actually used in f_ffree.
	 * For f_ffree, report the smaller of the number of object available
	 * and the number of blocks (each object will take at least a block).
	 */
	statp->f_ffree = MIN(availobjs, statp->f_bfree);
	statp->f_files = statp->f_ffree + usedobjs;

	/*
	 * We're a zfs filesystem.
	 */
	strlcpy(statp->f_fstypename, "zfs",
	    sizeof (statp->f_fstypename));

	strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
	    sizeof (statp->f_mntfromname));
	strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
	    sizeof (statp->f_mntonname));

	statp->f_namemax = MAXNAMELEN - 1;

	zfs_exit(zfsvfs, FTAG);
	return (0);
}

/*
 * VFS_ROOT entry point: return the locked root vnode of the
 * filesystem.  The vnode is locked with 'flags' only after dropping
 * zfs_enter(), so teardown cannot deadlock against the lock request.
 */
static int
zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	znode_t *rootzp;
	int error;

	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
		return (error);

	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
	if (error == 0)
		*vpp = ZTOV(rootzp);

	zfs_exit(zfsvfs, FTAG);

	if (error == 0) {
		error = vn_lock(*vpp, flags);
		if (error != 0) {
			VN_RELE(*vpp);
			*vpp = NULL;
		}
	}
	return (error);
}

/*
 * Teardown the zfsvfs::z_os.
 *
 * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
 * and 'z_teardown_inactive_lock' held.
 */
static int
zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
{
	znode_t *zp;
	dsl_dir_t *dd;

	/*
	 * If someone has not already unmounted this file system,
	 * drain the zrele_taskq to ensure all active references to the
	 * zfsvfs_t have been handled only then can it be safely destroyed.
	 */
	if (zfsvfs->z_os) {
		/*
		 * If we're unmounting we have to wait for the list to
		 * drain completely.
		 *
		 * If we're not unmounting there's no guarantee the list
		 * will drain completely, but zreles run from the taskq
		 * may add the parents of dir-based xattrs to the taskq
		 * so we want to wait for these.
		 *
		 * We can safely read z_nr_znodes without locking because the
		 * VFS has already blocked operations which add to the
		 * z_all_znodes list and thus increment z_nr_znodes.
		 */
		int round = 0;
		while (zfsvfs->z_nr_znodes > 0) {
			taskq_wait_outstanding(dsl_pool_zrele_taskq(
			    dmu_objset_pool(zfsvfs->z_os)), 0);
			/* Non-unmount callers give up after two rounds. */
			if (++round > 1 && !unmounting)
				break;
		}
	}
	ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);

	if (!unmounting) {
		/*
		 * We purge the parent filesystem's vfsp as the parent
		 * filesystem and all of its snapshots have their vnode's
		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
		 * 'z_parent' is self referential for non-snapshots.
		 */
#ifdef FREEBSD_NAMECACHE
#if __FreeBSD_version >= 1300117
		cache_purgevfs(zfsvfs->z_parent->z_vfs);
#else
		cache_purgevfs(zfsvfs->z_parent->z_vfs, true);
#endif
#endif
	}

	/*
	 * Close the zil.  NB: Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 */
	if (zfsvfs->z_log) {
		zil_close(zfsvfs->z_log);
		zfsvfs->z_log = NULL;
	}

	ZFS_TEARDOWN_INACTIVE_ENTER_WRITE(zfsvfs);

	/*
	 * If we are not unmounting (ie: online recv) and someone already
	 * unmounted this file system while we were doing the switcheroo,
	 * or a reopen of z_os failed then just bail out now.
	 */
	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
		ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
		return (SET_ERROR(EIO));
	}

	/*
	 * At this point there are no vops active, and any new vops will
	 * fail with EIO since we have z_teardown_lock for writer (only
	 * relevant for forced unmount).
	 *
	 * Release all holds on dbufs.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
		if (zp->z_sa_hdl != NULL) {
			zfs_znode_dmu_fini(zp);
		}
	}
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * If we are unmounting, set the unmounted flag and let new vops
	 * unblock.  zfs_inactive will have the unmounted behavior, and all
	 * other vops will fail with EIO.
	 */
	if (unmounting) {
		zfsvfs->z_unmounted = B_TRUE;
		ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
	}

	/*
	 * z_os will be NULL if there was an error in attempting to reopen
	 * zfsvfs, so just return as the properties had already been
	 * unregistered and cached data had been evicted before.
	 */
	if (zfsvfs->z_os == NULL)
		return (0);

	/*
	 * Unregister properties.
	 */
	zfs_unregister_callbacks(zfsvfs);

	/*
	 * Evict cached data
	 */
	if (!zfs_is_readonly(zfsvfs))
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
	dmu_objset_evict_dbufs(zfsvfs->z_os);
	dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
	dsl_dir_cancel_waiters(dd);

	return (0);
}

/*
 * VFS_UNMOUNT entry point: check unmount permission (including the
 * delegated ZFS 'mount' permission), unmount .zfs snapshots, flush
 * vnodes, cancel the unlinked-drain task, tear down the zfsvfs and
 * disown the objset, then destroy the .zfs node and free the zfsvfs.
 */
static int
zfs_umount(vfs_t *vfsp, int fflag)
{
	kthread_t *td = curthread;
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	objset_t *os;
	cred_t *cr = td->td_ucred;
	int ret;

	ret = secpolicy_fs_unmount(cr, vfsp);
	if (ret) {
		if (dsl_deleg_access((char *)vfsp->vfs_resource,
		    ZFS_DELEG_PERM_MOUNT, cr))
			return (ret);
	}

	/*
	 * Unmount any snapshots mounted under .zfs before unmounting the
	 * dataset itself.
	 */
	if (zfsvfs->z_ctldir != NULL) {
		if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
			return (ret);
	}

	if (fflag & MS_FORCE) {
		/*
		 * Mark file system as unmounted before calling
		 * vflush(FORCECLOSE).  This way we ensure no future vnops
		 * will be called and risk operating on DOOMED vnodes.
		 */
		ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
		zfsvfs->z_unmounted = B_TRUE;
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
	}

	/*
	 * Flush all the files.
	 */
	ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td);
	if (ret != 0)
		return (ret);
	/* Cancel (or wait out) any pending unlinked-drain task. */
	while (taskqueue_cancel(zfsvfs_taskq->tq_queue,
	    &zfsvfs->z_unlinked_drain_task, NULL) != 0)
		taskqueue_drain(zfsvfs_taskq->tq_queue,
		    &zfsvfs->z_unlinked_drain_task);

	VERIFY0(zfsvfs_teardown(zfsvfs, B_TRUE));
	os = zfsvfs->z_os;

	/*
	 * z_os will be NULL if there was an error in
	 * attempting to reopen zfsvfs.
	 */
	if (os != NULL) {
		/*
		 * Unset the objset user_ptr.
		 */
		mutex_enter(&os->os_user_ptr_lock);
		dmu_objset_set_user(os, NULL);
		mutex_exit(&os->os_user_ptr_lock);

		/*
		 * Finally release the objset
		 */
		dmu_objset_disown(os, B_TRUE, zfsvfs);
	}

	/*
	 * We can now safely destroy the '.zfs' directory node.
	 */
	if (zfsvfs->z_ctldir != NULL)
		zfsctl_destroy(zfsvfs);
	zfs_freevfs(vfsp);

	return (0);
}

/*
 * VFS_VGET entry point: translate an inode number into a locked
 * vnode.  Virtual .zfs entries are refused so NFS falls back to
 * LOOKUP; unlinked znodes are rejected with EINVAL.
 */
static int
zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	znode_t *zp;
	int err;

	/*
	 * zfs_zget() can't operate on virtual entries like .zfs/ or
	 * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP.
	 * This will make NFS to switch to LOOKUP instead of using VGET.
	 */
	if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR ||
	    (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir))
		return (EOPNOTSUPP);

	if ((err = zfs_enter(zfsvfs, FTAG)) != 0)
		return (err);
	err = zfs_zget(zfsvfs, ino, &zp);
	if (err == 0 && zp->z_unlinked) {
		vrele(ZTOV(zp));
		err = EINVAL;
	}
	if (err == 0)
		*vpp = ZTOV(zp);
	zfs_exit(zfsvfs, FTAG);
	if (err == 0) {
		err = vn_lock(*vpp, flags);
		if (err != 0)
			vrele(*vpp);
	}
	if (err != 0)
		*vpp = NULL;
	return (err);
}

/*
 * VFS_CHECKEXP entry point: NFS export check.  Always delegated to
 * the parent filesystem's vfs (see comment below); the extflagsp
 * signature differs across FreeBSD versions.
 */
static int
#if __FreeBSD_version >= 1300098
zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int *secflavors)
#else
zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int **secflavors)
#endif
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;

	/*
	 * If this is regular file system vfsp is the same as
	 * zfsvfs->z_parent->z_vfs, but if it is snapshot,
	 * zfsvfs->z_parent->z_vfs represents parent file system
	 * which we have to use here, because only this file system
	 * has mnt_export configured.
	 */
	return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp,
	    credanonp, numsecflavors, secflavors));
}

/* Both ZFS fid layouts must fit inside the generic struct fid. */
_Static_assert(sizeof (struct fid) >= SHORT_FID_LEN,
	"struct fid bigger than SHORT_FID_LEN");
_Static_assert(sizeof (struct fid) >= LONG_FID_LEN,
	"struct fid bigger than LONG_FID_LEN");

/*
 * VFS_FHTOVP entry point: convert an NFS file handle into a locked
 * vnode.  Long fids carry an objset id used to redirect into the
 * proper snapshot filesystem; a zero generation selects the .zfs
 * control-directory tree.
 */
static int
zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
{
	struct componentname cn;
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	znode_t *zp;
	vnode_t *dvp;
	uint64_t object = 0;
	uint64_t fid_gen = 0;
	uint64_t setgen = 0;
	uint64_t gen_mask;
	uint64_t zp_gen;
	int i, err;

	*vpp = NULL;

	if ((err = zfs_enter(zfsvfs, FTAG)) != 0)
		return (err);

	/*
	 * On FreeBSD we can get snapshot's mount point or its parent file
	 * system mount point depending if snapshot is already mounted or not.
	 */
	if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) {
		zfid_long_t *zlfid = (zfid_long_t *)fidp;
		uint64_t objsetid = 0;

		/* Decode little-endian byte arrays into integers. */
		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);

		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);

		zfs_exit(zfsvfs, FTAG);

		/* Switch to the snapshot's own zfsvfs. */
		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
		if (err)
			return (SET_ERROR(EINVAL));
		if ((err = zfs_enter(zfsvfs, FTAG)) != 0)
			return (err);
	}

	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
		zfid_short_t *zfid = (zfid_short_t *)fidp;

		for (i = 0; i < sizeof (zfid->zf_object); i++)
			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);

		for (i = 0; i < sizeof (zfid->zf_gen); i++)
			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
	} else {
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EINVAL));
	}

	if (fidp->fid_len == LONG_FID_LEN && setgen != 0) {
		zfs_exit(zfsvfs, FTAG);
		dprintf("snapdir fid: fid_gen (%llu) and setgen (%llu)\n",
		    (u_longlong_t)fid_gen, (u_longlong_t)setgen);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * A zero fid_gen means we are in .zfs or the .zfs/snapshot
	 * directory tree.  If the object == zfsvfs->z_shares_dir, then
	 * we are in the .zfs/shares directory tree.
	 */
	if ((fid_gen == 0 &&
	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) ||
	    (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) {
		zfs_exit(zfsvfs, FTAG);
		VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp));
		if (object == ZFSCTL_INO_SNAPDIR) {
			cn.cn_nameptr = "snapshot";
			cn.cn_namelen = strlen(cn.cn_nameptr);
			cn.cn_nameiop = LOOKUP;
			cn.cn_flags = ISLASTCN | LOCKLEAF;
			cn.cn_lkflags = flags;
			VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
			vput(dvp);
		} else if (object == zfsvfs->z_shares_dir) {
			/*
			 * XXX This branch must not be taken,
			 * if it is, then the lookup below will
			 * explode.
			 */
			cn.cn_nameptr = "shares";
			cn.cn_namelen = strlen(cn.cn_nameptr);
			cn.cn_nameiop = LOOKUP;
			cn.cn_flags = ISLASTCN;
			cn.cn_lkflags = flags;
			VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
			vput(dvp);
		} else {
			*vpp = dvp;
		}
		return (err);
	}

	/* 'i' is the width of zf_gen from the decode loop above. */
	gen_mask = -1ULL >> (64 - 8 * i);

	dprintf("getting %llu [%llu mask %llx]\n", (u_longlong_t)object,
	    (u_longlong_t)fid_gen,
	    (u_longlong_t)gen_mask);
	if ((err = zfs_zget(zfsvfs, object, &zp))) {
		zfs_exit(zfsvfs, FTAG);
		return (err);
	}
	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
	    sizeof (uint64_t));
	zp_gen = zp_gen & gen_mask;
	if (zp_gen == 0)
		zp_gen = 1;
	if (zp->z_unlinked || zp_gen != fid_gen) {
		dprintf("znode gen (%llu) != fid gen (%llu)\n",
		    (u_longlong_t)zp_gen, (u_longlong_t)fid_gen);
		vrele(ZTOV(zp));
		zfs_exit(zfsvfs, FTAG);
		return (SET_ERROR(EINVAL));
	}

	*vpp = ZTOV(zp);
	zfs_exit(zfsvfs, FTAG);
	err = vn_lock(*vpp, flags);
	if (err == 0)
		vnode_create_vobject(*vpp, zp->z_size, curthread);
	else
		*vpp = NULL;
	return (err);
}

/*
 * Block out VOPs and close zfsvfs_t::z_os
 *
 * Note, if successful, then we return with the
'z_teardown_lock' and 1936 * 'z_teardown_inactive_lock' write held. We leave ownership of the underlying 1937 * dataset and objset intact so that they can be atomically handed off during 1938 * a subsequent rollback or recv operation and the resume thereafter. 1939 */ 1940 int 1941 zfs_suspend_fs(zfsvfs_t *zfsvfs) 1942 { 1943 int error; 1944 1945 if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) 1946 return (error); 1947 1948 return (0); 1949 } 1950 1951 /* 1952 * Rebuild SA and release VOPs. Note that ownership of the underlying dataset 1953 * is an invariant across any of the operations that can be performed while the 1954 * filesystem was suspended. Whether it succeeded or failed, the preconditions 1955 * are the same: the relevant objset and associated dataset are owned by 1956 * zfsvfs, held, and long held on entry. 1957 */ 1958 int 1959 zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) 1960 { 1961 int err; 1962 znode_t *zp; 1963 1964 ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs)); 1965 ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs)); 1966 1967 /* 1968 * We already own this, so just update the objset_t, as the one we 1969 * had before may have been evicted. 1970 */ 1971 objset_t *os; 1972 VERIFY3P(ds->ds_owner, ==, zfsvfs); 1973 VERIFY(dsl_dataset_long_held(ds)); 1974 dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds)); 1975 dsl_pool_config_enter(dp, FTAG); 1976 VERIFY0(dmu_objset_from_ds(ds, &os)); 1977 dsl_pool_config_exit(dp, FTAG); 1978 1979 err = zfsvfs_init(zfsvfs, os); 1980 if (err != 0) 1981 goto bail; 1982 1983 ds->ds_dir->dd_activity_cancelled = B_FALSE; 1984 VERIFY0(zfsvfs_setup(zfsvfs, B_FALSE)); 1985 1986 zfs_set_fuid_feature(zfsvfs); 1987 1988 /* 1989 * Attempt to re-establish all the active znodes with 1990 * their dbufs. If a zfs_rezget() fails, then we'll let 1991 * any potential callers discover that via zfs_enter_verify_zp 1992 * when they try to use their znode. 
1993 */ 1994 mutex_enter(&zfsvfs->z_znodes_lock); 1995 for (zp = list_head(&zfsvfs->z_all_znodes); zp; 1996 zp = list_next(&zfsvfs->z_all_znodes, zp)) { 1997 (void) zfs_rezget(zp); 1998 } 1999 mutex_exit(&zfsvfs->z_znodes_lock); 2000 2001 bail: 2002 /* release the VOPs */ 2003 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); 2004 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 2005 2006 if (err) { 2007 /* 2008 * Since we couldn't setup the sa framework, try to force 2009 * unmount this file system. 2010 */ 2011 if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) { 2012 vfs_ref(zfsvfs->z_vfs); 2013 (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread); 2014 } 2015 } 2016 return (err); 2017 } 2018 2019 static void 2020 zfs_freevfs(vfs_t *vfsp) 2021 { 2022 zfsvfs_t *zfsvfs = vfsp->vfs_data; 2023 2024 zfsvfs_free(zfsvfs); 2025 2026 atomic_dec_32(&zfs_active_fs_count); 2027 } 2028 2029 #ifdef __i386__ 2030 static int desiredvnodes_backup; 2031 #include <sys/vmmeter.h> 2032 2033 2034 #include <vm/vm_page.h> 2035 #include <vm/vm_object.h> 2036 #include <vm/vm_kern.h> 2037 #include <vm/vm_map.h> 2038 #endif 2039 2040 static void 2041 zfs_vnodes_adjust(void) 2042 { 2043 #ifdef __i386__ 2044 int newdesiredvnodes; 2045 2046 desiredvnodes_backup = desiredvnodes; 2047 2048 /* 2049 * We calculate newdesiredvnodes the same way it is done in 2050 * vntblinit(). If it is equal to desiredvnodes, it means that 2051 * it wasn't tuned by the administrator and we can tune it down. 
2052 */ 2053 newdesiredvnodes = min(maxproc + vm_cnt.v_page_count / 4, 2 * 2054 vm_kmem_size / (5 * (sizeof (struct vm_object) + 2055 sizeof (struct vnode)))); 2056 if (newdesiredvnodes == desiredvnodes) 2057 desiredvnodes = (3 * newdesiredvnodes) / 4; 2058 #endif 2059 } 2060 2061 static void 2062 zfs_vnodes_adjust_back(void) 2063 { 2064 2065 #ifdef __i386__ 2066 desiredvnodes = desiredvnodes_backup; 2067 #endif 2068 } 2069 2070 void 2071 zfs_init(void) 2072 { 2073 2074 printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n"); 2075 2076 /* 2077 * Initialize .zfs directory structures 2078 */ 2079 zfsctl_init(); 2080 2081 /* 2082 * Initialize znode cache, vnode ops, etc... 2083 */ 2084 zfs_znode_init(); 2085 2086 /* 2087 * Reduce number of vnodes. Originally number of vnodes is calculated 2088 * with UFS inode in mind. We reduce it here, because it's too big for 2089 * ZFS/i386. 2090 */ 2091 zfs_vnodes_adjust(); 2092 2093 dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info); 2094 2095 zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0); 2096 } 2097 2098 void 2099 zfs_fini(void) 2100 { 2101 taskq_destroy(zfsvfs_taskq); 2102 zfsctl_fini(); 2103 zfs_znode_fini(); 2104 zfs_vnodes_adjust_back(); 2105 } 2106 2107 int 2108 zfs_busy(void) 2109 { 2110 return (zfs_active_fs_count != 0); 2111 } 2112 2113 /* 2114 * Release VOPs and unmount a suspended filesystem. 2115 */ 2116 int 2117 zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) 2118 { 2119 ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs)); 2120 ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs)); 2121 2122 /* 2123 * We already own this, so just hold and rele it to update the 2124 * objset_t, as the one we had before may have been evicted. 
2125 */ 2126 objset_t *os; 2127 VERIFY3P(ds->ds_owner, ==, zfsvfs); 2128 VERIFY(dsl_dataset_long_held(ds)); 2129 dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds)); 2130 dsl_pool_config_enter(dp, FTAG); 2131 VERIFY0(dmu_objset_from_ds(ds, &os)); 2132 dsl_pool_config_exit(dp, FTAG); 2133 zfsvfs->z_os = os; 2134 2135 /* release the VOPs */ 2136 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); 2137 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 2138 2139 /* 2140 * Try to force unmount this file system. 2141 */ 2142 (void) zfs_umount(zfsvfs->z_vfs, 0); 2143 zfsvfs->z_unmounted = B_TRUE; 2144 return (0); 2145 } 2146 2147 int 2148 zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) 2149 { 2150 int error; 2151 objset_t *os = zfsvfs->z_os; 2152 dmu_tx_t *tx; 2153 2154 if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 2155 return (SET_ERROR(EINVAL)); 2156 2157 if (newvers < zfsvfs->z_version) 2158 return (SET_ERROR(EINVAL)); 2159 2160 if (zfs_spa_version_map(newvers) > 2161 spa_version(dmu_objset_spa(zfsvfs->z_os))) 2162 return (SET_ERROR(ENOTSUP)); 2163 2164 tx = dmu_tx_create(os); 2165 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); 2166 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { 2167 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, 2168 ZFS_SA_ATTRS); 2169 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2170 } 2171 error = dmu_tx_assign(tx, TXG_WAIT); 2172 if (error) { 2173 dmu_tx_abort(tx); 2174 return (error); 2175 } 2176 2177 error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 2178 8, 1, &newvers, tx); 2179 2180 if (error) { 2181 dmu_tx_commit(tx); 2182 return (error); 2183 } 2184 2185 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { 2186 uint64_t sa_obj; 2187 2188 ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=, 2189 SPA_VERSION_SA); 2190 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, 2191 DMU_OT_NONE, 0, tx); 2192 2193 error = zap_add(os, MASTER_NODE_OBJ, 2194 ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); 2195 ASSERT0(error); 2196 2197 
		VERIFY0(sa_set_sa_object(os, sa_obj));
		sa_register_update_callback(os, zfs_sa_upgrade);
	}

	spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
	    "from %ju to %ju", (uintmax_t)zfsvfs->z_version,
	    (uintmax_t)newvers);
	dmu_tx_commit(tx);

	/* Update the in-core cached copies of the version. */
	zfsvfs->z_version = newvers;
	os->os_version = newvers;

	zfs_set_fuid_feature(zfsvfs);

	return (0);
}

/*
 * Read a property stored within the master node.
 *
 * Serves version/normalize/utf8only/case from the objset's in-core cache
 * when available, falling back to a ZAP lookup in the master node and,
 * finally, to a per-property default.  Successful lookups are written
 * back into the cache.  os may be NULL, in which case only the defaults
 * are consulted.
 */
int
zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
{
	uint64_t *cached_copy = NULL;

	/*
	 * Figure out where in the objset_t the cached copy would live, if it
	 * is available for the requested property.
	 */
	if (os != NULL) {
		switch (prop) {
		case ZFS_PROP_VERSION:
			cached_copy = &os->os_version;
			break;
		case ZFS_PROP_NORMALIZE:
			cached_copy = &os->os_normalization;
			break;
		case ZFS_PROP_UTF8ONLY:
			cached_copy = &os->os_utf8only;
			break;
		case ZFS_PROP_CASE:
			cached_copy = &os->os_casesensitivity;
			break;
		default:
			break;
		}
	}
	if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
		*value = *cached_copy;
		return (0);
	}

	/*
	 * If the property wasn't cached, look up the file system's value for
	 * the property. For the version property, we look up a slightly
	 * different string.
	 */
	const char *pname;
	/* ENOENT here means "fall through to the default" below. */
	int error = ENOENT;
	if (prop == ZFS_PROP_VERSION) {
		pname = ZPL_VERSION_STR;
	} else {
		pname = zfs_prop_to_name(prop);
	}

	if (os != NULL) {
		ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
	}

	if (error == ENOENT) {
		/* No value set, use the default value */
		switch (prop) {
		case ZFS_PROP_VERSION:
			*value = ZPL_VERSION;
			break;
		case ZFS_PROP_NORMALIZE:
		case ZFS_PROP_UTF8ONLY:
			*value = 0;
			break;
		case ZFS_PROP_CASE:
			*value = ZFS_CASE_SENSITIVE;
			break;
		case ZFS_PROP_ACLTYPE:
			*value = ZFS_ACLTYPE_NFSV4;
			break;
		default:
			/* No default for this property: report ENOENT. */
			return (error);
		}
		error = 0;
	}

	/*
	 * If one of the methods for getting the property value above worked,
	 * copy it into the objset_t's cache.
	 */
	if (error == 0 && cached_copy != NULL) {
		*cached_copy = *value;
	}

	return (error);
}

/*
 * Return true if the corresponding vfs's unmounted flag is set.
 * Otherwise return false.
 * If this function returns true we know VFS unmount has been initiated.
 */
boolean_t
zfs_get_vfs_flag_unmounted(objset_t *os)
{
	zfsvfs_t *zfvp;
	boolean_t unmounted = B_FALSE;

	ASSERT3U(dmu_objset_type(os), ==, DMU_OST_ZFS);

	/* os_user_ptr_lock keeps the zfsvfs user pointer stable here. */
	mutex_enter(&os->os_user_ptr_lock);
	zfvp = dmu_objset_get_user(os);
	if (zfvp != NULL && zfvp->z_vfs != NULL &&
	    (zfvp->z_vfs->mnt_kern_flag & MNTK_UNMOUNT))
		unmounted = B_TRUE;
	mutex_exit(&os->os_user_ptr_lock);

	return (unmounted);
}

#ifdef _KERNEL
/*
 * After a dataset rename from oldname to newname, walk the global mount
 * list and rewrite f_mntfromname for every mount whose source was
 * oldname itself, or a descendant ("oldname/...") or snapshot
 * ("oldname@...") of it, so statfs output reflects the new name.
 */
void
zfsvfs_update_fromname(const char *oldname, const char *newname)
{
	char tmpbuf[MAXPATHLEN];
	struct mount *mp;
	char *fromname;
	size_t oldlen;

	oldlen = strlen(oldname);

	mtx_lock(&mountlist_mtx);
	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		fromname = mp->mnt_stat.f_mntfromname;
		/* Exact match: replace outright. */
		if (strcmp(fromname, oldname) == 0) {
			(void) strlcpy(fromname, newname,
			    sizeof (mp->mnt_stat.f_mntfromname));
			continue;
		}
		/* Prefix match on a child ('/') or snapshot ('@') boundary. */
		if (strncmp(fromname, oldname, oldlen) == 0 &&
		    (fromname[oldlen] == '/' || fromname[oldlen] == '@')) {
			(void) snprintf(tmpbuf, sizeof (tmpbuf), "%s%s",
			    newname, fromname + oldlen);
			(void) strlcpy(fromname, tmpbuf,
			    sizeof (mp->mnt_stat.f_mntfromname));
			continue;
		}
	}
	mtx_unlock(&mountlist_mtx);
}
#endif

/*
 * Find a prison with ZFS info.
 * Return the ZFS info and the (locked) prison.
 */
static struct zfs_jailparam *
zfs_jailparam_find(struct prison *spr, struct prison **prp)
{
	struct prison *pr;
	struct zfs_jailparam *zjp;

	/*
	 * Walk up the prison hierarchy until we find one with its own
	 * ZFS OSD slot data; prison0 always falls back to the static
	 * zfs_jailparam0 defaults.  The prison we stop at is returned
	 * locked in *prp — the caller must unlock it.
	 */
	for (pr = spr; ; pr = pr->pr_parent) {
		mtx_lock(&pr->pr_mtx);
		if (pr == &prison0) {
			zjp = &zfs_jailparam0;
			break;
		}
		zjp = osd_jail_get(pr, zfs_jailparam_slot);
		if (zjp != NULL)
			break;
		mtx_unlock(&pr->pr_mtx);
	}
	*prp = pr;

	return (zjp);
}

/*
 * Ensure a prison has its own ZFS info.  If zjpp is non-null, point it to the
 * ZFS info and lock the prison.
 */
static void
zfs_jailparam_alloc(struct prison *pr, struct zfs_jailparam **zjpp)
{
	struct prison *ppr;
	struct zfs_jailparam *zjp, *nzjp;
	void **rsv;

	/* If this prison already has ZFS info, return that. */
	zjp = zfs_jailparam_find(pr, &ppr);
	if (ppr == pr)
		goto done;

	/*
	 * Allocate a new info record.  Then check again, in case something
	 * changed during the allocation.  The allocation and OSD slot
	 * reservation are done unlocked (M_WAITOK may sleep).
	 */
	mtx_unlock(&ppr->pr_mtx);
	nzjp = malloc(sizeof (struct zfs_jailparam), M_PRISON, M_WAITOK);
	rsv = osd_reserve(zfs_jailparam_slot);
	zjp = zfs_jailparam_find(pr, &ppr);
	if (ppr == pr) {
		/* Someone beat us to it: discard our allocation. */
		free(nzjp, M_PRISON);
		osd_free_reserved(rsv);
		goto done;
	}
	/* Inherit the initial values from the ancestor. */
	mtx_lock(&pr->pr_mtx);
	(void) osd_jail_set_reserved(pr, zfs_jailparam_slot, rsv, nzjp);
	(void) memcpy(nzjp, zjp, sizeof (*zjp));
	zjp = nzjp;
	mtx_unlock(&ppr->pr_mtx);
done:
	/* On exit pr's mutex is held iff the caller asked for zjpp. */
	if (zjpp != NULL)
		*zjpp = zjp;
	else
		mtx_unlock(&pr->pr_mtx);
}

/*
 * Jail OSD methods for ZFS VFS info.
 */

/*
 * PR_METHOD_CREATE: give a new prison its own ZFS info unless it was
 * explicitly created with "zfs=inherit".
 */
static int
zfs_jailparam_create(void *obj, void *data)
{
	struct prison *pr = obj;
	struct vfsoptlist *opts = data;
	int jsys;

	if (vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys)) == 0 &&
	    jsys == JAIL_SYS_INHERIT)
		return (0);
	/*
	 * Inherit a prison's initial values from its parent
	 * (different from JAIL_SYS_INHERIT which also inherits changes).
	 */
	zfs_jailparam_alloc(pr, NULL);
	return (0);
}

/*
 * PR_METHOD_GET: report the prison's "zfs" mode and, when it has its
 * own info, the zfs.mount_snapshot value.  ENOENT from vfs_setopt just
 * means the caller didn't ask for that parameter, so it is ignored.
 */
static int
zfs_jailparam_get(void *obj, void *data)
{
	struct prison *ppr, *pr = obj;
	struct vfsoptlist *opts = data;
	struct zfs_jailparam *zjp;
	int jsys, error;

	/* ppr comes back locked from zfs_jailparam_find(). */
	zjp = zfs_jailparam_find(pr, &ppr);
	jsys = (ppr == pr) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
	error = vfs_setopt(opts, "zfs", &jsys, sizeof (jsys));
	if (error != 0 && error != ENOENT)
		goto done;
	if (jsys == JAIL_SYS_NEW) {
		error = vfs_setopt(opts, "zfs.mount_snapshot",
		    &zjp->mount_snapshot, sizeof (zjp->mount_snapshot));
		if (error != 0 && error != ENOENT)
			goto done;
	} else {
		/*
		 * If this prison is inheriting its ZFS info, report
		 * empty/zero parameters.
		 */
		static int mount_snapshot = 0;

		error = vfs_setopt(opts, "zfs.mount_snapshot",
		    &mount_snapshot, sizeof (mount_snapshot));
		if (error != 0 && error != ENOENT)
			goto done;
	}
	error = 0;
done:
	mtx_unlock(&ppr->pr_mtx);
	return (error);
}

/*
 * PR_METHOD_SET: apply "zfs" / "zfs.mount_snapshot" parameters to a
 * prison.  Values were already validated by zfs_jailparam_check().
 */
static int
zfs_jailparam_set(void *obj, void *data)
{
	struct prison *pr = obj;
	struct prison *ppr;
	struct vfsoptlist *opts = data;
	int error, jsys, mount_snapshot;

	/* Set the parameters, which should be correct.
	 */
	error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys));
	if (error == ENOENT)
		/* -1 marks "parameter not given". */
		jsys = -1;
	error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot,
	    sizeof (mount_snapshot));
	if (error == ENOENT)
		mount_snapshot = -1;
	else
		/* Setting any zfs.* parameter implies "zfs=new". */
		jsys = JAIL_SYS_NEW;
	switch (jsys) {
	case JAIL_SYS_NEW:
	{
		/* "zfs=new" or "zfs.*": the prison gets its own ZFS info. */
		struct zfs_jailparam *zjp;

		/*
		 * A child jail cannot have more permissions than its parent
		 */
		if (pr->pr_parent != &prison0) {
			zjp = zfs_jailparam_find(pr->pr_parent, &ppr);
			mtx_unlock(&ppr->pr_mtx);
			if (zjp->mount_snapshot < mount_snapshot) {
				return (EPERM);
			}
		}
		/* Returns with pr's mutex held and zjp pointing at its info. */
		zfs_jailparam_alloc(pr, &zjp);
		if (mount_snapshot != -1)
			zjp->mount_snapshot = mount_snapshot;
		mtx_unlock(&pr->pr_mtx);
		break;
	}
	case JAIL_SYS_INHERIT:
		/* "zfs=inherit": inherit the parent's ZFS info. */
		mtx_lock(&pr->pr_mtx);
		osd_jail_del(pr, zfs_jailparam_slot);
		mtx_unlock(&pr->pr_mtx);
		break;
	case -1:
		/*
		 * If the setting being changed is not ZFS related
		 * then do nothing.
		 */
		break;
	}

	return (0);
}

/*
 * PR_METHOD_CHECK: validate "zfs" / "zfs.mount_snapshot" parameters
 * before zfs_jailparam_set() is allowed to run.
 */
static int
zfs_jailparam_check(void *obj __unused, void *data)
{
	struct vfsoptlist *opts = data;
	int error, jsys, mount_snapshot;

	/* Check that the parameters are correct.
	 */
	error = vfs_copyopt(opts, "zfs", &jsys, sizeof (jsys));
	if (error != ENOENT) {
		if (error != 0)
			return (error);
		/* Only "new" and "inherit" modes are meaningful here. */
		if (jsys != JAIL_SYS_NEW && jsys != JAIL_SYS_INHERIT)
			return (EINVAL);
	}
	error = vfs_copyopt(opts, "zfs.mount_snapshot", &mount_snapshot,
	    sizeof (mount_snapshot));
	if (error != ENOENT) {
		if (error != 0)
			return (error);
		/* mount_snapshot is a boolean flag. */
		if (mount_snapshot != 0 && mount_snapshot != 1)
			return (EINVAL);
	}
	return (0);
}

/*
 * OSD destructor: free a prison's ZFS info when the prison goes away
 * or switches to inheriting.
 */
static void
zfs_jailparam_destroy(void *data)
{

	free(data, M_PRISON);
}

/*
 * Module load hook: register the jail OSD slot with the method table
 * above, then give every already-existing prison its own copy of the
 * default parameters.
 */
static void
zfs_jailparam_sysinit(void *arg __unused)
{
	struct prison *pr;
	osd_method_t methods[PR_MAXMETHOD] = {
		[PR_METHOD_CREATE] = zfs_jailparam_create,
		[PR_METHOD_GET] = zfs_jailparam_get,
		[PR_METHOD_SET] = zfs_jailparam_set,
		[PR_METHOD_CHECK] = zfs_jailparam_check,
	};

	zfs_jailparam_slot = osd_jail_register(zfs_jailparam_destroy, methods);
	/* Copy the defaults to any existing prisons. */
	sx_slock(&allprison_lock);
	TAILQ_FOREACH(pr, &allprison, pr_list)
		zfs_jailparam_alloc(pr, NULL);
	sx_sunlock(&allprison_lock);
}

/*
 * Module unload hook: deregister the OSD slot, which also runs
 * zfs_jailparam_destroy() for each prison's stored info.
 */
static void
zfs_jailparam_sysuninit(void *arg __unused)
{

	osd_jail_deregister(zfs_jailparam_slot);
}

SYSINIT(zfs_jailparam_sysinit, SI_SUB_DRIVERS, SI_ORDER_ANY,
    zfs_jailparam_sysinit, NULL);
SYSUNINIT(zfs_jailparam_sysuninit, SI_SUB_DRIVERS, SI_ORDER_ANY,
    zfs_jailparam_sysuninit, NULL);