1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>. 24 * All rights reserved. 25 * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 26 * Copyright (c) 2014 Integros [integros.com] 27 * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 28 */ 29 30 /* Portions Copyright 2010 Robert Milkowski */ 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/kernel.h> 36 #include <sys/sysmacros.h> 37 #include <sys/kmem.h> 38 #include <sys/acl.h> 39 #include <sys/vnode.h> 40 #include <sys/vfs.h> 41 #include <sys/mntent.h> 42 #include <sys/mount.h> 43 #include <sys/cmn_err.h> 44 #include <sys/zfs_znode.h> 45 #include <sys/zfs_vnops.h> 46 #include <sys/zfs_dir.h> 47 #include <sys/zil.h> 48 #include <sys/fs/zfs.h> 49 #include <sys/dmu.h> 50 #include <sys/dsl_prop.h> 51 #include <sys/dsl_dataset.h> 52 #include <sys/dsl_deleg.h> 53 #include <sys/spa.h> 54 #include <sys/zap.h> 55 #include <sys/sa.h> 56 #include <sys/sa_impl.h> 57 #include <sys/policy.h> 58 #include <sys/atomic.h> 59 #include <sys/zfs_ioctl.h> 60 #include <sys/zfs_ctldir.h> 61 #include <sys/zfs_fuid.h> 62 #include <sys/sunddi.h> 63 #include <sys/dmu_objset.h> 64 #include <sys/dsl_dir.h> 65 #include <sys/jail.h> 66 #include <ufs/ufs/quota.h> 67 #include <sys/zfs_quota.h> 68 69 #include "zfs_comutil.h" 70 71 #ifndef MNTK_VMSETSIZE_BUG 72 #define MNTK_VMSETSIZE_BUG 0 73 #endif 74 #ifndef MNTK_NOMSYNC 75 #define MNTK_NOMSYNC 8 76 #endif 77 78 struct mtx zfs_debug_mtx; 79 MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 80 81 SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 82 83 int zfs_super_owner; 84 SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, 85 "File system owners can perform privileged operation on file systems"); 86 87 int zfs_debug_level; 88 SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0, 89 "Debug level"); 90 91 SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); 92 static int zfs_version_acl = ZFS_ACL_VERSION; 93 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, 94 "ZFS_ACL_VERSION"); 95 static int zfs_version_spa = SPA_VERSION; 96 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, 97 "SPA_VERSION"); 98 static int zfs_version_zpl = ZPL_VERSION; 99 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, 100 "ZPL_VERSION"); 101 102 #if __FreeBSD_version >= 1400018 103 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, 104 bool *mp_busy); 105 #else 106 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg); 107 #endif 108 static int zfs_mount(vfs_t *vfsp); 109 static int zfs_umount(vfs_t *vfsp, int fflag); 110 static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); 111 static int zfs_statfs(vfs_t *vfsp, struct statfs *statp); 112 static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 113 static int zfs_sync(vfs_t *vfsp, int waitfor); 114 #if __FreeBSD_version >= 1300098 115 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp, 116 struct ucred **credanonp, int *numsecflavors, int *secflavors); 117 #else 118 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 119 struct ucred **credanonp, int *numsecflavors, int **secflavors); 120 #endif 121 static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp); 122 static void zfs_freevfs(vfs_t *vfsp); 123 124 struct vfsops zfs_vfsops = { 125 .vfs_mount = zfs_mount, 126 .vfs_unmount = zfs_umount, 127 #if __FreeBSD_version >= 1300049 128 .vfs_root = vfs_cache_root, 129 .vfs_cachedroot = zfs_root, 130 #else 131 .vfs_root = zfs_root, 132 #endif 133 .vfs_statfs = zfs_statfs, 134 .vfs_vget = zfs_vget, 135 .vfs_sync = zfs_sync, 136 .vfs_checkexp = zfs_checkexp, 137 .vfs_fhtovp = zfs_fhtovp, 138 .vfs_quotactl = zfs_quotactl, 139 }; 140 141 VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); 142 143 /* 144 * We need to keep a count of active fs's. 145 * This is necessary to prevent our module 146 * from being unloaded after a umount -f 147 */ 148 static uint32_t zfs_active_fs_count = 0; 149 150 int 151 zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val, 152 char *setpoint) 153 { 154 int error; 155 zfsvfs_t *zfvp; 156 vfs_t *vfsp; 157 objset_t *os; 158 uint64_t tmp = *val; 159 160 error = dmu_objset_from_ds(ds, &os); 161 if (error != 0) 162 return (error); 163 164 error = getzfsvfs_impl(os, &zfvp); 165 if (error != 0) 166 return (error); 167 if (zfvp == NULL) 168 return (ENOENT); 169 vfsp = zfvp->z_vfs; 170 switch (zfs_prop) { 171 case ZFS_PROP_ATIME: 172 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) 173 tmp = 0; 174 if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) 175 tmp = 1; 176 break; 177 case ZFS_PROP_DEVICES: 178 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 179 tmp = 0; 180 if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) 181 tmp = 1; 182 break; 183 case ZFS_PROP_EXEC: 184 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) 185 tmp = 0; 186 if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) 187 tmp = 1; 188 break; 189 case ZFS_PROP_SETUID: 190 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 191 tmp = 0; 192 if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) 193 tmp = 1; 194 break; 195 case ZFS_PROP_READONLY: 196 if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) 197 tmp = 0; 198 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) 199 tmp = 1; 200 break; 201 case ZFS_PROP_XATTR: 202 if (zfvp->z_flags & ZSB_XATTR) 203 tmp = zfvp->z_xattr; 204 break; 205 case ZFS_PROP_NBMAND: 206 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) 207 tmp = 0; 208 if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) 209 tmp = 1; 210 break; 211 default: 212 vfs_unbusy(vfsp); 213 return (ENOENT); 214 } 215 216 vfs_unbusy(vfsp); 217 if (tmp != *val) { 218 (void) strcpy(setpoint, "temporary"); 219 *val = tmp; 220 } 221 return (0); 222 } 223 224 static int 225 zfs_getquota(zfsvfs_t *zfsvfs, uid_t id, int isgroup, struct dqblk64 *dqp) 226 { 227 int error = 0; 228 char buf[32]; 229 uint64_t usedobj, quotaobj; 230 uint64_t quota, used = 0; 231 timespec_t now; 232 233 usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 234 quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; 235 236 if (quotaobj == 0 || zfsvfs->z_replay) { 237 error = ENOENT; 238 goto done; 239 } 240 (void) sprintf(buf, "%llx", (longlong_t)id); 241 if ((error = zap_lookup(zfsvfs->z_os, quotaobj, 242 buf, sizeof (quota), 1, "a)) != 0) { 243 dprintf("%s(%d): quotaobj lookup failed\n", 244 __FUNCTION__, __LINE__); 245 goto done; 246 } 247 /* 248 * quota(8) uses bsoftlimit as "quoota", and hardlimit as "limit". 249 * So we set them to be the same. 250 */ 251 dqp->dqb_bsoftlimit = dqp->dqb_bhardlimit = btodb(quota); 252 error = zap_lookup(zfsvfs->z_os, usedobj, buf, sizeof (used), 1, &used); 253 if (error && error != ENOENT) { 254 dprintf("%s(%d): usedobj failed; %d\n", 255 __FUNCTION__, __LINE__, error); 256 goto done; 257 } 258 dqp->dqb_curblocks = btodb(used); 259 dqp->dqb_ihardlimit = dqp->dqb_isoftlimit = 0; 260 vfs_timestamp(&now); 261 /* 262 * Setting this to 0 causes FreeBSD quota(8) to print 263 * the number of days since the epoch, which isn't 264 * particularly useful. 265 */ 266 dqp->dqb_btime = dqp->dqb_itime = now.tv_sec; 267 done: 268 return (error); 269 } 270 271 static int 272 #if __FreeBSD_version >= 1400018 273 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, bool *mp_busy) 274 #else 275 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg) 276 #endif 277 { 278 zfsvfs_t *zfsvfs = vfsp->vfs_data; 279 struct thread *td; 280 int cmd, type, error = 0; 281 int bitsize; 282 zfs_userquota_prop_t quota_type; 283 struct dqblk64 dqblk = { 0 }; 284 285 td = curthread; 286 cmd = cmds >> SUBCMDSHIFT; 287 type = cmds & SUBCMDMASK; 288 289 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 290 return (error); 291 if (id == -1) { 292 switch (type) { 293 case USRQUOTA: 294 id = td->td_ucred->cr_ruid; 295 break; 296 case GRPQUOTA: 297 id = td->td_ucred->cr_rgid; 298 break; 299 default: 300 error = EINVAL; 301 #if __FreeBSD_version < 1400018 302 if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF) 303 vfs_unbusy(vfsp); 304 #endif 305 goto done; 306 } 307 } 308 /* 309 * Map BSD type to: 310 * ZFS_PROP_USERUSED, 311 * ZFS_PROP_USERQUOTA, 312 * ZFS_PROP_GROUPUSED, 313 * ZFS_PROP_GROUPQUOTA 314 */ 315 switch (cmd) { 316 case Q_SETQUOTA: 317 case Q_SETQUOTA32: 318 if (type == USRQUOTA) 319 quota_type = ZFS_PROP_USERQUOTA; 320 else if (type == GRPQUOTA) 321 quota_type = ZFS_PROP_GROUPQUOTA; 322 else 323 error = EINVAL; 324 break; 325 case Q_GETQUOTA: 326 case Q_GETQUOTA32: 327 if (type == USRQUOTA) 328 quota_type = ZFS_PROP_USERUSED; 329 else if (type == GRPQUOTA) 330 quota_type = ZFS_PROP_GROUPUSED; 331 else 332 error = EINVAL; 333 break; 334 } 335 336 /* 337 * Depending on the cmd, we may need to get 338 * the ruid and domain (see fuidstr_to_sid?), 339 * the fuid (how?), or other information. 340 * Create fuid using zfs_fuid_create(zfsvfs, id, 341 * ZFS_OWNER or ZFS_GROUP, cr, &fuidp)? 342 * I think I can use just the id? 343 * 344 * Look at zfs_id_overquota() to look up a quota. 345 * zap_lookup(something, quotaobj, fuidstring, 346 * sizeof (long long), 1, "a) 347 * 348 * See zfs_set_userquota() to set a quota. 349 */ 350 if ((uint32_t)type >= MAXQUOTAS) { 351 error = EINVAL; 352 goto done; 353 } 354 355 switch (cmd) { 356 case Q_GETQUOTASIZE: 357 bitsize = 64; 358 error = copyout(&bitsize, arg, sizeof (int)); 359 break; 360 case Q_QUOTAON: 361 // As far as I can tell, you can't turn quotas on or off on zfs 362 error = 0; 363 #if __FreeBSD_version < 1400018 364 vfs_unbusy(vfsp); 365 #endif 366 break; 367 case Q_QUOTAOFF: 368 error = ENOTSUP; 369 #if __FreeBSD_version < 1400018 370 vfs_unbusy(vfsp); 371 #endif 372 break; 373 case Q_SETQUOTA: 374 error = copyin(arg, &dqblk, sizeof (dqblk)); 375 if (error == 0) 376 error = zfs_set_userquota(zfsvfs, quota_type, 377 "", id, dbtob(dqblk.dqb_bhardlimit)); 378 break; 379 case Q_GETQUOTA: 380 error = zfs_getquota(zfsvfs, id, type == GRPQUOTA, &dqblk); 381 if (error == 0) 382 error = copyout(&dqblk, arg, sizeof (dqblk)); 383 break; 384 default: 385 error = EINVAL; 386 break; 387 } 388 done: 389 zfs_exit(zfsvfs, FTAG); 390 return (error); 391 } 392 393 394 boolean_t 395 zfs_is_readonly(zfsvfs_t *zfsvfs) 396 { 397 return (!!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY)); 398 } 399 400 static int 401 zfs_sync(vfs_t *vfsp, int waitfor) 402 { 403 404 /* 405 * Data integrity is job one. We don't want a compromised kernel 406 * writing to the storage pool, so we never sync during panic. 407 */ 408 if (panicstr) 409 return (0); 410 411 /* 412 * Ignore the system syncher. ZFS already commits async data 413 * at zfs_txg_timeout intervals. 414 */ 415 if (waitfor == MNT_LAZY) 416 return (0); 417 418 if (vfsp != NULL) { 419 /* 420 * Sync a specific filesystem. 421 */ 422 zfsvfs_t *zfsvfs = vfsp->vfs_data; 423 dsl_pool_t *dp; 424 int error; 425 426 error = vfs_stdsync(vfsp, waitfor); 427 if (error != 0) 428 return (error); 429 430 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 431 return (error); 432 dp = dmu_objset_pool(zfsvfs->z_os); 433 434 /* 435 * If the system is shutting down, then skip any 436 * filesystems which may exist on a suspended pool. 437 */ 438 if (rebooting && spa_suspended(dp->dp_spa)) { 439 zfs_exit(zfsvfs, FTAG); 440 return (0); 441 } 442 443 if (zfsvfs->z_log != NULL) 444 zil_commit(zfsvfs->z_log, 0); 445 446 zfs_exit(zfsvfs, FTAG); 447 } else { 448 /* 449 * Sync all ZFS filesystems. This is what happens when you 450 * run sync(8). Unlike other filesystems, ZFS honors the 451 * request by waiting for all pools to commit all dirty data. 452 */ 453 spa_sync_allpools(); 454 } 455 456 return (0); 457 } 458 459 static void 460 atime_changed_cb(void *arg, uint64_t newval) 461 { 462 zfsvfs_t *zfsvfs = arg; 463 464 if (newval == TRUE) { 465 zfsvfs->z_atime = TRUE; 466 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 467 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 468 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 469 } else { 470 zfsvfs->z_atime = FALSE; 471 zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 472 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 473 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 474 } 475 } 476 477 static void 478 xattr_changed_cb(void *arg, uint64_t newval) 479 { 480 zfsvfs_t *zfsvfs = arg; 481 482 if (newval == ZFS_XATTR_OFF) { 483 zfsvfs->z_flags &= ~ZSB_XATTR; 484 } else { 485 zfsvfs->z_flags |= ZSB_XATTR; 486 487 if (newval == ZFS_XATTR_SA) 488 zfsvfs->z_xattr_sa = B_TRUE; 489 else 490 zfsvfs->z_xattr_sa = B_FALSE; 491 } 492 } 493 494 static void 495 blksz_changed_cb(void *arg, uint64_t newval) 496 { 497 zfsvfs_t *zfsvfs = arg; 498 ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os))); 499 ASSERT3U(newval, >=, SPA_MINBLOCKSIZE); 500 ASSERT(ISP2(newval)); 501 502 zfsvfs->z_max_blksz = newval; 503 zfsvfs->z_vfs->mnt_stat.f_iosize = newval; 504 } 505 506 static void 507 readonly_changed_cb(void *arg, uint64_t newval) 508 { 509 zfsvfs_t *zfsvfs = arg; 510 511 if (newval) { 512 /* XXX locking on vfs_flag? */ 513 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 514 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 515 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 516 } else { 517 /* XXX locking on vfs_flag? */ 518 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 519 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 520 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 521 } 522 } 523 524 static void 525 setuid_changed_cb(void *arg, uint64_t newval) 526 { 527 zfsvfs_t *zfsvfs = arg; 528 529 if (newval == FALSE) { 530 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 531 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 532 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 533 } else { 534 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 535 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 536 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 537 } 538 } 539 540 static void 541 exec_changed_cb(void *arg, uint64_t newval) 542 { 543 zfsvfs_t *zfsvfs = arg; 544 545 if (newval == FALSE) { 546 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 547 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 548 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 549 } else { 550 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 551 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 552 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 553 } 554 } 555 556 /* 557 * The nbmand mount option can be changed at mount time. 558 * We can't allow it to be toggled on live file systems or incorrect 559 * behavior may be seen from cifs clients 560 * 561 * This property isn't registered via dsl_prop_register(), but this callback 562 * will be called when a file system is first mounted 563 */ 564 static void 565 nbmand_changed_cb(void *arg, uint64_t newval) 566 { 567 zfsvfs_t *zfsvfs = arg; 568 if (newval == FALSE) { 569 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 570 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 571 } else { 572 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 573 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 574 } 575 } 576 577 static void 578 snapdir_changed_cb(void *arg, uint64_t newval) 579 { 580 zfsvfs_t *zfsvfs = arg; 581 582 zfsvfs->z_show_ctldir = newval; 583 } 584 585 static void 586 acl_mode_changed_cb(void *arg, uint64_t newval) 587 { 588 zfsvfs_t *zfsvfs = arg; 589 590 zfsvfs->z_acl_mode = newval; 591 } 592 593 static void 594 acl_inherit_changed_cb(void *arg, uint64_t newval) 595 { 596 zfsvfs_t *zfsvfs = arg; 597 598 zfsvfs->z_acl_inherit = newval; 599 } 600 601 static void 602 acl_type_changed_cb(void *arg, uint64_t newval) 603 { 604 zfsvfs_t *zfsvfs = arg; 605 606 zfsvfs->z_acl_type = newval; 607 } 608 609 static int 610 zfs_register_callbacks(vfs_t *vfsp) 611 { 612 struct dsl_dataset *ds = NULL; 613 objset_t *os = NULL; 614 zfsvfs_t *zfsvfs = NULL; 615 uint64_t nbmand; 616 boolean_t readonly = B_FALSE; 617 boolean_t do_readonly = B_FALSE; 618 boolean_t setuid = B_FALSE; 619 boolean_t do_setuid = B_FALSE; 620 boolean_t exec = B_FALSE; 621 boolean_t do_exec = B_FALSE; 622 boolean_t xattr = B_FALSE; 623 boolean_t atime = B_FALSE; 624 boolean_t do_atime = B_FALSE; 625 boolean_t do_xattr = B_FALSE; 626 int error = 0; 627 628 ASSERT3P(vfsp, !=, NULL); 629 zfsvfs = vfsp->vfs_data; 630 ASSERT3P(zfsvfs, !=, NULL); 631 os = zfsvfs->z_os; 632 633 /* 634 * This function can be called for a snapshot when we update snapshot's 635 * mount point, which isn't really supported. 636 */ 637 if (dmu_objset_is_snapshot(os)) 638 return (EOPNOTSUPP); 639 640 /* 641 * The act of registering our callbacks will destroy any mount 642 * options we may have. In order to enable temporary overrides 643 * of mount options, we stash away the current values and 644 * restore them after we register the callbacks. 645 */ 646 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) || 647 !spa_writeable(dmu_objset_spa(os))) { 648 readonly = B_TRUE; 649 do_readonly = B_TRUE; 650 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 651 readonly = B_FALSE; 652 do_readonly = B_TRUE; 653 } 654 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 655 setuid = B_FALSE; 656 do_setuid = B_TRUE; 657 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 658 setuid = B_TRUE; 659 do_setuid = B_TRUE; 660 } 661 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 662 exec = B_FALSE; 663 do_exec = B_TRUE; 664 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 665 exec = B_TRUE; 666 do_exec = B_TRUE; 667 } 668 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 669 zfsvfs->z_xattr = xattr = ZFS_XATTR_OFF; 670 do_xattr = B_TRUE; 671 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 672 zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR; 673 do_xattr = B_TRUE; 674 } else if (vfs_optionisset(vfsp, MNTOPT_DIRXATTR, NULL)) { 675 zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR; 676 do_xattr = B_TRUE; 677 } else if (vfs_optionisset(vfsp, MNTOPT_SAXATTR, NULL)) { 678 zfsvfs->z_xattr = xattr = ZFS_XATTR_SA; 679 do_xattr = B_TRUE; 680 } 681 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 682 atime = B_FALSE; 683 do_atime = B_TRUE; 684 } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 685 atime = B_TRUE; 686 do_atime = B_TRUE; 687 } 688 689 /* 690 * We need to enter pool configuration here, so that we can use 691 * dsl_prop_get_int_ds() to handle the special nbmand property below. 692 * dsl_prop_get_integer() can not be used, because it has to acquire 693 * spa_namespace_lock and we can not do that because we already hold 694 * z_teardown_lock. The problem is that spa_write_cachefile() is called 695 * with spa_namespace_lock held and the function calls ZFS vnode 696 * operations to write the cache file and thus z_teardown_lock is 697 * acquired after spa_namespace_lock. 698 */ 699 ds = dmu_objset_ds(os); 700 dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 701 702 /* 703 * nbmand is a special property. It can only be changed at 704 * mount time. 705 * 706 * This is weird, but it is documented to only be changeable 707 * at mount time. 708 */ 709 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 710 nbmand = B_FALSE; 711 } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 712 nbmand = B_TRUE; 713 } else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand) != 0)) { 714 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 715 return (error); 716 } 717 718 /* 719 * Register property callbacks. 720 * 721 * It would probably be fine to just check for i/o error from 722 * the first prop_register(), but I guess I like to go 723 * overboard... 724 */ 725 error = dsl_prop_register(ds, 726 zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs); 727 error = error ? error : dsl_prop_register(ds, 728 zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs); 729 error = error ? error : dsl_prop_register(ds, 730 zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs); 731 error = error ? error : dsl_prop_register(ds, 732 zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs); 733 error = error ? error : dsl_prop_register(ds, 734 zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs); 735 error = error ? error : dsl_prop_register(ds, 736 zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs); 737 error = error ? error : dsl_prop_register(ds, 738 zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs); 739 error = error ? error : dsl_prop_register(ds, 740 zfs_prop_to_name(ZFS_PROP_ACLTYPE), acl_type_changed_cb, zfsvfs); 741 error = error ? error : dsl_prop_register(ds, 742 zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs); 743 error = error ? error : dsl_prop_register(ds, 744 zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, 745 zfsvfs); 746 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 747 if (error) 748 goto unregister; 749 750 /* 751 * Invoke our callbacks to restore temporary mount options. 752 */ 753 if (do_readonly) 754 readonly_changed_cb(zfsvfs, readonly); 755 if (do_setuid) 756 setuid_changed_cb(zfsvfs, setuid); 757 if (do_exec) 758 exec_changed_cb(zfsvfs, exec); 759 if (do_xattr) 760 xattr_changed_cb(zfsvfs, xattr); 761 if (do_atime) 762 atime_changed_cb(zfsvfs, atime); 763 764 nbmand_changed_cb(zfsvfs, nbmand); 765 766 return (0); 767 768 unregister: 769 dsl_prop_unregister_all(ds, zfsvfs); 770 return (error); 771 } 772 773 /* 774 * Associate this zfsvfs with the given objset, which must be owned. 775 * This will cache a bunch of on-disk state from the objset in the 776 * zfsvfs. 777 */ 778 static int 779 zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os) 780 { 781 int error; 782 uint64_t val; 783 784 zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE; 785 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 786 zfsvfs->z_os = os; 787 788 error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); 789 if (error != 0) 790 return (error); 791 if (zfsvfs->z_version > 792 zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { 793 (void) printf("Can't mount a version %lld file system " 794 "on a version %lld pool\n. Pool must be upgraded to mount " 795 "this file system.", (u_longlong_t)zfsvfs->z_version, 796 (u_longlong_t)spa_version(dmu_objset_spa(os))); 797 return (SET_ERROR(ENOTSUP)); 798 } 799 error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val); 800 if (error != 0) 801 return (error); 802 zfsvfs->z_norm = (int)val; 803 804 error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val); 805 if (error != 0) 806 return (error); 807 zfsvfs->z_utf8 = (val != 0); 808 809 error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val); 810 if (error != 0) 811 return (error); 812 zfsvfs->z_case = (uint_t)val; 813 814 error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val); 815 if (error != 0) 816 return (error); 817 zfsvfs->z_acl_type = (uint_t)val; 818 819 /* 820 * Fold case on file systems that are always or sometimes case 821 * insensitive. 822 */ 823 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 824 zfsvfs->z_case == ZFS_CASE_MIXED) 825 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 826 827 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 828 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); 829 830 uint64_t sa_obj = 0; 831 if (zfsvfs->z_use_sa) { 832 /* should either have both of these objects or none */ 833 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, 834 &sa_obj); 835 if (error != 0) 836 return (error); 837 838 error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val); 839 if (error == 0 && val == ZFS_XATTR_SA) 840 zfsvfs->z_xattr_sa = B_TRUE; 841 } 842 843 error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, 844 &zfsvfs->z_attr_table); 845 if (error != 0) 846 return (error); 847 848 if (zfsvfs->z_version >= ZPL_VERSION_SA) 849 sa_register_update_callback(os, zfs_sa_upgrade); 850 851 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, 852 &zfsvfs->z_root); 853 if (error != 0) 854 return (error); 855 ASSERT3U(zfsvfs->z_root, !=, 0); 856 857 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, 858 &zfsvfs->z_unlinkedobj); 859 if (error != 0) 860 return (error); 861 862 error = zap_lookup(os, MASTER_NODE_OBJ, 863 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], 864 8, 1, &zfsvfs->z_userquota_obj); 865 if (error == ENOENT) 866 zfsvfs->z_userquota_obj = 0; 867 else if (error != 0) 868 return (error); 869 870 error = zap_lookup(os, MASTER_NODE_OBJ, 871 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], 872 8, 1, &zfsvfs->z_groupquota_obj); 873 if (error == ENOENT) 874 zfsvfs->z_groupquota_obj = 0; 875 else if (error != 0) 876 return (error); 877 878 error = zap_lookup(os, MASTER_NODE_OBJ, 879 zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA], 880 8, 1, &zfsvfs->z_projectquota_obj); 881 if (error == ENOENT) 882 zfsvfs->z_projectquota_obj = 0; 883 else if (error != 0) 884 return (error); 885 886 error = zap_lookup(os, MASTER_NODE_OBJ, 887 zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA], 888 8, 1, &zfsvfs->z_userobjquota_obj); 889 if (error == ENOENT) 890 zfsvfs->z_userobjquota_obj = 0; 891 else if (error != 0) 892 return (error); 893 894 error = zap_lookup(os, MASTER_NODE_OBJ, 895 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA], 896 8, 1, &zfsvfs->z_groupobjquota_obj); 897 if (error == ENOENT) 898 zfsvfs->z_groupobjquota_obj = 0; 899 else if (error != 0) 900 return (error); 901 902 error = zap_lookup(os, MASTER_NODE_OBJ, 903 zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA], 904 8, 1, &zfsvfs->z_projectobjquota_obj); 905 if (error == ENOENT) 906 zfsvfs->z_projectobjquota_obj = 0; 907 else if (error != 0) 908 return (error); 909 910 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, 911 &zfsvfs->z_fuid_obj); 912 if (error == ENOENT) 913 zfsvfs->z_fuid_obj = 0; 914 else if (error != 0) 915 return (error); 916 917 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, 918 &zfsvfs->z_shares_dir); 919 if (error == ENOENT) 920 zfsvfs->z_shares_dir = 0; 921 else if (error != 0) 922 return (error); 923 924 /* 925 * Only use the name cache if we are looking for a 926 * name on a file system that does not require normalization 927 * or case folding. We can also look there if we happen to be 928 * on a non-normalizing, mixed sensitivity file system IF we 929 * are looking for the exact name (which is always the case on 930 * FreeBSD). 931 */ 932 zfsvfs->z_use_namecache = !zfsvfs->z_norm || 933 ((zfsvfs->z_case == ZFS_CASE_MIXED) && 934 !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER)); 935 936 return (0); 937 } 938 939 taskq_t *zfsvfs_taskq; 940 941 static void 942 zfsvfs_task_unlinked_drain(void *context, int pending __unused) 943 { 944 945 zfs_unlinked_drain((zfsvfs_t *)context); 946 } 947 948 int 949 zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp) 950 { 951 objset_t *os; 952 zfsvfs_t *zfsvfs; 953 int error; 954 boolean_t ro = (readonly || (strchr(osname, '@') != NULL)); 955 956 /* 957 * XXX: Fix struct statfs so this isn't necessary! 958 * 959 * The 'osname' is used as the filesystem's special node, which means 960 * it must fit in statfs.f_mntfromname, or else it can't be 961 * enumerated, so libzfs_mnttab_find() returns NULL, which causes 962 * 'zfs unmount' to think it's not mounted when it is. 963 */ 964 if (strlen(osname) >= MNAMELEN) 965 return (SET_ERROR(ENAMETOOLONG)); 966 967 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 968 969 error = dmu_objset_own(osname, DMU_OST_ZFS, ro, B_TRUE, zfsvfs, 970 &os); 971 if (error != 0) { 972 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 973 return (error); 974 } 975 976 error = zfsvfs_create_impl(zfvp, zfsvfs, os); 977 978 return (error); 979 } 980 981 982 int 983 zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os) 984 { 985 int error; 986 987 zfsvfs->z_vfs = NULL; 988 zfsvfs->z_parent = zfsvfs; 989 990 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 991 mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); 992 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 993 offsetof(znode_t, z_link_node)); 994 TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0, 995 zfsvfs_task_unlinked_drain, zfsvfs); 996 ZFS_TEARDOWN_INIT(zfsvfs); 997 ZFS_TEARDOWN_INACTIVE_INIT(zfsvfs); 998 rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 999 for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1000 mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 1001 1002 error = zfsvfs_init(zfsvfs, os); 1003 if (error != 0) { 1004 dmu_objset_disown(os, B_TRUE, zfsvfs); 1005 *zfvp = NULL; 1006 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1007 return (error); 1008 } 1009 1010 *zfvp = zfsvfs; 1011 return (0); 1012 } 1013 1014 static int 1015 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 1016 { 1017 int error; 1018 1019 /* 1020 * Check for a bad on-disk format version now since we 1021 * lied about owning the dataset readonly before. 1022 */ 1023 if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) && 1024 dmu_objset_incompatible_encryption_version(zfsvfs->z_os)) 1025 return (SET_ERROR(EROFS)); 1026 1027 error = zfs_register_callbacks(zfsvfs->z_vfs); 1028 if (error) 1029 return (error); 1030 1031 /* 1032 * If we are not mounting (ie: online recv), then we don't 1033 * have to worry about replaying the log as we blocked all 1034 * operations out since we closed the ZIL. 1035 */ 1036 if (mounting) { 1037 boolean_t readonly; 1038 1039 ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL); 1040 error = dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os); 1041 if (error) 1042 return (error); 1043 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data, 1044 &zfsvfs->z_kstat.dk_zil_sums); 1045 1046 /* 1047 * During replay we remove the read only flag to 1048 * allow replays to succeed. 1049 */ 1050 readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 1051 if (readonly != 0) { 1052 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 1053 } else { 1054 dsl_dir_t *dd; 1055 zap_stats_t zs; 1056 1057 if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj, 1058 &zs) == 0) { 1059 dataset_kstats_update_nunlinks_kstat( 1060 &zfsvfs->z_kstat, zs.zs_num_entries); 1061 dprintf_ds(zfsvfs->z_os->os_dsl_dataset, 1062 "num_entries in unlinked set: %llu", 1063 (u_longlong_t)zs.zs_num_entries); 1064 } 1065 1066 zfs_unlinked_drain(zfsvfs); 1067 dd = zfsvfs->z_os->os_dsl_dataset->ds_dir; 1068 dd->dd_activity_cancelled = B_FALSE; 1069 } 1070 1071 /* 1072 * Parse and replay the intent log. 1073 * 1074 * Because of ziltest, this must be done after 1075 * zfs_unlinked_drain(). (Further note: ziltest 1076 * doesn't use readonly mounts, where 1077 * zfs_unlinked_drain() isn't called.) This is because 1078 * ziltest causes spa_sync() to think it's committed, 1079 * but actually it is not, so the intent log contains 1080 * many txg's worth of changes. 1081 * 1082 * In particular, if object N is in the unlinked set in 1083 * the last txg to actually sync, then it could be 1084 * actually freed in a later txg and then reallocated 1085 * in a yet later txg. This would write a "create 1086 * object N" record to the intent log. Normally, this 1087 * would be fine because the spa_sync() would have 1088 * written out the fact that object N is free, before 1089 * we could write the "create object N" intent log 1090 * record. 1091 * 1092 * But when we are in ziltest mode, we advance the "open 1093 * txg" without actually spa_sync()-ing the changes to 1094 * disk. So we would see that object N is still 1095 * allocated and in the unlinked set, and there is an 1096 * intent log record saying to allocate it. 1097 */ 1098 if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) { 1099 if (zil_replay_disable) { 1100 zil_destroy(zfsvfs->z_log, B_FALSE); 1101 } else { 1102 boolean_t use_nc = zfsvfs->z_use_namecache; 1103 zfsvfs->z_use_namecache = B_FALSE; 1104 zfsvfs->z_replay = B_TRUE; 1105 zil_replay(zfsvfs->z_os, zfsvfs, 1106 zfs_replay_vector); 1107 zfsvfs->z_replay = B_FALSE; 1108 zfsvfs->z_use_namecache = use_nc; 1109 } 1110 } 1111 1112 /* restore readonly bit */ 1113 if (readonly != 0) 1114 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 1115 } else { 1116 ASSERT3P(zfsvfs->z_kstat.dk_kstats, !=, NULL); 1117 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data, 1118 &zfsvfs->z_kstat.dk_zil_sums); 1119 } 1120 1121 /* 1122 * Set the objset user_ptr to track its zfsvfs. 1123 */ 1124 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 1125 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1126 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 1127 1128 return (0); 1129 } 1130 1131 void 1132 zfsvfs_free(zfsvfs_t *zfsvfs) 1133 { 1134 int i; 1135 1136 zfs_fuid_destroy(zfsvfs); 1137 1138 mutex_destroy(&zfsvfs->z_znodes_lock); 1139 mutex_destroy(&zfsvfs->z_lock); 1140 ASSERT3U(zfsvfs->z_nr_znodes, ==, 0); 1141 list_destroy(&zfsvfs->z_all_znodes); 1142 ZFS_TEARDOWN_DESTROY(zfsvfs); 1143 ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs); 1144 rw_destroy(&zfsvfs->z_fuid_lock); 1145 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1146 mutex_destroy(&zfsvfs->z_hold_mtx[i]); 1147 dataset_kstats_destroy(&zfsvfs->z_kstat); 1148 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1149 } 1150 1151 static void 1152 zfs_set_fuid_feature(zfsvfs_t *zfsvfs) 1153 { 1154 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 1155 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); 1156 } 1157 1158 static int 1159 zfs_domount(vfs_t *vfsp, char *osname) 1160 { 1161 uint64_t recordsize, fsid_guid; 1162 int error = 0; 1163 zfsvfs_t *zfsvfs; 1164 1165 ASSERT3P(vfsp, !=, NULL); 1166 ASSERT3P(osname, !=, NULL); 1167 1168 error = zfsvfs_create(osname, vfsp->mnt_flag & MNT_RDONLY, &zfsvfs); 1169 if (error) 1170 return (error); 1171 zfsvfs->z_vfs = vfsp; 1172 1173 if ((error = dsl_prop_get_integer(osname, 1174 "recordsize", &recordsize, NULL))) 1175 goto out; 1176 zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE; 1177 zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize; 1178 1179 vfsp->vfs_data = zfsvfs; 1180 vfsp->mnt_flag |= MNT_LOCAL; 1181 vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED; 1182 vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES; 1183 vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED; 1184 /* 1185 * This can cause a loss of coherence between ARC and page cache 1186 * on ZoF - unclear if the problem is in FreeBSD or ZoF 1187 */ 1188 vfsp->mnt_kern_flag |= MNTK_NO_IOPF; /* vn_io_fault can be used */ 1189 vfsp->mnt_kern_flag |= MNTK_NOMSYNC; 1190 vfsp->mnt_kern_flag |= MNTK_VMSETSIZE_BUG; 1191 1192 #if defined(_KERNEL) && !defined(KMEM_DEBUG) 1193 vfsp->mnt_kern_flag |= MNTK_FPLOOKUP; 1194 #endif 1195 /* 1196 * The fsid is 64 bits, composed of an 8-bit fs type, which 1197 * separates our fsid from any other filesystem types, and a 1198 * 56-bit objset unique ID. The objset unique ID is unique to 1199 * all objsets open on this system, provided by unique_create(). 1200 * The 8-bit fs type must be put in the low bits of fsid[1] 1201 * because that's where other Solaris filesystems put it. 1202 */ 1203 fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os); 1204 ASSERT3U((fsid_guid & ~((1ULL << 56) - 1)), ==, 0); 1205 vfsp->vfs_fsid.val[0] = fsid_guid; 1206 vfsp->vfs_fsid.val[1] = ((fsid_guid >> 32) << 8) | 1207 (vfsp->mnt_vfc->vfc_typenum & 0xFF); 1208 1209 /* 1210 * Set features for file system. 1211 */ 1212 zfs_set_fuid_feature(zfsvfs); 1213 1214 if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 1215 uint64_t pval; 1216 1217 atime_changed_cb(zfsvfs, B_FALSE); 1218 readonly_changed_cb(zfsvfs, B_TRUE); 1219 if ((error = dsl_prop_get_integer(osname, 1220 "xattr", &pval, NULL))) 1221 goto out; 1222 xattr_changed_cb(zfsvfs, pval); 1223 if ((error = dsl_prop_get_integer(osname, 1224 "acltype", &pval, NULL))) 1225 goto out; 1226 acl_type_changed_cb(zfsvfs, pval); 1227 zfsvfs->z_issnap = B_TRUE; 1228 zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED; 1229 1230 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock); 1231 dmu_objset_set_user(zfsvfs->z_os, zfsvfs); 1232 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock); 1233 } else { 1234 if ((error = zfsvfs_setup(zfsvfs, B_TRUE))) 1235 goto out; 1236 } 1237 1238 vfs_mountedfrom(vfsp, osname); 1239 1240 if (!zfsvfs->z_issnap) 1241 zfsctl_create(zfsvfs); 1242 out: 1243 if (error) { 1244 dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs); 1245 zfsvfs_free(zfsvfs); 1246 } else { 1247 atomic_inc_32(&zfs_active_fs_count); 1248 } 1249 1250 return (error); 1251 } 1252 1253 static void 1254 zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 1255 { 1256 objset_t *os = zfsvfs->z_os; 1257 1258 if (!dmu_objset_is_snapshot(os)) 1259 dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs); 1260 } 1261 1262 static int 1263 getpoolname(const char *osname, char *poolname) 1264 { 1265 char *p; 1266 1267 p = strchr(osname, '/'); 1268 if (p == NULL) { 1269 if (strlen(osname) >= MAXNAMELEN) 1270 return (ENAMETOOLONG); 1271 (void) strcpy(poolname, osname); 1272 } else { 1273 if (p - osname >= MAXNAMELEN) 1274 return (ENAMETOOLONG); 1275 (void) strlcpy(poolname, osname, p - osname + 1); 1276 } 1277 return (0); 1278 } 1279 1280 static void 1281 fetch_osname_options(char *name, bool *checkpointrewind) 1282 { 1283 1284 if (name[0] == '!') { 1285 *checkpointrewind = true; 1286 memmove(name, name + 1, strlen(name)); 1287 } else { 1288 *checkpointrewind = false; 1289 } 1290 } 1291 1292 static int 1293 zfs_mount(vfs_t *vfsp) 1294 { 1295 kthread_t *td = curthread; 1296 vnode_t *mvp = vfsp->mnt_vnodecovered; 1297 cred_t *cr = td->td_ucred; 1298 char *osname; 1299 int error = 0; 1300 int canwrite; 1301 bool checkpointrewind; 1302 1303 if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL)) 1304 return (SET_ERROR(EINVAL)); 1305 1306 /* 1307 * If full-owner-access is enabled and delegated administration is 1308 * turned on, we must set nosuid. 1309 */ 1310 if (zfs_super_owner && 1311 dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) { 1312 secpolicy_fs_mount_clearopts(cr, vfsp); 1313 } 1314 1315 fetch_osname_options(osname, &checkpointrewind); 1316 1317 /* 1318 * Check for mount privilege? 1319 * 1320 * If we don't have privilege then see if 1321 * we have local permission to allow it 1322 */ 1323 error = secpolicy_fs_mount(cr, mvp, vfsp); 1324 if (error) { 1325 if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0) 1326 goto out; 1327 1328 if (!(vfsp->vfs_flag & MS_REMOUNT)) { 1329 vattr_t vattr; 1330 1331 /* 1332 * Make sure user is the owner of the mount point 1333 * or has sufficient privileges. 1334 */ 1335 1336 vattr.va_mask = AT_UID; 1337 1338 vn_lock(mvp, LK_SHARED | LK_RETRY); 1339 if (VOP_GETATTR(mvp, &vattr, cr)) { 1340 VOP_UNLOCK1(mvp); 1341 goto out; 1342 } 1343 1344 if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 && 1345 VOP_ACCESS(mvp, VWRITE, cr, td) != 0) { 1346 VOP_UNLOCK1(mvp); 1347 goto out; 1348 } 1349 VOP_UNLOCK1(mvp); 1350 } 1351 1352 secpolicy_fs_mount_clearopts(cr, vfsp); 1353 } 1354 1355 /* 1356 * Refuse to mount a filesystem if we are in a local zone and the 1357 * dataset is not visible. 1358 */ 1359 if (!INGLOBALZONE(curproc) && 1360 (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 1361 error = SET_ERROR(EPERM); 1362 goto out; 1363 } 1364 1365 vfsp->vfs_flag |= MNT_NFS4ACLS; 1366 1367 /* 1368 * When doing a remount, we simply refresh our temporary properties 1369 * according to those options set in the current VFS options. 1370 */ 1371 if (vfsp->vfs_flag & MS_REMOUNT) { 1372 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1373 1374 /* 1375 * Refresh mount options with z_teardown_lock blocking I/O while 1376 * the filesystem is in an inconsistent state. 1377 * The lock also serializes this code with filesystem 1378 * manipulations between entry to zfs_suspend_fs() and return 1379 * from zfs_resume_fs(). 1380 */ 1381 ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG); 1382 zfs_unregister_callbacks(zfsvfs); 1383 error = zfs_register_callbacks(vfsp); 1384 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 1385 goto out; 1386 } 1387 1388 /* Initial root mount: try hard to import the requested root pool. */ 1389 if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 && 1390 (vfsp->vfs_flag & MNT_UPDATE) == 0) { 1391 char pname[MAXNAMELEN]; 1392 1393 error = getpoolname(osname, pname); 1394 if (error == 0) 1395 error = spa_import_rootpool(pname, checkpointrewind); 1396 if (error) 1397 goto out; 1398 } 1399 DROP_GIANT(); 1400 error = zfs_domount(vfsp, osname); 1401 PICKUP_GIANT(); 1402 1403 out: 1404 return (error); 1405 } 1406 1407 static int 1408 zfs_statfs(vfs_t *vfsp, struct statfs *statp) 1409 { 1410 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1411 uint64_t refdbytes, availbytes, usedobjs, availobjs; 1412 int error; 1413 1414 statp->f_version = STATFS_VERSION; 1415 1416 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 1417 return (error); 1418 1419 dmu_objset_space(zfsvfs->z_os, 1420 &refdbytes, &availbytes, &usedobjs, &availobjs); 1421 1422 /* 1423 * The underlying storage pool actually uses multiple block sizes. 1424 * We report the fragsize as the smallest block size we support, 1425 * and we report our blocksize as the filesystem's maximum blocksize. 1426 */ 1427 statp->f_bsize = SPA_MINBLOCKSIZE; 1428 statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize; 1429 1430 /* 1431 * The following report "total" blocks of various kinds in the 1432 * file system, but reported in terms of f_frsize - the 1433 * "fragment" size. 1434 */ 1435 1436 statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 1437 statp->f_bfree = availbytes / statp->f_bsize; 1438 statp->f_bavail = statp->f_bfree; /* no root reservation */ 1439 1440 /* 1441 * statvfs() should really be called statufs(), because it assumes 1442 * static metadata. ZFS doesn't preallocate files, so the best 1443 * we can do is report the max that could possibly fit in f_files, 1444 * and that minus the number actually used in f_ffree. 1445 * For f_ffree, report the smaller of the number of object available 1446 * and the number of blocks (each object will take at least a block). 1447 */ 1448 statp->f_ffree = MIN(availobjs, statp->f_bfree); 1449 statp->f_files = statp->f_ffree + usedobjs; 1450 1451 /* 1452 * We're a zfs filesystem. 1453 */ 1454 strlcpy(statp->f_fstypename, "zfs", 1455 sizeof (statp->f_fstypename)); 1456 1457 strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname, 1458 sizeof (statp->f_mntfromname)); 1459 strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname, 1460 sizeof (statp->f_mntonname)); 1461 1462 statp->f_namemax = MAXNAMELEN - 1; 1463 1464 zfs_exit(zfsvfs, FTAG); 1465 return (0); 1466 } 1467 1468 static int 1469 zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp) 1470 { 1471 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1472 znode_t *rootzp; 1473 int error; 1474 1475 if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 1476 return (error); 1477 1478 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 1479 if (error == 0) 1480 *vpp = ZTOV(rootzp); 1481 1482 zfs_exit(zfsvfs, FTAG); 1483 1484 if (error == 0) { 1485 error = vn_lock(*vpp, flags); 1486 if (error != 0) { 1487 VN_RELE(*vpp); 1488 *vpp = NULL; 1489 } 1490 } 1491 return (error); 1492 } 1493 1494 /* 1495 * Teardown the zfsvfs::z_os. 1496 * 1497 * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock' 1498 * and 'z_teardown_inactive_lock' held. 1499 */ 1500 static int 1501 zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting) 1502 { 1503 znode_t *zp; 1504 dsl_dir_t *dd; 1505 1506 /* 1507 * If someone has not already unmounted this file system, 1508 * drain the zrele_taskq to ensure all active references to the 1509 * zfsvfs_t have been handled only then can it be safely destroyed. 1510 */ 1511 if (zfsvfs->z_os) { 1512 /* 1513 * If we're unmounting we have to wait for the list to 1514 * drain completely. 1515 * 1516 * If we're not unmounting there's no guarantee the list 1517 * will drain completely, but zreles run from the taskq 1518 * may add the parents of dir-based xattrs to the taskq 1519 * so we want to wait for these. 1520 * 1521 * We can safely read z_nr_znodes without locking because the 1522 * VFS has already blocked operations which add to the 1523 * z_all_znodes list and thus increment z_nr_znodes. 1524 */ 1525 int round = 0; 1526 while (zfsvfs->z_nr_znodes > 0) { 1527 taskq_wait_outstanding(dsl_pool_zrele_taskq( 1528 dmu_objset_pool(zfsvfs->z_os)), 0); 1529 if (++round > 1 && !unmounting) 1530 break; 1531 } 1532 } 1533 ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG); 1534 1535 if (!unmounting) { 1536 /* 1537 * We purge the parent filesystem's vfsp as the parent 1538 * filesystem and all of its snapshots have their vnode's 1539 * v_vfsp set to the parent's filesystem's vfsp. Note, 1540 * 'z_parent' is self referential for non-snapshots. 1541 */ 1542 #ifdef FREEBSD_NAMECACHE 1543 #if __FreeBSD_version >= 1300117 1544 cache_purgevfs(zfsvfs->z_parent->z_vfs); 1545 #else 1546 cache_purgevfs(zfsvfs->z_parent->z_vfs, true); 1547 #endif 1548 #endif 1549 } 1550 1551 /* 1552 * Close the zil. NB: Can't close the zil while zfs_inactive 1553 * threads are blocked as zil_close can call zfs_inactive. 1554 */ 1555 if (zfsvfs->z_log) { 1556 zil_close(zfsvfs->z_log); 1557 zfsvfs->z_log = NULL; 1558 } 1559 1560 ZFS_TEARDOWN_INACTIVE_ENTER_WRITE(zfsvfs); 1561 1562 /* 1563 * If we are not unmounting (ie: online recv) and someone already 1564 * unmounted this file system while we were doing the switcheroo, 1565 * or a reopen of z_os failed then just bail out now. 1566 */ 1567 if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) { 1568 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); 1569 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 1570 return (SET_ERROR(EIO)); 1571 } 1572 1573 /* 1574 * At this point there are no vops active, and any new vops will 1575 * fail with EIO since we have z_teardown_lock for writer (only 1576 * relevant for forced unmount). 1577 * 1578 * Release all holds on dbufs. 1579 */ 1580 mutex_enter(&zfsvfs->z_znodes_lock); 1581 for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL; 1582 zp = list_next(&zfsvfs->z_all_znodes, zp)) { 1583 if (zp->z_sa_hdl != NULL) { 1584 zfs_znode_dmu_fini(zp); 1585 } 1586 } 1587 mutex_exit(&zfsvfs->z_znodes_lock); 1588 1589 /* 1590 * If we are unmounting, set the unmounted flag and let new vops 1591 * unblock. zfs_inactive will have the unmounted behavior, and all 1592 * other vops will fail with EIO. 1593 */ 1594 if (unmounting) { 1595 zfsvfs->z_unmounted = B_TRUE; 1596 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); 1597 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 1598 } 1599 1600 /* 1601 * z_os will be NULL if there was an error in attempting to reopen 1602 * zfsvfs, so just return as the properties had already been 1603 * unregistered and cached data had been evicted before. 1604 */ 1605 if (zfsvfs->z_os == NULL) 1606 return (0); 1607 1608 /* 1609 * Unregister properties. 1610 */ 1611 zfs_unregister_callbacks(zfsvfs); 1612 1613 /* 1614 * Evict cached data 1615 */ 1616 if (!zfs_is_readonly(zfsvfs)) 1617 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 1618 dmu_objset_evict_dbufs(zfsvfs->z_os); 1619 dd = zfsvfs->z_os->os_dsl_dataset->ds_dir; 1620 dsl_dir_cancel_waiters(dd); 1621 1622 return (0); 1623 } 1624 1625 static int 1626 zfs_umount(vfs_t *vfsp, int fflag) 1627 { 1628 kthread_t *td = curthread; 1629 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1630 objset_t *os; 1631 cred_t *cr = td->td_ucred; 1632 int ret; 1633 1634 ret = secpolicy_fs_unmount(cr, vfsp); 1635 if (ret) { 1636 if (dsl_deleg_access((char *)vfsp->vfs_resource, 1637 ZFS_DELEG_PERM_MOUNT, cr)) 1638 return (ret); 1639 } 1640 1641 /* 1642 * Unmount any snapshots mounted under .zfs before unmounting the 1643 * dataset itself. 1644 */ 1645 if (zfsvfs->z_ctldir != NULL) { 1646 if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) 1647 return (ret); 1648 } 1649 1650 if (fflag & MS_FORCE) { 1651 /* 1652 * Mark file system as unmounted before calling 1653 * vflush(FORCECLOSE). This way we ensure no future vnops 1654 * will be called and risk operating on DOOMED vnodes. 1655 */ 1656 ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG); 1657 zfsvfs->z_unmounted = B_TRUE; 1658 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 1659 } 1660 1661 /* 1662 * Flush all the files. 1663 */ 1664 ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td); 1665 if (ret != 0) 1666 return (ret); 1667 while (taskqueue_cancel(zfsvfs_taskq->tq_queue, 1668 &zfsvfs->z_unlinked_drain_task, NULL) != 0) 1669 taskqueue_drain(zfsvfs_taskq->tq_queue, 1670 &zfsvfs->z_unlinked_drain_task); 1671 1672 VERIFY0(zfsvfs_teardown(zfsvfs, B_TRUE)); 1673 os = zfsvfs->z_os; 1674 1675 /* 1676 * z_os will be NULL if there was an error in 1677 * attempting to reopen zfsvfs. 1678 */ 1679 if (os != NULL) { 1680 /* 1681 * Unset the objset user_ptr. 1682 */ 1683 mutex_enter(&os->os_user_ptr_lock); 1684 dmu_objset_set_user(os, NULL); 1685 mutex_exit(&os->os_user_ptr_lock); 1686 1687 /* 1688 * Finally release the objset 1689 */ 1690 dmu_objset_disown(os, B_TRUE, zfsvfs); 1691 } 1692 1693 /* 1694 * We can now safely destroy the '.zfs' directory node. 1695 */ 1696 if (zfsvfs->z_ctldir != NULL) 1697 zfsctl_destroy(zfsvfs); 1698 zfs_freevfs(vfsp); 1699 1700 return (0); 1701 } 1702 1703 static int 1704 zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp) 1705 { 1706 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1707 znode_t *zp; 1708 int err; 1709 1710 /* 1711 * zfs_zget() can't operate on virtual entries like .zfs/ or 1712 * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP. 1713 * This will make NFS to switch to LOOKUP instead of using VGET. 1714 */ 1715 if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR || 1716 (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir)) 1717 return (EOPNOTSUPP); 1718 1719 if ((err = zfs_enter(zfsvfs, FTAG)) != 0) 1720 return (err); 1721 err = zfs_zget(zfsvfs, ino, &zp); 1722 if (err == 0 && zp->z_unlinked) { 1723 vrele(ZTOV(zp)); 1724 err = EINVAL; 1725 } 1726 if (err == 0) 1727 *vpp = ZTOV(zp); 1728 zfs_exit(zfsvfs, FTAG); 1729 if (err == 0) { 1730 err = vn_lock(*vpp, flags); 1731 if (err != 0) 1732 vrele(*vpp); 1733 } 1734 if (err != 0) 1735 *vpp = NULL; 1736 return (err); 1737 } 1738 1739 static int 1740 #if __FreeBSD_version >= 1300098 1741 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp, 1742 struct ucred **credanonp, int *numsecflavors, int *secflavors) 1743 #else 1744 zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 1745 struct ucred **credanonp, int *numsecflavors, int **secflavors) 1746 #endif 1747 { 1748 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1749 1750 /* 1751 * If this is regular file system vfsp is the same as 1752 * zfsvfs->z_parent->z_vfs, but if it is snapshot, 1753 * zfsvfs->z_parent->z_vfs represents parent file system 1754 * which we have to use here, because only this file system 1755 * has mnt_export configured. 1756 */ 1757 return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp, 1758 credanonp, numsecflavors, secflavors)); 1759 } 1760 1761 _Static_assert(sizeof (struct fid) >= SHORT_FID_LEN, 1762 "struct fid bigger than SHORT_FID_LEN"); 1763 _Static_assert(sizeof (struct fid) >= LONG_FID_LEN, 1764 "struct fid bigger than LONG_FID_LEN"); 1765 1766 static int 1767 zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp) 1768 { 1769 struct componentname cn; 1770 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1771 znode_t *zp; 1772 vnode_t *dvp; 1773 uint64_t object = 0; 1774 uint64_t fid_gen = 0; 1775 uint64_t setgen = 0; 1776 uint64_t gen_mask; 1777 uint64_t zp_gen; 1778 int i, err; 1779 1780 *vpp = NULL; 1781 1782 if ((err = zfs_enter(zfsvfs, FTAG)) != 0) 1783 return (err); 1784 1785 /* 1786 * On FreeBSD we can get snapshot's mount point or its parent file 1787 * system mount point depending if snapshot is already mounted or not. 1788 */ 1789 if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) { 1790 zfid_long_t *zlfid = (zfid_long_t *)fidp; 1791 uint64_t objsetid = 0; 1792 1793 for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1794 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1795 1796 for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1797 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1798 1799 zfs_exit(zfsvfs, FTAG); 1800 1801 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1802 if (err) 1803 return (SET_ERROR(EINVAL)); 1804 if ((err = zfs_enter(zfsvfs, FTAG)) != 0) 1805 return (err); 1806 } 1807 1808 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1809 zfid_short_t *zfid = (zfid_short_t *)fidp; 1810 1811 for (i = 0; i < sizeof (zfid->zf_object); i++) 1812 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1813 1814 for (i = 0; i < sizeof (zfid->zf_gen); i++) 1815 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1816 } else { 1817 zfs_exit(zfsvfs, FTAG); 1818 return (SET_ERROR(EINVAL)); 1819 } 1820 1821 if (fidp->fid_len == LONG_FID_LEN && setgen != 0) { 1822 zfs_exit(zfsvfs, FTAG); 1823 dprintf("snapdir fid: fid_gen (%llu) and setgen (%llu)\n", 1824 (u_longlong_t)fid_gen, (u_longlong_t)setgen); 1825 return (SET_ERROR(EINVAL)); 1826 } 1827 1828 /* 1829 * A zero fid_gen means we are in .zfs or the .zfs/snapshot 1830 * directory tree. If the object == zfsvfs->z_shares_dir, then 1831 * we are in the .zfs/shares directory tree. 1832 */ 1833 if ((fid_gen == 0 && 1834 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) || 1835 (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) { 1836 zfs_exit(zfsvfs, FTAG); 1837 VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp)); 1838 if (object == ZFSCTL_INO_SNAPDIR) { 1839 cn.cn_nameptr = "snapshot"; 1840 cn.cn_namelen = strlen(cn.cn_nameptr); 1841 cn.cn_nameiop = LOOKUP; 1842 cn.cn_flags = ISLASTCN | LOCKLEAF; 1843 cn.cn_lkflags = flags; 1844 VERIFY0(VOP_LOOKUP(dvp, vpp, &cn)); 1845 vput(dvp); 1846 } else if (object == zfsvfs->z_shares_dir) { 1847 /* 1848 * XXX This branch must not be taken, 1849 * if it is, then the lookup below will 1850 * explode. 1851 */ 1852 cn.cn_nameptr = "shares"; 1853 cn.cn_namelen = strlen(cn.cn_nameptr); 1854 cn.cn_nameiop = LOOKUP; 1855 cn.cn_flags = ISLASTCN; 1856 cn.cn_lkflags = flags; 1857 VERIFY0(VOP_LOOKUP(dvp, vpp, &cn)); 1858 vput(dvp); 1859 } else { 1860 *vpp = dvp; 1861 } 1862 return (err); 1863 } 1864 1865 gen_mask = -1ULL >> (64 - 8 * i); 1866 1867 dprintf("getting %llu [%llu mask %llx]\n", (u_longlong_t)object, 1868 (u_longlong_t)fid_gen, 1869 (u_longlong_t)gen_mask); 1870 if ((err = zfs_zget(zfsvfs, object, &zp))) { 1871 zfs_exit(zfsvfs, FTAG); 1872 return (err); 1873 } 1874 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen, 1875 sizeof (uint64_t)); 1876 zp_gen = zp_gen & gen_mask; 1877 if (zp_gen == 0) 1878 zp_gen = 1; 1879 if (zp->z_unlinked || zp_gen != fid_gen) { 1880 dprintf("znode gen (%llu) != fid gen (%llu)\n", 1881 (u_longlong_t)zp_gen, (u_longlong_t)fid_gen); 1882 vrele(ZTOV(zp)); 1883 zfs_exit(zfsvfs, FTAG); 1884 return (SET_ERROR(EINVAL)); 1885 } 1886 1887 *vpp = ZTOV(zp); 1888 zfs_exit(zfsvfs, FTAG); 1889 err = vn_lock(*vpp, flags); 1890 if (err == 0) 1891 vnode_create_vobject(*vpp, zp->z_size, curthread); 1892 else 1893 *vpp = NULL; 1894 return (err); 1895 } 1896 1897 /* 1898 * Block out VOPs and close zfsvfs_t::z_os 1899 * 1900 * Note, if successful, then we return with the 'z_teardown_lock' and 1901 * 'z_teardown_inactive_lock' write held. We leave ownership of the underlying 1902 * dataset and objset intact so that they can be atomically handed off during 1903 * a subsequent rollback or recv operation and the resume thereafter. 1904 */ 1905 int 1906 zfs_suspend_fs(zfsvfs_t *zfsvfs) 1907 { 1908 int error; 1909 1910 if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0) 1911 return (error); 1912 1913 return (0); 1914 } 1915 1916 /* 1917 * Rebuild SA and release VOPs. Note that ownership of the underlying dataset 1918 * is an invariant across any of the operations that can be performed while the 1919 * filesystem was suspended. Whether it succeeded or failed, the preconditions 1920 * are the same: the relevant objset and associated dataset are owned by 1921 * zfsvfs, held, and long held on entry. 1922 */ 1923 int 1924 zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) 1925 { 1926 int err; 1927 znode_t *zp; 1928 1929 ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs)); 1930 ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs)); 1931 1932 /* 1933 * We already own this, so just update the objset_t, as the one we 1934 * had before may have been evicted. 1935 */ 1936 objset_t *os; 1937 VERIFY3P(ds->ds_owner, ==, zfsvfs); 1938 VERIFY(dsl_dataset_long_held(ds)); 1939 dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds)); 1940 dsl_pool_config_enter(dp, FTAG); 1941 VERIFY0(dmu_objset_from_ds(ds, &os)); 1942 dsl_pool_config_exit(dp, FTAG); 1943 1944 err = zfsvfs_init(zfsvfs, os); 1945 if (err != 0) 1946 goto bail; 1947 1948 ds->ds_dir->dd_activity_cancelled = B_FALSE; 1949 VERIFY0(zfsvfs_setup(zfsvfs, B_FALSE)); 1950 1951 zfs_set_fuid_feature(zfsvfs); 1952 1953 /* 1954 * Attempt to re-establish all the active znodes with 1955 * their dbufs. If a zfs_rezget() fails, then we'll let 1956 * any potential callers discover that via zfs_enter_verify_zp 1957 * when they try to use their znode. 1958 */ 1959 mutex_enter(&zfsvfs->z_znodes_lock); 1960 for (zp = list_head(&zfsvfs->z_all_znodes); zp; 1961 zp = list_next(&zfsvfs->z_all_znodes, zp)) { 1962 (void) zfs_rezget(zp); 1963 } 1964 mutex_exit(&zfsvfs->z_znodes_lock); 1965 1966 bail: 1967 /* release the VOPs */ 1968 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); 1969 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 1970 1971 if (err) { 1972 /* 1973 * Since we couldn't setup the sa framework, try to force 1974 * unmount this file system. 1975 */ 1976 if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) { 1977 vfs_ref(zfsvfs->z_vfs); 1978 (void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread); 1979 } 1980 } 1981 return (err); 1982 } 1983 1984 static void 1985 zfs_freevfs(vfs_t *vfsp) 1986 { 1987 zfsvfs_t *zfsvfs = vfsp->vfs_data; 1988 1989 zfsvfs_free(zfsvfs); 1990 1991 atomic_dec_32(&zfs_active_fs_count); 1992 } 1993 1994 #ifdef __i386__ 1995 static int desiredvnodes_backup; 1996 #include <sys/vmmeter.h> 1997 1998 1999 #include <vm/vm_page.h> 2000 #include <vm/vm_object.h> 2001 #include <vm/vm_kern.h> 2002 #include <vm/vm_map.h> 2003 #endif 2004 2005 static void 2006 zfs_vnodes_adjust(void) 2007 { 2008 #ifdef __i386__ 2009 int newdesiredvnodes; 2010 2011 desiredvnodes_backup = desiredvnodes; 2012 2013 /* 2014 * We calculate newdesiredvnodes the same way it is done in 2015 * vntblinit(). If it is equal to desiredvnodes, it means that 2016 * it wasn't tuned by the administrator and we can tune it down. 2017 */ 2018 newdesiredvnodes = min(maxproc + vm_cnt.v_page_count / 4, 2 * 2019 vm_kmem_size / (5 * (sizeof (struct vm_object) + 2020 sizeof (struct vnode)))); 2021 if (newdesiredvnodes == desiredvnodes) 2022 desiredvnodes = (3 * newdesiredvnodes) / 4; 2023 #endif 2024 } 2025 2026 static void 2027 zfs_vnodes_adjust_back(void) 2028 { 2029 2030 #ifdef __i386__ 2031 desiredvnodes = desiredvnodes_backup; 2032 #endif 2033 } 2034 2035 void 2036 zfs_init(void) 2037 { 2038 2039 printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n"); 2040 2041 /* 2042 * Initialize .zfs directory structures 2043 */ 2044 zfsctl_init(); 2045 2046 /* 2047 * Initialize znode cache, vnode ops, etc... 2048 */ 2049 zfs_znode_init(); 2050 2051 /* 2052 * Reduce number of vnodes. Originally number of vnodes is calculated 2053 * with UFS inode in mind. We reduce it here, because it's too big for 2054 * ZFS/i386. 2055 */ 2056 zfs_vnodes_adjust(); 2057 2058 dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info); 2059 2060 zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0); 2061 } 2062 2063 void 2064 zfs_fini(void) 2065 { 2066 taskq_destroy(zfsvfs_taskq); 2067 zfsctl_fini(); 2068 zfs_znode_fini(); 2069 zfs_vnodes_adjust_back(); 2070 } 2071 2072 int 2073 zfs_busy(void) 2074 { 2075 return (zfs_active_fs_count != 0); 2076 } 2077 2078 /* 2079 * Release VOPs and unmount a suspended filesystem. 2080 */ 2081 int 2082 zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds) 2083 { 2084 ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs)); 2085 ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs)); 2086 2087 /* 2088 * We already own this, so just hold and rele it to update the 2089 * objset_t, as the one we had before may have been evicted. 2090 */ 2091 objset_t *os; 2092 VERIFY3P(ds->ds_owner, ==, zfsvfs); 2093 VERIFY(dsl_dataset_long_held(ds)); 2094 dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds)); 2095 dsl_pool_config_enter(dp, FTAG); 2096 VERIFY0(dmu_objset_from_ds(ds, &os)); 2097 dsl_pool_config_exit(dp, FTAG); 2098 zfsvfs->z_os = os; 2099 2100 /* release the VOPs */ 2101 ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs); 2102 ZFS_TEARDOWN_EXIT(zfsvfs, FTAG); 2103 2104 /* 2105 * Try to force unmount this file system. 2106 */ 2107 (void) zfs_umount(zfsvfs->z_vfs, 0); 2108 zfsvfs->z_unmounted = B_TRUE; 2109 return (0); 2110 } 2111 2112 int 2113 zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers) 2114 { 2115 int error; 2116 objset_t *os = zfsvfs->z_os; 2117 dmu_tx_t *tx; 2118 2119 if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 2120 return (SET_ERROR(EINVAL)); 2121 2122 if (newvers < zfsvfs->z_version) 2123 return (SET_ERROR(EINVAL)); 2124 2125 if (zfs_spa_version_map(newvers) > 2126 spa_version(dmu_objset_spa(zfsvfs->z_os))) 2127 return (SET_ERROR(ENOTSUP)); 2128 2129 tx = dmu_tx_create(os); 2130 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR); 2131 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { 2132 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE, 2133 ZFS_SA_ATTRS); 2134 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL); 2135 } 2136 error = dmu_tx_assign(tx, TXG_WAIT); 2137 if (error) { 2138 dmu_tx_abort(tx); 2139 return (error); 2140 } 2141 2142 error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 2143 8, 1, &newvers, tx); 2144 2145 if (error) { 2146 dmu_tx_commit(tx); 2147 return (error); 2148 } 2149 2150 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) { 2151 uint64_t sa_obj; 2152 2153 ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=, 2154 SPA_VERSION_SA); 2155 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, 2156 DMU_OT_NONE, 0, tx); 2157 2158 error = zap_add(os, MASTER_NODE_OBJ, 2159 ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); 2160 ASSERT0(error); 2161 2162 VERIFY0(sa_set_sa_object(os, sa_obj)); 2163 sa_register_update_callback(os, zfs_sa_upgrade); 2164 } 2165 2166 spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx, 2167 "from %ju to %ju", (uintmax_t)zfsvfs->z_version, 2168 (uintmax_t)newvers); 2169 dmu_tx_commit(tx); 2170 2171 zfsvfs->z_version = newvers; 2172 os->os_version = newvers; 2173 2174 zfs_set_fuid_feature(zfsvfs); 2175 2176 return (0); 2177 } 2178 2179 /* 2180 * Read a property stored within the master node. 2181 */ 2182 int 2183 zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value) 2184 { 2185 uint64_t *cached_copy = NULL; 2186 2187 /* 2188 * Figure out where in the objset_t the cached copy would live, if it 2189 * is available for the requested property. 2190 */ 2191 if (os != NULL) { 2192 switch (prop) { 2193 case ZFS_PROP_VERSION: 2194 cached_copy = &os->os_version; 2195 break; 2196 case ZFS_PROP_NORMALIZE: 2197 cached_copy = &os->os_normalization; 2198 break; 2199 case ZFS_PROP_UTF8ONLY: 2200 cached_copy = &os->os_utf8only; 2201 break; 2202 case ZFS_PROP_CASE: 2203 cached_copy = &os->os_casesensitivity; 2204 break; 2205 default: 2206 break; 2207 } 2208 } 2209 if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) { 2210 *value = *cached_copy; 2211 return (0); 2212 } 2213 2214 /* 2215 * If the property wasn't cached, look up the file system's value for 2216 * the property. For the version property, we look up a slightly 2217 * different string. 2218 */ 2219 const char *pname; 2220 int error = ENOENT; 2221 if (prop == ZFS_PROP_VERSION) { 2222 pname = ZPL_VERSION_STR; 2223 } else { 2224 pname = zfs_prop_to_name(prop); 2225 } 2226 2227 if (os != NULL) { 2228 ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS); 2229 error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value); 2230 } 2231 2232 if (error == ENOENT) { 2233 /* No value set, use the default value */ 2234 switch (prop) { 2235 case ZFS_PROP_VERSION: 2236 *value = ZPL_VERSION; 2237 break; 2238 case ZFS_PROP_NORMALIZE: 2239 case ZFS_PROP_UTF8ONLY: 2240 *value = 0; 2241 break; 2242 case ZFS_PROP_CASE: 2243 *value = ZFS_CASE_SENSITIVE; 2244 break; 2245 case ZFS_PROP_ACLTYPE: 2246 *value = ZFS_ACLTYPE_NFSV4; 2247 break; 2248 default: 2249 return (error); 2250 } 2251 error = 0; 2252 } 2253 2254 /* 2255 * If one of the methods for getting the property value above worked, 2256 * copy it into the objset_t's cache. 2257 */ 2258 if (error == 0 && cached_copy != NULL) { 2259 *cached_copy = *value; 2260 } 2261 2262 return (error); 2263 } 2264 2265 /* 2266 * Return true if the corresponding vfs's unmounted flag is set. 2267 * Otherwise return false. 2268 * If this function returns true we know VFS unmount has been initiated. 2269 */ 2270 boolean_t 2271 zfs_get_vfs_flag_unmounted(objset_t *os) 2272 { 2273 zfsvfs_t *zfvp; 2274 boolean_t unmounted = B_FALSE; 2275 2276 ASSERT3U(dmu_objset_type(os), ==, DMU_OST_ZFS); 2277 2278 mutex_enter(&os->os_user_ptr_lock); 2279 zfvp = dmu_objset_get_user(os); 2280 if (zfvp != NULL && zfvp->z_vfs != NULL && 2281 (zfvp->z_vfs->mnt_kern_flag & MNTK_UNMOUNT)) 2282 unmounted = B_TRUE; 2283 mutex_exit(&os->os_user_ptr_lock); 2284 2285 return (unmounted); 2286 } 2287 2288 #ifdef _KERNEL 2289 void 2290 zfsvfs_update_fromname(const char *oldname, const char *newname) 2291 { 2292 char tmpbuf[MAXPATHLEN]; 2293 struct mount *mp; 2294 char *fromname; 2295 size_t oldlen; 2296 2297 oldlen = strlen(oldname); 2298 2299 mtx_lock(&mountlist_mtx); 2300 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 2301 fromname = mp->mnt_stat.f_mntfromname; 2302 if (strcmp(fromname, oldname) == 0) { 2303 (void) strlcpy(fromname, newname, 2304 sizeof (mp->mnt_stat.f_mntfromname)); 2305 continue; 2306 } 2307 if (strncmp(fromname, oldname, oldlen) == 0 && 2308 (fromname[oldlen] == '/' || fromname[oldlen] == '@')) { 2309 (void) snprintf(tmpbuf, sizeof (tmpbuf), "%s%s", 2310 newname, fromname + oldlen); 2311 (void) strlcpy(fromname, tmpbuf, 2312 sizeof (mp->mnt_stat.f_mntfromname)); 2313 continue; 2314 } 2315 } 2316 mtx_unlock(&mountlist_mtx); 2317 } 2318 #endif 2319