1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>. 24 * All rights reserved. 25 * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 26 * Copyright (c) 2014 Integros [integros.com] 27 * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 
28 */ 29 30 /* Portions Copyright 2010 Robert Milkowski */ 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/kernel.h> 36 #include <sys/sysmacros.h> 37 #include <sys/kmem.h> 38 #include <sys/acl.h> 39 #include <sys/vnode.h> 40 #include <sys/vfs.h> 41 #include <sys/mntent.h> 42 #include <sys/mount.h> 43 #include <sys/cmn_err.h> 44 #include <sys/zfs_znode.h> 45 #include <sys/zfs_vnops.h> 46 #include <sys/zfs_dir.h> 47 #include <sys/zil.h> 48 #include <sys/fs/zfs.h> 49 #include <sys/dmu.h> 50 #include <sys/dsl_prop.h> 51 #include <sys/dsl_dataset.h> 52 #include <sys/dsl_deleg.h> 53 #include <sys/spa.h> 54 #include <sys/zap.h> 55 #include <sys/sa.h> 56 #include <sys/sa_impl.h> 57 #include <sys/policy.h> 58 #include <sys/atomic.h> 59 #include <sys/zfs_ioctl.h> 60 #include <sys/zfs_ctldir.h> 61 #include <sys/zfs_fuid.h> 62 #include <sys/sunddi.h> 63 #include <sys/dmu_objset.h> 64 #include <sys/dsl_dir.h> 65 #include <sys/spa_boot.h> 66 #include <sys/jail.h> 67 #include <ufs/ufs/quota.h> 68 #include <sys/zfs_quota.h> 69 70 #include "zfs_comutil.h" 71 72 #ifndef MNTK_VMSETSIZE_BUG 73 #define MNTK_VMSETSIZE_BUG 0 74 #endif 75 #ifndef MNTK_NOMSYNC 76 #define MNTK_NOMSYNC 8 77 #endif 78 79 /* BEGIN CSTYLED */ 80 struct mtx zfs_debug_mtx; 81 MTX_SYSINIT(zfs_debug_mtx, &zfs_debug_mtx, "zfs_debug", MTX_DEF); 82 83 SYSCTL_NODE(_vfs, OID_AUTO, zfs, CTLFLAG_RW, 0, "ZFS file system"); 84 85 int zfs_super_owner; 86 SYSCTL_INT(_vfs_zfs, OID_AUTO, super_owner, CTLFLAG_RW, &zfs_super_owner, 0, 87 "File system owner can perform privileged operation on his file systems"); 88 89 int zfs_debug_level; 90 SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0, 91 "Debug level"); 92 93 SYSCTL_NODE(_vfs_zfs, OID_AUTO, version, CTLFLAG_RD, 0, "ZFS versions"); 94 static int zfs_version_acl = ZFS_ACL_VERSION; 95 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, acl, CTLFLAG_RD, &zfs_version_acl, 0, 96 "ZFS_ACL_VERSION"); 97 static int 
zfs_version_spa = SPA_VERSION; 98 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, spa, CTLFLAG_RD, &zfs_version_spa, 0, 99 "SPA_VERSION"); 100 static int zfs_version_zpl = ZPL_VERSION; 101 SYSCTL_INT(_vfs_zfs_version, OID_AUTO, zpl, CTLFLAG_RD, &zfs_version_zpl, 0, 102 "ZPL_VERSION"); 103 /* END CSTYLED */ 104 105 #if __FreeBSD_version >= 1400018 106 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, 107 bool *mp_busy); 108 #else 109 static int zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg); 110 #endif 111 static int zfs_mount(vfs_t *vfsp); 112 static int zfs_umount(vfs_t *vfsp, int fflag); 113 static int zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp); 114 static int zfs_statfs(vfs_t *vfsp, struct statfs *statp); 115 static int zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp); 116 static int zfs_sync(vfs_t *vfsp, int waitfor); 117 #if __FreeBSD_version >= 1300098 118 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp, 119 struct ucred **credanonp, int *numsecflavors, int *secflavors); 120 #else 121 static int zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp, 122 struct ucred **credanonp, int *numsecflavors, int **secflavors); 123 #endif 124 static int zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp); 125 static void zfs_freevfs(vfs_t *vfsp); 126 127 struct vfsops zfs_vfsops = { 128 .vfs_mount = zfs_mount, 129 .vfs_unmount = zfs_umount, 130 #if __FreeBSD_version >= 1300049 131 .vfs_root = vfs_cache_root, 132 .vfs_cachedroot = zfs_root, 133 #else 134 .vfs_root = zfs_root, 135 #endif 136 .vfs_statfs = zfs_statfs, 137 .vfs_vget = zfs_vget, 138 .vfs_sync = zfs_sync, 139 .vfs_checkexp = zfs_checkexp, 140 .vfs_fhtovp = zfs_fhtovp, 141 .vfs_quotactl = zfs_quotactl, 142 }; 143 144 VFS_SET(zfs_vfsops, zfs, VFCF_JAIL | VFCF_DELEGADMIN); 145 146 /* 147 * We need to keep a count of active fs's. 
148 * This is necessary to prevent our module 149 * from being unloaded after a umount -f 150 */ 151 static uint32_t zfs_active_fs_count = 0; 152 153 int 154 zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val, 155 char *setpoint) 156 { 157 int error; 158 zfsvfs_t *zfvp; 159 vfs_t *vfsp; 160 objset_t *os; 161 uint64_t tmp = *val; 162 163 error = dmu_objset_from_ds(ds, &os); 164 if (error != 0) 165 return (error); 166 167 error = getzfsvfs_impl(os, &zfvp); 168 if (error != 0) 169 return (error); 170 if (zfvp == NULL) 171 return (ENOENT); 172 vfsp = zfvp->z_vfs; 173 switch (zfs_prop) { 174 case ZFS_PROP_ATIME: 175 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) 176 tmp = 0; 177 if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) 178 tmp = 1; 179 break; 180 case ZFS_PROP_DEVICES: 181 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 182 tmp = 0; 183 if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) 184 tmp = 1; 185 break; 186 case ZFS_PROP_EXEC: 187 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) 188 tmp = 0; 189 if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) 190 tmp = 1; 191 break; 192 case ZFS_PROP_SETUID: 193 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 194 tmp = 0; 195 if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) 196 tmp = 1; 197 break; 198 case ZFS_PROP_READONLY: 199 if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) 200 tmp = 0; 201 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) 202 tmp = 1; 203 break; 204 case ZFS_PROP_XATTR: 205 if (zfvp->z_flags & ZSB_XATTR) 206 tmp = zfvp->z_xattr; 207 break; 208 case ZFS_PROP_NBMAND: 209 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) 210 tmp = 0; 211 if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) 212 tmp = 1; 213 break; 214 default: 215 vfs_unbusy(vfsp); 216 return (ENOENT); 217 } 218 219 vfs_unbusy(vfsp); 220 if (tmp != *val) { 221 (void) strcpy(setpoint, "temporary"); 222 *val = tmp; 223 } 224 return (0); 225 } 226 227 static int 228 zfs_getquota(zfsvfs_t *zfsvfs, uid_t id, int isgroup, struct 
dqblk64 *dqp) 229 { 230 int error = 0; 231 char buf[32]; 232 uint64_t usedobj, quotaobj; 233 uint64_t quota, used = 0; 234 timespec_t now; 235 236 usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT; 237 quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj; 238 239 if (quotaobj == 0 || zfsvfs->z_replay) { 240 error = ENOENT; 241 goto done; 242 } 243 (void) sprintf(buf, "%llx", (longlong_t)id); 244 if ((error = zap_lookup(zfsvfs->z_os, quotaobj, 245 buf, sizeof (quota), 1, "a)) != 0) { 246 dprintf("%s(%d): quotaobj lookup failed\n", 247 __FUNCTION__, __LINE__); 248 goto done; 249 } 250 /* 251 * quota(8) uses bsoftlimit as "quoota", and hardlimit as "limit". 252 * So we set them to be the same. 253 */ 254 dqp->dqb_bsoftlimit = dqp->dqb_bhardlimit = btodb(quota); 255 error = zap_lookup(zfsvfs->z_os, usedobj, buf, sizeof (used), 1, &used); 256 if (error && error != ENOENT) { 257 dprintf("%s(%d): usedobj failed; %d\n", 258 __FUNCTION__, __LINE__, error); 259 goto done; 260 } 261 dqp->dqb_curblocks = btodb(used); 262 dqp->dqb_ihardlimit = dqp->dqb_isoftlimit = 0; 263 vfs_timestamp(&now); 264 /* 265 * Setting this to 0 causes FreeBSD quota(8) to print 266 * the number of days since the epoch, which isn't 267 * particularly useful. 
268 */ 269 dqp->dqb_btime = dqp->dqb_itime = now.tv_sec; 270 done: 271 return (error); 272 } 273 274 static int 275 #if __FreeBSD_version >= 1400018 276 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg, bool *mp_busy) 277 #else 278 zfs_quotactl(vfs_t *vfsp, int cmds, uid_t id, void *arg) 279 #endif 280 { 281 zfsvfs_t *zfsvfs = vfsp->vfs_data; 282 struct thread *td; 283 int cmd, type, error = 0; 284 int bitsize; 285 zfs_userquota_prop_t quota_type; 286 struct dqblk64 dqblk = { 0 }; 287 288 td = curthread; 289 cmd = cmds >> SUBCMDSHIFT; 290 type = cmds & SUBCMDMASK; 291 292 ZFS_ENTER(zfsvfs); 293 if (id == -1) { 294 switch (type) { 295 case USRQUOTA: 296 id = td->td_ucred->cr_ruid; 297 break; 298 case GRPQUOTA: 299 id = td->td_ucred->cr_rgid; 300 break; 301 default: 302 error = EINVAL; 303 #if __FreeBSD_version < 1400018 304 if (cmd == Q_QUOTAON || cmd == Q_QUOTAOFF) 305 vfs_unbusy(vfsp); 306 #endif 307 goto done; 308 } 309 } 310 /* 311 * Map BSD type to: 312 * ZFS_PROP_USERUSED, 313 * ZFS_PROP_USERQUOTA, 314 * ZFS_PROP_GROUPUSED, 315 * ZFS_PROP_GROUPQUOTA 316 */ 317 switch (cmd) { 318 case Q_SETQUOTA: 319 case Q_SETQUOTA32: 320 if (type == USRQUOTA) 321 quota_type = ZFS_PROP_USERQUOTA; 322 else if (type == GRPQUOTA) 323 quota_type = ZFS_PROP_GROUPQUOTA; 324 else 325 error = EINVAL; 326 break; 327 case Q_GETQUOTA: 328 case Q_GETQUOTA32: 329 if (type == USRQUOTA) 330 quota_type = ZFS_PROP_USERUSED; 331 else if (type == GRPQUOTA) 332 quota_type = ZFS_PROP_GROUPUSED; 333 else 334 error = EINVAL; 335 break; 336 } 337 338 /* 339 * Depending on the cmd, we may need to get 340 * the ruid and domain (see fuidstr_to_sid?), 341 * the fuid (how?), or other information. 342 * Create fuid using zfs_fuid_create(zfsvfs, id, 343 * ZFS_OWNER or ZFS_GROUP, cr, &fuidp)? 344 * I think I can use just the id? 345 * 346 * Look at zfs_id_overquota() to look up a quota. 
347 * zap_lookup(something, quotaobj, fuidstring, 348 * sizeof (long long), 1, "a) 349 * 350 * See zfs_set_userquota() to set a quota. 351 */ 352 if ((uint32_t)type >= MAXQUOTAS) { 353 error = EINVAL; 354 goto done; 355 } 356 357 switch (cmd) { 358 case Q_GETQUOTASIZE: 359 bitsize = 64; 360 error = copyout(&bitsize, arg, sizeof (int)); 361 break; 362 case Q_QUOTAON: 363 // As far as I can tell, you can't turn quotas on or off on zfs 364 error = 0; 365 #if __FreeBSD_version < 1400018 366 vfs_unbusy(vfsp); 367 #endif 368 break; 369 case Q_QUOTAOFF: 370 error = ENOTSUP; 371 #if __FreeBSD_version < 1400018 372 vfs_unbusy(vfsp); 373 #endif 374 break; 375 case Q_SETQUOTA: 376 error = copyin(arg, &dqblk, sizeof (dqblk)); 377 if (error == 0) 378 error = zfs_set_userquota(zfsvfs, quota_type, 379 "", id, dbtob(dqblk.dqb_bhardlimit)); 380 break; 381 case Q_GETQUOTA: 382 error = zfs_getquota(zfsvfs, id, type == GRPQUOTA, &dqblk); 383 if (error == 0) 384 error = copyout(&dqblk, arg, sizeof (dqblk)); 385 break; 386 default: 387 error = EINVAL; 388 break; 389 } 390 done: 391 ZFS_EXIT(zfsvfs); 392 return (error); 393 } 394 395 396 boolean_t 397 zfs_is_readonly(zfsvfs_t *zfsvfs) 398 { 399 return (!!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY)); 400 } 401 402 /*ARGSUSED*/ 403 static int 404 zfs_sync(vfs_t *vfsp, int waitfor) 405 { 406 407 /* 408 * Data integrity is job one. We don't want a compromised kernel 409 * writing to the storage pool, so we never sync during panic. 410 */ 411 if (panicstr) 412 return (0); 413 414 /* 415 * Ignore the system syncher. ZFS already commits async data 416 * at zfs_txg_timeout intervals. 417 */ 418 if (waitfor == MNT_LAZY) 419 return (0); 420 421 if (vfsp != NULL) { 422 /* 423 * Sync a specific filesystem. 
424 */ 425 zfsvfs_t *zfsvfs = vfsp->vfs_data; 426 dsl_pool_t *dp; 427 int error; 428 429 error = vfs_stdsync(vfsp, waitfor); 430 if (error != 0) 431 return (error); 432 433 ZFS_ENTER(zfsvfs); 434 dp = dmu_objset_pool(zfsvfs->z_os); 435 436 /* 437 * If the system is shutting down, then skip any 438 * filesystems which may exist on a suspended pool. 439 */ 440 if (rebooting && spa_suspended(dp->dp_spa)) { 441 ZFS_EXIT(zfsvfs); 442 return (0); 443 } 444 445 if (zfsvfs->z_log != NULL) 446 zil_commit(zfsvfs->z_log, 0); 447 448 ZFS_EXIT(zfsvfs); 449 } else { 450 /* 451 * Sync all ZFS filesystems. This is what happens when you 452 * run sync(8). Unlike other filesystems, ZFS honors the 453 * request by waiting for all pools to commit all dirty data. 454 */ 455 spa_sync_allpools(); 456 } 457 458 return (0); 459 } 460 461 static void 462 atime_changed_cb(void *arg, uint64_t newval) 463 { 464 zfsvfs_t *zfsvfs = arg; 465 466 if (newval == TRUE) { 467 zfsvfs->z_atime = TRUE; 468 zfsvfs->z_vfs->vfs_flag &= ~MNT_NOATIME; 469 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 470 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 471 } else { 472 zfsvfs->z_atime = FALSE; 473 zfsvfs->z_vfs->vfs_flag |= MNT_NOATIME; 474 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 475 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 476 } 477 } 478 479 static void 480 xattr_changed_cb(void *arg, uint64_t newval) 481 { 482 zfsvfs_t *zfsvfs = arg; 483 484 if (newval == ZFS_XATTR_OFF) { 485 zfsvfs->z_flags &= ~ZSB_XATTR; 486 } else { 487 zfsvfs->z_flags |= ZSB_XATTR; 488 489 if (newval == ZFS_XATTR_SA) 490 zfsvfs->z_xattr_sa = B_TRUE; 491 else 492 zfsvfs->z_xattr_sa = B_FALSE; 493 } 494 } 495 496 static void 497 blksz_changed_cb(void *arg, uint64_t newval) 498 { 499 zfsvfs_t *zfsvfs = arg; 500 ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os))); 501 ASSERT3U(newval, >=, SPA_MINBLOCKSIZE); 502 ASSERT(ISP2(newval)); 503 504 zfsvfs->z_max_blksz = newval; 505 
zfsvfs->z_vfs->mnt_stat.f_iosize = newval; 506 } 507 508 static void 509 readonly_changed_cb(void *arg, uint64_t newval) 510 { 511 zfsvfs_t *zfsvfs = arg; 512 513 if (newval) { 514 /* XXX locking on vfs_flag? */ 515 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 516 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 517 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 518 } else { 519 /* XXX locking on vfs_flag? */ 520 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 521 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 522 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 523 } 524 } 525 526 static void 527 setuid_changed_cb(void *arg, uint64_t newval) 528 { 529 zfsvfs_t *zfsvfs = arg; 530 531 if (newval == FALSE) { 532 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 533 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 534 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 535 } else { 536 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 537 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 538 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 539 } 540 } 541 542 static void 543 exec_changed_cb(void *arg, uint64_t newval) 544 { 545 zfsvfs_t *zfsvfs = arg; 546 547 if (newval == FALSE) { 548 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 549 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 550 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 551 } else { 552 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 553 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 554 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 555 } 556 } 557 558 /* 559 * The nbmand mount option can be changed at mount time. 
560 * We can't allow it to be toggled on live file systems or incorrect 561 * behavior may be seen from cifs clients 562 * 563 * This property isn't registered via dsl_prop_register(), but this callback 564 * will be called when a file system is first mounted 565 */ 566 static void 567 nbmand_changed_cb(void *arg, uint64_t newval) 568 { 569 zfsvfs_t *zfsvfs = arg; 570 if (newval == FALSE) { 571 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND); 572 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0); 573 } else { 574 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND); 575 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0); 576 } 577 } 578 579 static void 580 snapdir_changed_cb(void *arg, uint64_t newval) 581 { 582 zfsvfs_t *zfsvfs = arg; 583 584 zfsvfs->z_show_ctldir = newval; 585 } 586 587 static void 588 acl_mode_changed_cb(void *arg, uint64_t newval) 589 { 590 zfsvfs_t *zfsvfs = arg; 591 592 zfsvfs->z_acl_mode = newval; 593 } 594 595 static void 596 acl_inherit_changed_cb(void *arg, uint64_t newval) 597 { 598 zfsvfs_t *zfsvfs = arg; 599 600 zfsvfs->z_acl_inherit = newval; 601 } 602 603 static void 604 acl_type_changed_cb(void *arg, uint64_t newval) 605 { 606 zfsvfs_t *zfsvfs = arg; 607 608 zfsvfs->z_acl_type = newval; 609 } 610 611 static int 612 zfs_register_callbacks(vfs_t *vfsp) 613 { 614 struct dsl_dataset *ds = NULL; 615 objset_t *os = NULL; 616 zfsvfs_t *zfsvfs = NULL; 617 uint64_t nbmand; 618 boolean_t readonly = B_FALSE; 619 boolean_t do_readonly = B_FALSE; 620 boolean_t setuid = B_FALSE; 621 boolean_t do_setuid = B_FALSE; 622 boolean_t exec = B_FALSE; 623 boolean_t do_exec = B_FALSE; 624 boolean_t xattr = B_FALSE; 625 boolean_t atime = B_FALSE; 626 boolean_t do_atime = B_FALSE; 627 boolean_t do_xattr = B_FALSE; 628 int error = 0; 629 630 ASSERT3P(vfsp, !=, NULL); 631 zfsvfs = vfsp->vfs_data; 632 ASSERT3P(zfsvfs, !=, NULL); 633 os = zfsvfs->z_os; 634 635 /* 636 * This function can be called for a snapshot when we update snapshot's 637 * mount point, which 
isn't really supported. 638 */ 639 if (dmu_objset_is_snapshot(os)) 640 return (EOPNOTSUPP); 641 642 /* 643 * The act of registering our callbacks will destroy any mount 644 * options we may have. In order to enable temporary overrides 645 * of mount options, we stash away the current values and 646 * restore them after we register the callbacks. 647 */ 648 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) || 649 !spa_writeable(dmu_objset_spa(os))) { 650 readonly = B_TRUE; 651 do_readonly = B_TRUE; 652 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 653 readonly = B_FALSE; 654 do_readonly = B_TRUE; 655 } 656 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 657 setuid = B_FALSE; 658 do_setuid = B_TRUE; 659 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 660 setuid = B_TRUE; 661 do_setuid = B_TRUE; 662 } 663 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 664 exec = B_FALSE; 665 do_exec = B_TRUE; 666 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 667 exec = B_TRUE; 668 do_exec = B_TRUE; 669 } 670 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 671 zfsvfs->z_xattr = xattr = ZFS_XATTR_OFF; 672 do_xattr = B_TRUE; 673 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 674 zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR; 675 do_xattr = B_TRUE; 676 } else if (vfs_optionisset(vfsp, MNTOPT_DIRXATTR, NULL)) { 677 zfsvfs->z_xattr = xattr = ZFS_XATTR_DIR; 678 do_xattr = B_TRUE; 679 } else if (vfs_optionisset(vfsp, MNTOPT_SAXATTR, NULL)) { 680 zfsvfs->z_xattr = xattr = ZFS_XATTR_SA; 681 do_xattr = B_TRUE; 682 } 683 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 684 atime = B_FALSE; 685 do_atime = B_TRUE; 686 } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 687 atime = B_TRUE; 688 do_atime = B_TRUE; 689 } 690 691 /* 692 * We need to enter pool configuration here, so that we can use 693 * dsl_prop_get_int_ds() to handle the special nbmand property below. 
694 * dsl_prop_get_integer() can not be used, because it has to acquire 695 * spa_namespace_lock and we can not do that because we already hold 696 * z_teardown_lock. The problem is that spa_write_cachefile() is called 697 * with spa_namespace_lock held and the function calls ZFS vnode 698 * operations to write the cache file and thus z_teardown_lock is 699 * acquired after spa_namespace_lock. 700 */ 701 ds = dmu_objset_ds(os); 702 dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 703 704 /* 705 * nbmand is a special property. It can only be changed at 706 * mount time. 707 * 708 * This is weird, but it is documented to only be changeable 709 * at mount time. 710 */ 711 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 712 nbmand = B_FALSE; 713 } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) { 714 nbmand = B_TRUE; 715 } else if ((error = dsl_prop_get_int_ds(ds, "nbmand", &nbmand) != 0)) { 716 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 717 return (error); 718 } 719 720 /* 721 * Register property callbacks. 722 * 723 * It would probably be fine to just check for i/o error from 724 * the first prop_register(), but I guess I like to go 725 * overboard... 726 */ 727 error = dsl_prop_register(ds, 728 zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs); 729 error = error ? error : dsl_prop_register(ds, 730 zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs); 731 error = error ? error : dsl_prop_register(ds, 732 zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs); 733 error = error ? error : dsl_prop_register(ds, 734 zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs); 735 error = error ? error : dsl_prop_register(ds, 736 zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs); 737 error = error ? error : dsl_prop_register(ds, 738 zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs); 739 error = error ? 
error : dsl_prop_register(ds, 740 zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs); 741 error = error ? error : dsl_prop_register(ds, 742 zfs_prop_to_name(ZFS_PROP_ACLTYPE), acl_type_changed_cb, zfsvfs); 743 error = error ? error : dsl_prop_register(ds, 744 zfs_prop_to_name(ZFS_PROP_ACLMODE), acl_mode_changed_cb, zfsvfs); 745 error = error ? error : dsl_prop_register(ds, 746 zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, 747 zfsvfs); 748 dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 749 if (error) 750 goto unregister; 751 752 /* 753 * Invoke our callbacks to restore temporary mount options. 754 */ 755 if (do_readonly) 756 readonly_changed_cb(zfsvfs, readonly); 757 if (do_setuid) 758 setuid_changed_cb(zfsvfs, setuid); 759 if (do_exec) 760 exec_changed_cb(zfsvfs, exec); 761 if (do_xattr) 762 xattr_changed_cb(zfsvfs, xattr); 763 if (do_atime) 764 atime_changed_cb(zfsvfs, atime); 765 766 nbmand_changed_cb(zfsvfs, nbmand); 767 768 return (0); 769 770 unregister: 771 dsl_prop_unregister_all(ds, zfsvfs); 772 return (error); 773 } 774 775 /* 776 * Associate this zfsvfs with the given objset, which must be owned. 777 * This will cache a bunch of on-disk state from the objset in the 778 * zfsvfs. 779 */ 780 static int 781 zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os) 782 { 783 int error; 784 uint64_t val; 785 786 zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE; 787 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 788 zfsvfs->z_os = os; 789 790 error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); 791 if (error != 0) 792 return (error); 793 if (zfsvfs->z_version > 794 zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) { 795 (void) printf("Can't mount a version %lld file system " 796 "on a version %lld pool\n. 
Pool must be upgraded to mount " 797 "this file system.", (u_longlong_t)zfsvfs->z_version, 798 (u_longlong_t)spa_version(dmu_objset_spa(os))); 799 return (SET_ERROR(ENOTSUP)); 800 } 801 error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val); 802 if (error != 0) 803 return (error); 804 zfsvfs->z_norm = (int)val; 805 806 error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val); 807 if (error != 0) 808 return (error); 809 zfsvfs->z_utf8 = (val != 0); 810 811 error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val); 812 if (error != 0) 813 return (error); 814 zfsvfs->z_case = (uint_t)val; 815 816 error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val); 817 if (error != 0) 818 return (error); 819 zfsvfs->z_acl_type = (uint_t)val; 820 821 /* 822 * Fold case on file systems that are always or sometimes case 823 * insensitive. 824 */ 825 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || 826 zfsvfs->z_case == ZFS_CASE_MIXED) 827 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 828 829 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os); 830 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os); 831 832 uint64_t sa_obj = 0; 833 if (zfsvfs->z_use_sa) { 834 /* should either have both of these objects or none */ 835 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, 836 &sa_obj); 837 if (error != 0) 838 return (error); 839 840 error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val); 841 if (error == 0 && val == ZFS_XATTR_SA) 842 zfsvfs->z_xattr_sa = B_TRUE; 843 } 844 845 error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, 846 &zfsvfs->z_attr_table); 847 if (error != 0) 848 return (error); 849 850 if (zfsvfs->z_version >= ZPL_VERSION_SA) 851 sa_register_update_callback(os, zfs_sa_upgrade); 852 853 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, 854 &zfsvfs->z_root); 855 if (error != 0) 856 return (error); 857 ASSERT3U(zfsvfs->z_root, !=, 0); 858 859 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, 860 &zfsvfs->z_unlinkedobj); 861 if (error != 0) 862 return 
(error); 863 864 error = zap_lookup(os, MASTER_NODE_OBJ, 865 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA], 866 8, 1, &zfsvfs->z_userquota_obj); 867 if (error == ENOENT) 868 zfsvfs->z_userquota_obj = 0; 869 else if (error != 0) 870 return (error); 871 872 error = zap_lookup(os, MASTER_NODE_OBJ, 873 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA], 874 8, 1, &zfsvfs->z_groupquota_obj); 875 if (error == ENOENT) 876 zfsvfs->z_groupquota_obj = 0; 877 else if (error != 0) 878 return (error); 879 880 error = zap_lookup(os, MASTER_NODE_OBJ, 881 zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA], 882 8, 1, &zfsvfs->z_projectquota_obj); 883 if (error == ENOENT) 884 zfsvfs->z_projectquota_obj = 0; 885 else if (error != 0) 886 return (error); 887 888 error = zap_lookup(os, MASTER_NODE_OBJ, 889 zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA], 890 8, 1, &zfsvfs->z_userobjquota_obj); 891 if (error == ENOENT) 892 zfsvfs->z_userobjquota_obj = 0; 893 else if (error != 0) 894 return (error); 895 896 error = zap_lookup(os, MASTER_NODE_OBJ, 897 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA], 898 8, 1, &zfsvfs->z_groupobjquota_obj); 899 if (error == ENOENT) 900 zfsvfs->z_groupobjquota_obj = 0; 901 else if (error != 0) 902 return (error); 903 904 error = zap_lookup(os, MASTER_NODE_OBJ, 905 zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTOBJQUOTA], 906 8, 1, &zfsvfs->z_projectobjquota_obj); 907 if (error == ENOENT) 908 zfsvfs->z_projectobjquota_obj = 0; 909 else if (error != 0) 910 return (error); 911 912 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, 913 &zfsvfs->z_fuid_obj); 914 if (error == ENOENT) 915 zfsvfs->z_fuid_obj = 0; 916 else if (error != 0) 917 return (error); 918 919 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, 920 &zfsvfs->z_shares_dir); 921 if (error == ENOENT) 922 zfsvfs->z_shares_dir = 0; 923 else if (error != 0) 924 return (error); 925 926 /* 927 * Only use the name cache if we are looking for a 928 * name on a file system that 
does not require normalization 929 * or case folding. We can also look there if we happen to be 930 * on a non-normalizing, mixed sensitivity file system IF we 931 * are looking for the exact name (which is always the case on 932 * FreeBSD). 933 */ 934 zfsvfs->z_use_namecache = !zfsvfs->z_norm || 935 ((zfsvfs->z_case == ZFS_CASE_MIXED) && 936 !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER)); 937 938 return (0); 939 } 940 941 taskq_t *zfsvfs_taskq; 942 943 static void 944 zfsvfs_task_unlinked_drain(void *context, int pending __unused) 945 { 946 947 zfs_unlinked_drain((zfsvfs_t *)context); 948 } 949 950 int 951 zfsvfs_create(const char *osname, boolean_t readonly, zfsvfs_t **zfvp) 952 { 953 objset_t *os; 954 zfsvfs_t *zfsvfs; 955 int error; 956 boolean_t ro = (readonly || (strchr(osname, '@') != NULL)); 957 958 /* 959 * XXX: Fix struct statfs so this isn't necessary! 960 * 961 * The 'osname' is used as the filesystem's special node, which means 962 * it must fit in statfs.f_mntfromname, or else it can't be 963 * enumerated, so libzfs_mnttab_find() returns NULL, which causes 964 * 'zfs unmount' to think it's not mounted when it is. 
965 */ 966 if (strlen(osname) >= MNAMELEN) 967 return (SET_ERROR(ENAMETOOLONG)); 968 969 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 970 971 error = dmu_objset_own(osname, DMU_OST_ZFS, ro, B_TRUE, zfsvfs, 972 &os); 973 if (error != 0) { 974 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 975 return (error); 976 } 977 978 error = zfsvfs_create_impl(zfvp, zfsvfs, os); 979 980 return (error); 981 } 982 983 984 int 985 zfsvfs_create_impl(zfsvfs_t **zfvp, zfsvfs_t *zfsvfs, objset_t *os) 986 { 987 int error; 988 989 zfsvfs->z_vfs = NULL; 990 zfsvfs->z_parent = zfsvfs; 991 992 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 993 mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL); 994 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 995 offsetof(znode_t, z_link_node)); 996 TASK_INIT(&zfsvfs->z_unlinked_drain_task, 0, 997 zfsvfs_task_unlinked_drain, zfsvfs); 998 ZFS_TEARDOWN_INIT(zfsvfs); 999 ZFS_TEARDOWN_INACTIVE_INIT(zfsvfs); 1000 rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL); 1001 for (int i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1002 mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); 1003 1004 error = zfsvfs_init(zfsvfs, os); 1005 if (error != 0) { 1006 dmu_objset_disown(os, B_TRUE, zfsvfs); 1007 *zfvp = NULL; 1008 kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1009 return (error); 1010 } 1011 1012 *zfvp = zfsvfs; 1013 return (0); 1014 } 1015 1016 static int 1017 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting) 1018 { 1019 int error; 1020 1021 /* 1022 * Check for a bad on-disk format version now since we 1023 * lied about owning the dataset readonly before. 
1024 */ 1025 if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) && 1026 dmu_objset_incompatible_encryption_version(zfsvfs->z_os)) 1027 return (SET_ERROR(EROFS)); 1028 1029 error = zfs_register_callbacks(zfsvfs->z_vfs); 1030 if (error) 1031 return (error); 1032 1033 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 1034 1035 /* 1036 * If we are not mounting (ie: online recv), then we don't 1037 * have to worry about replaying the log as we blocked all 1038 * operations out since we closed the ZIL. 1039 */ 1040 if (mounting) { 1041 boolean_t readonly; 1042 1043 ASSERT3P(zfsvfs->z_kstat.dk_kstats, ==, NULL); 1044 dataset_kstats_create(&zfsvfs->z_kstat, zfsvfs->z_os); 1045 1046 /* 1047 * During replay we remove the read only flag to 1048 * allow replays to succeed. 1049 */ 1050 readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 1051 if (readonly != 0) { 1052 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 1053 } else { 1054 dsl_dir_t *dd; 1055 zap_stats_t zs; 1056 1057 if (zap_get_stats(zfsvfs->z_os, zfsvfs->z_unlinkedobj, 1058 &zs) == 0) { 1059 dataset_kstats_update_nunlinks_kstat( 1060 &zfsvfs->z_kstat, zs.zs_num_entries); 1061 dprintf_ds(zfsvfs->z_os->os_dsl_dataset, 1062 "num_entries in unlinked set: %llu", 1063 (u_longlong_t)zs.zs_num_entries); 1064 } 1065 1066 zfs_unlinked_drain(zfsvfs); 1067 dd = zfsvfs->z_os->os_dsl_dataset->ds_dir; 1068 dd->dd_activity_cancelled = B_FALSE; 1069 } 1070 1071 /* 1072 * Parse and replay the intent log. 1073 * 1074 * Because of ziltest, this must be done after 1075 * zfs_unlinked_drain(). (Further note: ziltest 1076 * doesn't use readonly mounts, where 1077 * zfs_unlinked_drain() isn't called.) This is because 1078 * ziltest causes spa_sync() to think it's committed, 1079 * but actually it is not, so the intent log contains 1080 * many txg's worth of changes. 
1081 * 1082 * In particular, if object N is in the unlinked set in 1083 * the last txg to actually sync, then it could be 1084 * actually freed in a later txg and then reallocated 1085 * in a yet later txg. This would write a "create 1086 * object N" record to the intent log. Normally, this 1087 * would be fine because the spa_sync() would have 1088 * written out the fact that object N is free, before 1089 * we could write the "create object N" intent log 1090 * record. 1091 * 1092 * But when we are in ziltest mode, we advance the "open 1093 * txg" without actually spa_sync()-ing the changes to 1094 * disk. So we would see that object N is still 1095 * allocated and in the unlinked set, and there is an 1096 * intent log record saying to allocate it. 1097 */ 1098 if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) { 1099 if (zil_replay_disable) { 1100 zil_destroy(zfsvfs->z_log, B_FALSE); 1101 } else { 1102 boolean_t use_nc = zfsvfs->z_use_namecache; 1103 zfsvfs->z_use_namecache = B_FALSE; 1104 zfsvfs->z_replay = B_TRUE; 1105 zil_replay(zfsvfs->z_os, zfsvfs, 1106 zfs_replay_vector); 1107 zfsvfs->z_replay = B_FALSE; 1108 zfsvfs->z_use_namecache = use_nc; 1109 } 1110 } 1111 1112 /* restore readonly bit */ 1113 if (readonly != 0) 1114 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 1115 } 1116 1117 /* 1118 * Set the objset user_ptr to track its zfsvfs. 
	 */
	mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
	mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);

	return (0);
}

/*
 * Destroy a zfsvfs_t: tear down its locks and znode list and free the
 * structure itself.  Callers dispose of the objset separately (see the
 * error path of zfs_domount() and zfs_umount()).
 */
void
zfsvfs_free(zfsvfs_t *zfsvfs)
{
	int i;

	zfs_fuid_destroy(zfsvfs);

	mutex_destroy(&zfsvfs->z_znodes_lock);
	mutex_destroy(&zfsvfs->z_lock);
	/* All znodes must already be gone; see zfsvfs_teardown(). */
	ASSERT3U(zfsvfs->z_nr_znodes, ==, 0);
	list_destroy(&zfsvfs->z_all_znodes);
	ZFS_TEARDOWN_DESTROY(zfsvfs);
	ZFS_TEARDOWN_INACTIVE_DESTROY(zfsvfs);
	rw_destroy(&zfsvfs->z_fuid_lock);
	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
	dataset_kstats_destroy(&zfsvfs->z_kstat);
	kmem_free(zfsvfs, sizeof (zfsvfs_t));
}

/*
 * Recompute z_use_fuids/z_use_sa from the ZPL version and mirror the
 * FUID capability into the VFS feature flags (if a vfs_t is attached).
 */
static void
zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
{
	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
	if (zfsvfs->z_vfs) {
		if (zfsvfs->z_use_fuids) {
			vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
			vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
			vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
			vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
		} else {
			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
			vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
		}
	}
	zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
}

/*
 * Do the actual work of mounting dataset 'osname' on 'vfsp': create the
 * zfsvfs_t, set VFS/mnt flags and the fsid, advertise features, and
 * either pin a snapshot read-only or run full setup via zfsvfs_setup().
 * On error the objset is disowned and the zfsvfs_t freed.
 */
static int
zfs_domount(vfs_t *vfsp, char *osname)
{
	uint64_t recordsize, fsid_guid;
	int error = 0;
	zfsvfs_t *zfsvfs;

	ASSERT3P(vfsp, !=, NULL);
	ASSERT3P(osname, !=, NULL);

	error = zfsvfs_create(osname, vfsp->mnt_flag & MNT_RDONLY, &zfsvfs);
	if (error)
		return (error);
	zfsvfs->z_vfs = vfsp;

	if ((error = dsl_prop_get_integer(osname,
	    "recordsize", &recordsize, NULL)))
		goto out;
	zfsvfs->z_vfs->vfs_bsize = SPA_MINBLOCKSIZE;
	zfsvfs->z_vfs->mnt_stat.f_iosize = recordsize;

	vfsp->vfs_data = zfsvfs;
	vfsp->mnt_flag |= MNT_LOCAL;
	vfsp->mnt_kern_flag |= MNTK_LOOKUP_SHARED;
	vfsp->mnt_kern_flag |= MNTK_SHARED_WRITES;
	vfsp->mnt_kern_flag |= MNTK_EXTENDED_SHARED;
	/*
	 * This can cause a loss of coherence between ARC and page cache
	 * on ZoF - unclear if the problem is in FreeBSD or ZoF
	 */
	vfsp->mnt_kern_flag |= MNTK_NO_IOPF;	/* vn_io_fault can be used */
	vfsp->mnt_kern_flag |= MNTK_NOMSYNC;
	vfsp->mnt_kern_flag |= MNTK_VMSETSIZE_BUG;

#if defined(_KERNEL) && !defined(KMEM_DEBUG)
	vfsp->mnt_kern_flag |= MNTK_FPLOOKUP;
#endif
	/*
	 * The fsid is 64 bits, composed of an 8-bit fs type, which
	 * separates our fsid from any other filesystem types, and a
	 * 56-bit objset unique ID.  The objset unique ID is unique to
	 * all objsets open on this system, provided by unique_create().
	 * The 8-bit fs type must be put in the low bits of fsid[1]
	 * because that's where other Solaris filesystems put it.
	 */
	fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
	ASSERT3U((fsid_guid & ~((1ULL << 56) - 1)), ==, 0);
	vfsp->vfs_fsid.val[0] = fsid_guid;
	vfsp->vfs_fsid.val[1] = ((fsid_guid >> 32) << 8) |
	    (vfsp->mnt_vfc->vfc_typenum & 0xFF);

	/*
	 * Set features for file system.
	 */
	zfs_set_fuid_feature(zfsvfs);
	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
		vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE);
	} else if (zfsvfs->z_case == ZFS_CASE_MIXED) {
		vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
		vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
	}
	vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);

	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
		uint64_t pval;

		/* Snapshots are immutable: force atime off and read-only. */
		atime_changed_cb(zfsvfs, B_FALSE);
		readonly_changed_cb(zfsvfs, B_TRUE);
		if ((error = dsl_prop_get_integer(osname,
		    "xattr", &pval, NULL)))
			goto out;
		xattr_changed_cb(zfsvfs, pval);
		if ((error = dsl_prop_get_integer(osname,
		    "acltype", &pval, NULL)))
			goto out;
		acl_type_changed_cb(zfsvfs, pval);
		zfsvfs->z_issnap = B_TRUE;
		zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;

		mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
		dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
		mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
	} else {
		if ((error = zfsvfs_setup(zfsvfs, B_TRUE)))
			goto out;
	}

	vfs_mountedfrom(vfsp, osname);

	if (!zfsvfs->z_issnap)
		zfsctl_create(zfsvfs);
out:
	if (error) {
		dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
		zfsvfs_free(zfsvfs);
	} else {
		atomic_inc_32(&zfs_active_fs_count);
	}

	return (error);
}

/*
 * Drop all dsl property callbacks registered for this zfsvfs.
 * Snapshots never register callbacks, so skip them.
 */
static void
zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
{
	objset_t *os = zfsvfs->z_os;

	if (!dmu_objset_is_snapshot(os))
		dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs);
}

/*
 * Copy the pool component (everything before the first '/') of dataset
 * name 'osname' into 'poolname', which must hold at least MAXNAMELEN
 * bytes.  Returns ENAMETOOLONG if the component does not fit.
 */
static int
getpoolname(const char *osname, char *poolname)
{
	char *p;

	p = strchr(osname, '/');
	if (p == NULL) {
		if (strlen(osname) >= MAXNAMELEN)
			return (ENAMETOOLONG);
		(void) strcpy(poolname, osname);
	} else {
		if (p - osname >= MAXNAMELEN)
			return (ENAMETOOLONG);
		/* strncpy does not NUL-terminate; terminated explicitly. */
		(void) strncpy(poolname, osname, p - osname);
		poolname[p - osname] = '\0';
	}
	return (0);
}

/*
 * A leading '!' on the dataset name requests a checkpoint rewind when
 * importing the root pool.  Strip it in place and report via
 * *checkpointrewind.
 */
static void
fetch_osname_options(char *name, bool *checkpointrewind)
{

	if (name[0] == '!') {
		*checkpointrewind = true;
		memmove(name, name + 1, strlen(name));
	} else {
		*checkpointrewind = false;
	}
}

/*
 * VFS_MOUNT entry point: validate privileges, handle remount by
 * refreshing property callbacks, import the root pool when this is the
 * initial root mount, then hand off to zfs_domount().
 */
/*ARGSUSED*/
static int
zfs_mount(vfs_t *vfsp)
{
	kthread_t *td = curthread;
	vnode_t *mvp = vfsp->mnt_vnodecovered;
	cred_t *cr = td->td_ucred;
	char *osname;
	int error = 0;
	int canwrite;
	bool checkpointrewind;

	if (vfs_getopt(vfsp->mnt_optnew, "from", (void **)&osname, NULL))
		return (SET_ERROR(EINVAL));

	/*
	 * If full-owner-access is enabled and delegated administration is
	 * turned on, we must set nosuid.
	 */
	if (zfs_super_owner &&
	    dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != ECANCELED) {
		secpolicy_fs_mount_clearopts(cr, vfsp);
	}

	fetch_osname_options(osname, &checkpointrewind);

	/*
	 * Check for mount privilege?
	 *
	 * If we don't have privilege then see if
	 * we have local permission to allow it
	 */
	error = secpolicy_fs_mount(cr, mvp, vfsp);
	if (error) {
		if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) != 0)
			goto out;

		if (!(vfsp->vfs_flag & MS_REMOUNT)) {
			vattr_t vattr;

			/*
			 * Make sure user is the owner of the mount point
			 * or has sufficient privileges.
			 */

			vattr.va_mask = AT_UID;

			vn_lock(mvp, LK_SHARED | LK_RETRY);
			if (VOP_GETATTR(mvp, &vattr, cr)) {
				VOP_UNLOCK1(mvp);
				goto out;
			}

			if (secpolicy_vnode_owner(mvp, cr, vattr.va_uid) != 0 &&
			    VOP_ACCESS(mvp, VWRITE, cr, td) != 0) {
				VOP_UNLOCK1(mvp);
				goto out;
			}
			VOP_UNLOCK1(mvp);
		}

		secpolicy_fs_mount_clearopts(cr, vfsp);
	}

	/*
	 * Refuse to mount a filesystem if we are in a local zone and the
	 * dataset is not visible.
	 */
	if (!INGLOBALZONE(curproc) &&
	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
		error = SET_ERROR(EPERM);
		goto out;
	}

	vfsp->vfs_flag |= MNT_NFS4ACLS;

	/*
	 * When doing a remount, we simply refresh our temporary properties
	 * according to those options set in the current VFS options.
	 */
	if (vfsp->vfs_flag & MS_REMOUNT) {
		zfsvfs_t *zfsvfs = vfsp->vfs_data;

		/*
		 * Refresh mount options with z_teardown_lock blocking I/O while
		 * the filesystem is in an inconsistent state.
		 * The lock also serializes this code with filesystem
		 * manipulations between entry to zfs_suspend_fs() and return
		 * from zfs_resume_fs().
		 */
		ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
		zfs_unregister_callbacks(zfsvfs);
		error = zfs_register_callbacks(vfsp);
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
		goto out;
	}

	/* Initial root mount: try hard to import the requested root pool. */
	if ((vfsp->vfs_flag & MNT_ROOTFS) != 0 &&
	    (vfsp->vfs_flag & MNT_UPDATE) == 0) {
		char pname[MAXNAMELEN];

		error = getpoolname(osname, pname);
		if (error == 0)
			error = spa_import_rootpool(pname, checkpointrewind);
		if (error)
			goto out;
	}
	DROP_GIANT();
	error = zfs_domount(vfsp, osname);
	PICKUP_GIANT();

out:
	return (error);
}

/*
 * VFS_STATFS entry point: report space and object counts for the
 * dataset in struct statfs terms.
 */
static int
zfs_statfs(vfs_t *vfsp, struct statfs *statp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	uint64_t refdbytes, availbytes, usedobjs, availobjs;

	statp->f_version = STATFS_VERSION;

	ZFS_ENTER(zfsvfs);

	dmu_objset_space(zfsvfs->z_os,
	    &refdbytes, &availbytes, &usedobjs, &availobjs);

	/*
	 * The underlying storage pool actually uses multiple block sizes.
	 * We report the fragsize as the smallest block size we support,
	 * and we report our blocksize as the filesystem's maximum blocksize.
	 */
	statp->f_bsize = SPA_MINBLOCKSIZE;
	statp->f_iosize = zfsvfs->z_vfs->mnt_stat.f_iosize;

	/*
	 * The following report "total" blocks of various kinds in the
	 * file system, but reported in terms of f_frsize - the
	 * "fragment" size.
	 */

	statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
	statp->f_bfree = availbytes / statp->f_bsize;
	statp->f_bavail = statp->f_bfree; /* no root reservation */

	/*
	 * statvfs() should really be called statufs(), because it assumes
	 * static metadata.  ZFS doesn't preallocate files, so the best
	 * we can do is report the max that could possibly fit in f_files,
	 * and that minus the number actually used in f_ffree.
	 * For f_ffree, report the smaller of the number of object available
	 * and the number of blocks (each object will take at least a block).
	 */
	statp->f_ffree = MIN(availobjs, statp->f_bfree);
	statp->f_files = statp->f_ffree + usedobjs;

	/*
	 * We're a zfs filesystem.
	 */
	strlcpy(statp->f_fstypename, "zfs",
	    sizeof (statp->f_fstypename));

	strlcpy(statp->f_mntfromname, vfsp->mnt_stat.f_mntfromname,
	    sizeof (statp->f_mntfromname));
	strlcpy(statp->f_mntonname, vfsp->mnt_stat.f_mntonname,
	    sizeof (statp->f_mntonname));

	statp->f_namemax = MAXNAMELEN - 1;

	ZFS_EXIT(zfsvfs);
	return (0);
}

/*
 * VFS_ROOT entry point: return the root vnode, locked per 'flags'.
 */
static int
zfs_root(vfs_t *vfsp, int flags, vnode_t **vpp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	znode_t *rootzp;
	int error;

	ZFS_ENTER(zfsvfs);

	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
	if (error == 0)
		*vpp = ZTOV(rootzp);

	ZFS_EXIT(zfsvfs);

	if (error == 0) {
		/* Lock outside ZFS_ENTER to avoid holding teardown lock. */
		error = vn_lock(*vpp, flags);
		if (error != 0) {
			VN_RELE(*vpp);
			*vpp = NULL;
		}
	}
	return (error);
}

/*
 * Teardown the zfsvfs::z_os.
 *
 * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
 * and 'z_teardown_inactive_lock' held.
 */
static int
zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
{
	znode_t	*zp;
	dsl_dir_t *dd;

	/*
	 * If someone has not already unmounted this file system,
	 * drain the zrele_taskq to ensure all active references to the
	 * zfsvfs_t have been handled only then can it be safely destroyed.
	 */
	if (zfsvfs->z_os) {
		/*
		 * If we're unmounting we have to wait for the list to
		 * drain completely.
		 *
		 * If we're not unmounting there's no guarantee the list
		 * will drain completely, but zreles run from the taskq
		 * may add the parents of dir-based xattrs to the taskq
		 * so we want to wait for these.
		 *
		 * We can safely read z_nr_znodes without locking because the
		 * VFS has already blocked operations which add to the
		 * z_all_znodes list and thus increment z_nr_znodes.
		 */
		int round = 0;
		while (zfsvfs->z_nr_znodes > 0) {
			taskq_wait_outstanding(dsl_pool_zrele_taskq(
			    dmu_objset_pool(zfsvfs->z_os)), 0);
			if (++round > 1 && !unmounting)
				break;
		}
	}
	ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);

	if (!unmounting) {
		/*
		 * We purge the parent filesystem's vfsp as the parent
		 * filesystem and all of its snapshots have their vnode's
		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
		 * 'z_parent' is self referential for non-snapshots.
		 */
#ifdef FREEBSD_NAMECACHE
#if __FreeBSD_version >= 1300117
		cache_purgevfs(zfsvfs->z_parent->z_vfs);
#else
		cache_purgevfs(zfsvfs->z_parent->z_vfs, true);
#endif
#endif
	}

	/*
	 * Close the zil. NB: Can't close the zil while zfs_inactive
	 * threads are blocked as zil_close can call zfs_inactive.
	 */
	if (zfsvfs->z_log) {
		zil_close(zfsvfs->z_log);
		zfsvfs->z_log = NULL;
	}

	ZFS_TEARDOWN_INACTIVE_ENTER_WRITE(zfsvfs);

	/*
	 * If we are not unmounting (ie: online recv) and someone already
	 * unmounted this file system while we were doing the switcheroo,
	 * or a reopen of z_os failed then just bail out now.
	 */
	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
		ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
		return (SET_ERROR(EIO));
	}

	/*
	 * At this point there are no vops active, and any new vops will
	 * fail with EIO since we have z_teardown_lock for writer (only
	 * relevant for forced unmount).
	 *
	 * Release all holds on dbufs.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
		if (zp->z_sa_hdl != NULL) {
			zfs_znode_dmu_fini(zp);
		}
	}
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * If we are unmounting, set the unmounted flag and let new vops
	 * unblock.  zfs_inactive will have the unmounted behavior, and all
	 * other vops will fail with EIO.
	 */
	if (unmounting) {
		zfsvfs->z_unmounted = B_TRUE;
		ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
	}

	/*
	 * z_os will be NULL if there was an error in attempting to reopen
	 * zfsvfs, so just return as the properties had already been
	 * unregistered and cached data had been evicted before.
	 */
	if (zfsvfs->z_os == NULL)
		return (0);

	/*
	 * Unregister properties.
	 */
	zfs_unregister_callbacks(zfsvfs);

	/*
	 * Evict cached data
	 */
	if (!zfs_is_readonly(zfsvfs))
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
	dmu_objset_evict_dbufs(zfsvfs->z_os);
	dd = zfsvfs->z_os->os_dsl_dataset->ds_dir;
	dsl_dir_cancel_waiters(dd);

	return (0);
}

/*
 * VFS_UNMOUNT entry point: check privileges, unmount .zfs snapshots,
 * flush vnodes, tear down z_os, and release the objset and zfsvfs.
 */
/*ARGSUSED*/
static int
zfs_umount(vfs_t *vfsp, int fflag)
{
	kthread_t *td = curthread;
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	objset_t *os;
	cred_t *cr = td->td_ucred;
	int ret;

	ret = secpolicy_fs_unmount(cr, vfsp);
	if (ret) {
		if (dsl_deleg_access((char *)vfsp->vfs_resource,
		    ZFS_DELEG_PERM_MOUNT, cr))
			return (ret);
	}

	/*
	 * Unmount any snapshots mounted under .zfs before unmounting the
	 * dataset itself.
	 */
	if (zfsvfs->z_ctldir != NULL) {
		if ((ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0)
			return (ret);
	}

	if (fflag & MS_FORCE) {
		/*
		 * Mark file system as unmounted before calling
		 * vflush(FORCECLOSE). This way we ensure no future vnops
		 * will be called and risk operating on DOOMED vnodes.
		 */
		ZFS_TEARDOWN_ENTER_WRITE(zfsvfs, FTAG);
		zfsvfs->z_unmounted = B_TRUE;
		ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);
	}

	/*
	 * Flush all the files.
	 */
	ret = vflush(vfsp, 0, (fflag & MS_FORCE) ? FORCECLOSE : 0, td);
	if (ret != 0)
		return (ret);
	/* Cancel (or wait out) any pending unlinked-drain task. */
	while (taskqueue_cancel(zfsvfs_taskq->tq_queue,
	    &zfsvfs->z_unlinked_drain_task, NULL) != 0)
		taskqueue_drain(zfsvfs_taskq->tq_queue,
		    &zfsvfs->z_unlinked_drain_task);

	VERIFY0(zfsvfs_teardown(zfsvfs, B_TRUE));
	os = zfsvfs->z_os;

	/*
	 * z_os will be NULL if there was an error in
	 * attempting to reopen zfsvfs.
	 */
	if (os != NULL) {
		/*
		 * Unset the objset user_ptr.
		 */
		mutex_enter(&os->os_user_ptr_lock);
		dmu_objset_set_user(os, NULL);
		mutex_exit(&os->os_user_ptr_lock);

		/*
		 * Finally release the objset
		 */
		dmu_objset_disown(os, B_TRUE, zfsvfs);
	}

	/*
	 * We can now safely destroy the '.zfs' directory node.
	 */
	if (zfsvfs->z_ctldir != NULL)
		zfsctl_destroy(zfsvfs);
	zfs_freevfs(vfsp);

	return (0);
}

/*
 * VFS_VGET entry point: translate an inode number to a locked vnode.
 */
static int
zfs_vget(vfs_t *vfsp, ino_t ino, int flags, vnode_t **vpp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	znode_t *zp;
	int err;

	/*
	 * zfs_zget() can't operate on virtual entries like .zfs/ or
	 * .zfs/snapshot/ directories, that's why we return EOPNOTSUPP.
	 * This will make NFS to switch to LOOKUP instead of using VGET.
	 */
	if (ino == ZFSCTL_INO_ROOT || ino == ZFSCTL_INO_SNAPDIR ||
	    (zfsvfs->z_shares_dir != 0 && ino == zfsvfs->z_shares_dir))
		return (EOPNOTSUPP);

	ZFS_ENTER(zfsvfs);
	err = zfs_zget(zfsvfs, ino, &zp);
	if (err == 0 && zp->z_unlinked) {
		/* Unlinked-but-open files are not reachable by number. */
		vrele(ZTOV(zp));
		err = EINVAL;
	}
	if (err == 0)
		*vpp = ZTOV(zp);
	ZFS_EXIT(zfsvfs);
	if (err == 0) {
		err = vn_lock(*vpp, flags);
		if (err != 0)
			vrele(*vpp);
	}
	if (err != 0)
		*vpp = NULL;
	return (err);
}

static int
#if __FreeBSD_version >= 1300098
zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, uint64_t *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int *secflavors)
#else
zfs_checkexp(vfs_t *vfsp, struct sockaddr *nam, int *extflagsp,
    struct ucred **credanonp, int *numsecflavors, int **secflavors)
#endif
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;

	/*
	 * If this is regular file system vfsp is the same as
	 * zfsvfs->z_parent->z_vfs, but if it is snapshot,
	 * zfsvfs->z_parent->z_vfs represents parent file system
	 * which we have to use here, because only this file system
	 * has mnt_export configured.
	 */
	return (vfs_stdcheckexp(zfsvfs->z_parent->z_vfs, nam, extflagsp,
	    credanonp, numsecflavors, secflavors));
}

CTASSERT(SHORT_FID_LEN <= sizeof (struct fid));
CTASSERT(LONG_FID_LEN <= sizeof (struct fid));

/*
 * VFS_FHTOVP entry point: translate an NFS file handle (short or long
 * zfid) into a locked vnode, validating the generation number.
 */
static int
zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
{
	struct componentname cn;
	zfsvfs_t *zfsvfs = vfsp->vfs_data;
	znode_t *zp;
	vnode_t *dvp;
	uint64_t object = 0;
	uint64_t fid_gen = 0;
	uint64_t gen_mask;
	uint64_t zp_gen;
	int i, err;

	*vpp = NULL;

	ZFS_ENTER(zfsvfs);

	/*
	 * On FreeBSD we can get snapshot's mount point or its parent file
	 * system mount point depending if snapshot is already mounted or not.
	 */
	if (zfsvfs->z_parent == zfsvfs && fidp->fid_len == LONG_FID_LEN) {
		zfid_long_t *zlfid = (zfid_long_t *)fidp;
		uint64_t objsetid = 0;
		uint64_t setgen = 0;

		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);

		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);

		ZFS_EXIT(zfsvfs);

		/* Re-resolves zfsvfs to the snapshot's own mount. */
		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
		if (err)
			return (SET_ERROR(EINVAL));
		ZFS_ENTER(zfsvfs);
	}

	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
		zfid_short_t *zfid = (zfid_short_t *)fidp;

		for (i = 0; i < sizeof (zfid->zf_object); i++)
			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);

		for (i = 0; i < sizeof (zfid->zf_gen); i++)
			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
	} else {
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	/*
	 * A zero fid_gen means we are in .zfs or the .zfs/snapshot
	 * directory tree. If the object == zfsvfs->z_shares_dir, then
	 * we are in the .zfs/shares directory tree.
	 */
	if ((fid_gen == 0 &&
	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) ||
	    (zfsvfs->z_shares_dir != 0 && object == zfsvfs->z_shares_dir)) {
		ZFS_EXIT(zfsvfs);
		VERIFY0(zfsctl_root(zfsvfs, LK_SHARED, &dvp));
		if (object == ZFSCTL_INO_SNAPDIR) {
			cn.cn_nameptr = "snapshot";
			cn.cn_namelen = strlen(cn.cn_nameptr);
			cn.cn_nameiop = LOOKUP;
			cn.cn_flags = ISLASTCN | LOCKLEAF;
			cn.cn_lkflags = flags;
			VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
			vput(dvp);
		} else if (object == zfsvfs->z_shares_dir) {
			/*
			 * XXX This branch must not be taken,
			 * if it is, then the lookup below will
			 * explode.
			 */
			cn.cn_nameptr = "shares";
			cn.cn_namelen = strlen(cn.cn_nameptr);
			cn.cn_nameiop = LOOKUP;
			cn.cn_flags = ISLASTCN;
			cn.cn_lkflags = flags;
			VERIFY0(VOP_LOOKUP(dvp, vpp, &cn));
			vput(dvp);
		} else {
			*vpp = dvp;
		}
		/*
		 * NOTE(review): 'err' is only assigned on the LONG_FID
		 * path above; for a SHORT_FID control-directory handle
		 * this returns an indeterminate value — verify against
		 * upstream before relying on the return code here.
		 */
		return (err);
	}

	gen_mask = -1ULL >> (64 - 8 * i);

	dprintf("getting %llu [%llu mask %llx]\n", (u_longlong_t)object,
	    (u_longlong_t)fid_gen,
	    (u_longlong_t)gen_mask);
	if ((err = zfs_zget(zfsvfs, object, &zp))) {
		ZFS_EXIT(zfsvfs);
		return (err);
	}
	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
	    sizeof (uint64_t));
	zp_gen = zp_gen & gen_mask;
	/* A stored generation of 0 is represented as 1 in the fid. */
	if (zp_gen == 0)
		zp_gen = 1;
	if (zp->z_unlinked || zp_gen != fid_gen) {
		dprintf("znode gen (%llu) != fid gen (%llu)\n",
		    (u_longlong_t)zp_gen, (u_longlong_t)fid_gen);
		vrele(ZTOV(zp));
		ZFS_EXIT(zfsvfs);
		return (SET_ERROR(EINVAL));
	}

	*vpp = ZTOV(zp);
	ZFS_EXIT(zfsvfs);
	err = vn_lock(*vpp, flags);
	if (err == 0)
		vnode_create_vobject(*vpp, zp->z_size, curthread);
	else
		*vpp = NULL;
	return (err);
}

/*
 * Block out VOPs and close zfsvfs_t::z_os
 *
 * Note, if successful, then we return with the 'z_teardown_lock' and
 * 'z_teardown_inactive_lock' write held.  We leave ownership of the underlying
 * dataset and objset intact so that they can be atomically handed off during
 * a subsequent rollback or recv operation and the resume thereafter.
 */
int
zfs_suspend_fs(zfsvfs_t *zfsvfs)
{
	int error;

	if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
		return (error);

	return (0);
}

/*
 * Rebuild SA and release VOPs.  Note that ownership of the underlying dataset
 * is an invariant across any of the operations that can be performed while the
 * filesystem was suspended.  Whether it succeeded or failed, the preconditions
 * are the same: the relevant objset and associated dataset are owned by
 * zfsvfs, held, and long held on entry.
 */
int
zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
{
	int err;
	znode_t *zp;

	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
	ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));

	/*
	 * We already own this, so just update the objset_t, as the one we
	 * had before may have been evicted.
	 */
	objset_t *os;
	VERIFY3P(ds->ds_owner, ==, zfsvfs);
	VERIFY(dsl_dataset_long_held(ds));
	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
	dsl_pool_config_enter(dp, FTAG);
	VERIFY0(dmu_objset_from_ds(ds, &os));
	dsl_pool_config_exit(dp, FTAG);

	err = zfsvfs_init(zfsvfs, os);
	if (err != 0)
		goto bail;

	ds->ds_dir->dd_activity_cancelled = B_FALSE;
	VERIFY0(zfsvfs_setup(zfsvfs, B_FALSE));

	zfs_set_fuid_feature(zfsvfs);

	/*
	 * Attempt to re-establish all the active znodes with
	 * their dbufs. If a zfs_rezget() fails, then we'll let
	 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
	 * when they try to use their znode.
	 */
	mutex_enter(&zfsvfs->z_znodes_lock);
	for (zp = list_head(&zfsvfs->z_all_znodes); zp;
	    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
		(void) zfs_rezget(zp);
	}
	mutex_exit(&zfsvfs->z_znodes_lock);

bail:
	/* release the VOPs */
	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);

	if (err) {
		/*
		 * Since we couldn't setup the sa framework, try to force
		 * unmount this file system.
		 */
		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0) {
			vfs_ref(zfsvfs->z_vfs);
			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, curthread);
		}
	}
	return (err);
}

/*
 * Final VFS release: free the zfsvfs_t and drop the active-filesystem
 * count that zfs_domount() bumped.
 */
static void
zfs_freevfs(vfs_t *vfsp)
{
	zfsvfs_t *zfsvfs = vfsp->vfs_data;

	zfsvfs_free(zfsvfs);

	atomic_dec_32(&zfs_active_fs_count);
}

#ifdef __i386__
static int desiredvnodes_backup;
#include <sys/vmmeter.h>


#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#endif

/*
 * On i386, shrink the global desiredvnodes tunable (saving the old
 * value for zfs_vnodes_adjust_back()).  No-op on other architectures.
 */
static void
zfs_vnodes_adjust(void)
{
#ifdef __i386__
	int newdesiredvnodes;

	desiredvnodes_backup = desiredvnodes;

	/*
	 * We calculate newdesiredvnodes the same way it is done in
	 * vntblinit(). If it is equal to desiredvnodes, it means that
	 * it wasn't tuned by the administrator and we can tune it down.
	 */
	newdesiredvnodes = min(maxproc + vm_cnt.v_page_count / 4, 2 *
	    vm_kmem_size / (5 * (sizeof (struct vm_object) +
	    sizeof (struct vnode))));
	if (newdesiredvnodes == desiredvnodes)
		desiredvnodes = (3 * newdesiredvnodes) / 4;
#endif
}

/* Restore desiredvnodes to the value saved by zfs_vnodes_adjust(). */
static void
zfs_vnodes_adjust_back(void)
{

#ifdef __i386__
	desiredvnodes = desiredvnodes_backup;
#endif
}

/*
 * Module-wide ZPL initialization: .zfs ctldir, znode cache, vnode
 * tuning, objset type registration, and the zfsvfs taskq.
 */
void
zfs_init(void)
{

	printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n");

	/*
	 * Initialize .zfs directory structures
	 */
	zfsctl_init();

	/*
	 * Initialize znode cache, vnode ops, etc...
	 */
	zfs_znode_init();

	/*
	 * Reduce number of vnodes. Originally number of vnodes is calculated
	 * with UFS inode in mind. We reduce it here, because it's too big for
	 * ZFS/i386.
	 */
	zfs_vnodes_adjust();

	dmu_objset_register_type(DMU_OST_ZFS, zpl_get_file_info);

	zfsvfs_taskq = taskq_create("zfsvfs", 1, minclsyspri, 0, 0, 0);
}

/* Undo zfs_init(), in reverse order. */
void
zfs_fini(void)
{
	taskq_destroy(zfsvfs_taskq);
	zfsctl_fini();
	zfs_znode_fini();
	zfs_vnodes_adjust_back();
}

/* Nonzero while any ZFS filesystem is mounted. */
int
zfs_busy(void)
{
	return (zfs_active_fs_count != 0);
}

/*
 * Release VOPs and unmount a suspended filesystem.
 */
int
zfs_end_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
{
	ASSERT(ZFS_TEARDOWN_WRITE_HELD(zfsvfs));
	ASSERT(ZFS_TEARDOWN_INACTIVE_WRITE_HELD(zfsvfs));

	/*
	 * We already own this, so just hold and rele it to update the
	 * objset_t, as the one we had before may have been evicted.
	 */
	objset_t *os;
	VERIFY3P(ds->ds_owner, ==, zfsvfs);
	VERIFY(dsl_dataset_long_held(ds));
	dsl_pool_t *dp = spa_get_dsl(dsl_dataset_get_spa(ds));
	dsl_pool_config_enter(dp, FTAG);
	VERIFY0(dmu_objset_from_ds(ds, &os));
	dsl_pool_config_exit(dp, FTAG);
	zfsvfs->z_os = os;

	/* release the VOPs */
	ZFS_TEARDOWN_INACTIVE_EXIT_WRITE(zfsvfs);
	ZFS_TEARDOWN_EXIT(zfsvfs, FTAG);

	/*
	 * Try to force unmount this file system.
	 */
	(void) zfs_umount(zfsvfs->z_vfs, 0);
	zfsvfs->z_unmounted = B_TRUE;
	return (0);
}

/*
 * Upgrade the filesystem's on-disk ZPL version to 'newvers'.  The new
 * version is persisted in the master node; crossing ZPL_VERSION_SA also
 * creates the SA master node and registers the SA upgrade callback.
 * Downgrades and versions beyond the pool's SPA version are rejected.
 */
int
zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
{
	int error;
	objset_t *os = zfsvfs->z_os;
	dmu_tx_t *tx;

	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
		return (SET_ERROR(EINVAL));

	if (newvers < zfsvfs->z_version)
		return (SET_ERROR(EINVAL));

	if (zfs_spa_version_map(newvers) >
	    spa_version(dmu_objset_spa(zfsvfs->z_os)))
		return (SET_ERROR(ENOTSUP));

	tx = dmu_tx_create(os);
	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
		    ZFS_SA_ATTRS);
		dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
	}
	error = dmu_tx_assign(tx, TXG_WAIT);
	if (error) {
		dmu_tx_abort(tx);
		return (error);
	}

	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
	    8, 1, &newvers, tx);

	if (error) {
		/* tx was assigned; it must be committed, not aborted. */
		dmu_tx_commit(tx);
		return (error);
	}

	if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
		uint64_t sa_obj;

		ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
		    SPA_VERSION_SA);
		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
		    DMU_OT_NONE, 0, tx);

		error = zap_add(os, MASTER_NODE_OBJ,
		    ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
		ASSERT0(error);

		VERIFY0(sa_set_sa_object(os, sa_obj));
		sa_register_update_callback(os, zfs_sa_upgrade);
	}

	spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
	    "from %ju to %ju", (uintmax_t)zfsvfs->z_version,
	    (uintmax_t)newvers);
	dmu_tx_commit(tx);

	zfsvfs->z_version = newvers;
	os->os_version = newvers;

	zfs_set_fuid_feature(zfsvfs);

	return (0);
}

/*
 * Read a property stored within the master node.
 */
int
zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
{
	uint64_t *cached_copy = NULL;

	/*
	 * Figure out where in the objset_t the cached copy would live, if it
	 * is available for the requested property.
	 */
	if (os != NULL) {
		switch (prop) {
		case ZFS_PROP_VERSION:
			cached_copy = &os->os_version;
			break;
		case ZFS_PROP_NORMALIZE:
			cached_copy = &os->os_normalization;
			break;
		case ZFS_PROP_UTF8ONLY:
			cached_copy = &os->os_utf8only;
			break;
		case ZFS_PROP_CASE:
			cached_copy = &os->os_casesensitivity;
			break;
		default:
			break;
		}
	}
	if (cached_copy != NULL && *cached_copy != OBJSET_PROP_UNINITIALIZED) {
		*value = *cached_copy;
		return (0);
	}

	/*
	 * If the property wasn't cached, look up the file system's value for
	 * the property. For the version property, we look up a slightly
	 * different string.
	 */
	const char *pname;
	int error = ENOENT;
	if (prop == ZFS_PROP_VERSION) {
		pname = ZPL_VERSION_STR;
	} else {
		pname = zfs_prop_to_name(prop);
	}

	if (os != NULL) {
		ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
		error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
	}

	if (error == ENOENT) {
		/* No value set, use the default value */
		switch (prop) {
		case ZFS_PROP_VERSION:
			*value = ZPL_VERSION;
			break;
		case ZFS_PROP_NORMALIZE:
		case ZFS_PROP_UTF8ONLY:
			*value = 0;
			break;
		case ZFS_PROP_CASE:
			*value = ZFS_CASE_SENSITIVE;
			break;
		case ZFS_PROP_ACLTYPE:
			*value = ZFS_ACLTYPE_NFSV4;
			break;
		default:
			return (error);
		}
		error = 0;
	}

	/*
	 * If one of the methods for getting the property value above worked,
	 * copy it into the objset_t's cache.
	 */
	if (error == 0 && cached_copy != NULL) {
		*cached_copy = *value;
	}

	return (error);
}

/*
 * Return true if the corresponding vfs's unmounted flag is set.
 * Otherwise return false.
 * If this function returns true we know VFS unmount has been initiated.
 */
boolean_t
zfs_get_vfs_flag_unmounted(objset_t *os)
{
	zfsvfs_t *zfvp;
	boolean_t unmounted = B_FALSE;

	ASSERT3U(dmu_objset_type(os), ==, DMU_OST_ZFS);

	/* os_user_ptr_lock keeps the zfsvfs from going away under us. */
	mutex_enter(&os->os_user_ptr_lock);
	zfvp = dmu_objset_get_user(os);
	if (zfvp != NULL && zfvp->z_vfs != NULL &&
	    (zfvp->z_vfs->mnt_kern_flag & MNTK_UNMOUNT))
		unmounted = B_TRUE;
	mutex_exit(&os->os_user_ptr_lock);

	return (unmounted);
}

#ifdef _KERNEL
/*
 * After a dataset rename, rewrite f_mntfromname for every mount whose
 * source was 'oldname' itself or a child of it (next character '/' or
 * '@'), substituting 'newname' for the old prefix.
 */
void
zfsvfs_update_fromname(const char *oldname, const char *newname)
{
	char tmpbuf[MAXPATHLEN];
	struct mount *mp;
	char *fromname;
	size_t oldlen;

	oldlen = strlen(oldname);

	mtx_lock(&mountlist_mtx);
	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
		fromname = mp->mnt_stat.f_mntfromname;
		/* Exact match: replace the whole name. */
		if (strcmp(fromname, oldname) == 0) {
			(void) strlcpy(fromname, newname,
			    sizeof (mp->mnt_stat.f_mntfromname));
			continue;
		}
		/* Descendant dataset or snapshot: rewrite the prefix. */
		if (strncmp(fromname, oldname, oldlen) == 0 &&
		    (fromname[oldlen] == '/' || fromname[oldlen] == '@')) {
			(void) snprintf(tmpbuf, sizeof (tmpbuf), "%s%s",
			    newname, fromname + oldlen);
			(void) strlcpy(fromname, tmpbuf,
			    sizeof (mp->mnt_stat.f_mntfromname));
			continue;
		}
	}
	mtx_unlock(&mountlist_mtx);
}
#endif