1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51484Sek110237 * Common Development and Distribution License (the "License"). 61484Sek110237 * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 223461Sahrens * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 263246Sck153898 #pragma ident "%Z%%M% %I% %E% SMI" 27789Sahrens 28789Sahrens #include <sys/types.h> 29789Sahrens #include <sys/param.h> 30789Sahrens #include <sys/systm.h> 31789Sahrens #include <sys/sysmacros.h> 32789Sahrens #include <sys/kmem.h> 33789Sahrens #include <sys/pathname.h> 34789Sahrens #include <sys/vnode.h> 35789Sahrens #include <sys/vfs.h> 363898Srsb #include <sys/vfs_opreg.h> 37789Sahrens #include <sys/mntent.h> 38789Sahrens #include <sys/mount.h> 39789Sahrens #include <sys/cmn_err.h> 40789Sahrens #include "fs/fs_subr.h" 41789Sahrens #include <sys/zfs_znode.h> 423461Sahrens #include <sys/zfs_dir.h> 43789Sahrens #include <sys/zil.h> 44789Sahrens #include <sys/fs/zfs.h> 45789Sahrens #include <sys/dmu.h> 46789Sahrens #include <sys/dsl_prop.h> 473912Slling #include <sys/dsl_dataset.h> 48*4543Smarks #include <sys/dsl_deleg.h> 49789Sahrens #include <sys/spa.h> 50789Sahrens #include <sys/zap.h> 51789Sahrens #include <sys/varargs.h> 52789Sahrens #include <sys/policy.h> 53789Sahrens #include <sys/atomic.h> 54789Sahrens #include <sys/mkdev.h> 55789Sahrens #include <sys/modctl.h> 56*4543Smarks #include <sys/refstr.h> 57789Sahrens #include <sys/zfs_ioctl.h> 58789Sahrens #include <sys/zfs_ctldir.h> 591544Seschrock #include <sys/bootconf.h> 60849Sbonwick #include <sys/sunddi.h> 611484Sek110237 #include <sys/dnlc.h> 62789Sahrens 63789Sahrens int zfsfstype; 64789Sahrens vfsops_t *zfs_vfsops = NULL; 65849Sbonwick static major_t zfs_major; 66789Sahrens static minor_t zfs_minor; 67789Sahrens static kmutex_t zfs_dev_mtx; 68789Sahrens 69789Sahrens static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr); 70789Sahrens static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr); 711544Seschrock static int zfs_mountroot(vfs_t *vfsp, enum whymountroot); 72789Sahrens static int zfs_root(vfs_t *vfsp, vnode_t **vpp); 73789Sahrens static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp); 74789Sahrens static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp); 75789Sahrens static void zfs_freevfs(vfs_t *vfsp); 76789Sahrens static void zfs_objset_close(zfsvfs_t *zfsvfs); 77789Sahrens 78789Sahrens static const fs_operation_def_t zfs_vfsops_template[] = { 793898Srsb VFSNAME_MOUNT, { .vfs_mount = zfs_mount }, 803898Srsb VFSNAME_MOUNTROOT, { .vfs_mountroot = zfs_mountroot }, 813898Srsb VFSNAME_UNMOUNT, { .vfs_unmount = zfs_umount }, 823898Srsb VFSNAME_ROOT, { .vfs_root = zfs_root }, 833898Srsb VFSNAME_STATVFS, { .vfs_statvfs = zfs_statvfs }, 843898Srsb VFSNAME_SYNC, { .vfs_sync = zfs_sync }, 853898Srsb VFSNAME_VGET, { .vfs_vget = zfs_vget }, 863898Srsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 873898Srsb NULL, NULL 88789Sahrens }; 89789Sahrens 90789Sahrens static const fs_operation_def_t zfs_vfsops_eio_template[] = { 913898Srsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 923898Srsb NULL, NULL 93789Sahrens }; 94789Sahrens 95789Sahrens /* 96789Sahrens * We need to keep a count of active fs's. 97789Sahrens * This is necessary to prevent our module 98789Sahrens * from being unloaded after a umount -f 99789Sahrens */ 100789Sahrens static uint32_t zfs_active_fs_count = 0; 101789Sahrens 102789Sahrens static char *noatime_cancel[] = { MNTOPT_ATIME, NULL }; 103789Sahrens static char *atime_cancel[] = { MNTOPT_NOATIME, NULL }; 1043234Sck153898 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 1053234Sck153898 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 106789Sahrens 1073234Sck153898 /* 1083234Sck153898 * MNTOPT_DEFAULT was removed from MNTOPT_XATTR, since the 1093234Sck153898 * default value is now determined by the xattr property. 1103234Sck153898 */ 111789Sahrens static mntopt_t mntopts[] = { 1123234Sck153898 { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, NULL }, 1133234Sck153898 { MNTOPT_XATTR, xattr_cancel, NULL, 0, NULL }, 114789Sahrens { MNTOPT_NOATIME, noatime_cancel, NULL, MO_DEFAULT, NULL }, 115789Sahrens { MNTOPT_ATIME, atime_cancel, NULL, 0, NULL } 116789Sahrens }; 117789Sahrens 118789Sahrens static mntopts_t zfs_mntopts = { 119789Sahrens sizeof (mntopts) / sizeof (mntopt_t), 120789Sahrens mntopts 121789Sahrens }; 122789Sahrens 123789Sahrens /*ARGSUSED*/ 124789Sahrens int 125789Sahrens zfs_sync(vfs_t *vfsp, short flag, cred_t *cr) 126789Sahrens { 127789Sahrens /* 128789Sahrens * Data integrity is job one. We don't want a compromised kernel 129789Sahrens * writing to the storage pool, so we never sync during panic. 130789Sahrens */ 131789Sahrens if (panicstr) 132789Sahrens return (0); 133789Sahrens 134789Sahrens /* 135789Sahrens * SYNC_ATTR is used by fsflush() to force old filesystems like UFS 136789Sahrens * to sync metadata, which they would otherwise cache indefinitely. 137789Sahrens * Semantically, the only requirement is that the sync be initiated. 138789Sahrens * The DMU syncs out txgs frequently, so there's nothing to do. 139789Sahrens */ 140789Sahrens if (flag & SYNC_ATTR) 141789Sahrens return (0); 142789Sahrens 143789Sahrens if (vfsp != NULL) { 144789Sahrens /* 145789Sahrens * Sync a specific filesystem. 146789Sahrens */ 147789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 148789Sahrens 149789Sahrens ZFS_ENTER(zfsvfs); 150789Sahrens if (zfsvfs->z_log != NULL) 1512638Sperrin zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 152789Sahrens else 153789Sahrens txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 154789Sahrens ZFS_EXIT(zfsvfs); 155789Sahrens } else { 156789Sahrens /* 157789Sahrens * Sync all ZFS filesystems. This is what happens when you 158789Sahrens * run sync(1M). Unlike other filesystems, ZFS honors the 159789Sahrens * request by waiting for all pools to commit all dirty data. 160789Sahrens */ 161789Sahrens spa_sync_allpools(); 162789Sahrens } 163789Sahrens 164789Sahrens return (0); 165789Sahrens } 166789Sahrens 1671544Seschrock static int 1681544Seschrock zfs_create_unique_device(dev_t *dev) 1691544Seschrock { 1701544Seschrock major_t new_major; 1711544Seschrock 1721544Seschrock do { 1731544Seschrock ASSERT3U(zfs_minor, <=, MAXMIN32); 1741544Seschrock minor_t start = zfs_minor; 1751544Seschrock do { 1761544Seschrock mutex_enter(&zfs_dev_mtx); 1771544Seschrock if (zfs_minor >= MAXMIN32) { 1781544Seschrock /* 1791544Seschrock * If we're still using the real major 1801544Seschrock * keep out of /dev/zfs and /dev/zvol minor 1811544Seschrock * number space. If we're using a getudev()'ed 1821544Seschrock * major number, we can use all of its minors. 1831544Seschrock */ 1841544Seschrock if (zfs_major == ddi_name_to_major(ZFS_DRIVER)) 1851544Seschrock zfs_minor = ZFS_MIN_MINOR; 1861544Seschrock else 1871544Seschrock zfs_minor = 0; 1881544Seschrock } else { 1891544Seschrock zfs_minor++; 1901544Seschrock } 1911544Seschrock *dev = makedevice(zfs_major, zfs_minor); 1921544Seschrock mutex_exit(&zfs_dev_mtx); 1931544Seschrock } while (vfs_devismounted(*dev) && zfs_minor != start); 1941544Seschrock if (zfs_minor == start) { 1951544Seschrock /* 1961544Seschrock * We are using all ~262,000 minor numbers for the 1971544Seschrock * current major number. Create a new major number. 1981544Seschrock */ 1991544Seschrock if ((new_major = getudev()) == (major_t)-1) { 2001544Seschrock cmn_err(CE_WARN, 2011544Seschrock "zfs_mount: Can't get unique major " 2021544Seschrock "device number."); 2031544Seschrock return (-1); 2041544Seschrock } 2051544Seschrock mutex_enter(&zfs_dev_mtx); 2061544Seschrock zfs_major = new_major; 2071544Seschrock zfs_minor = 0; 2081544Seschrock 2091544Seschrock mutex_exit(&zfs_dev_mtx); 2101544Seschrock } else { 2111544Seschrock break; 2121544Seschrock } 2131544Seschrock /* CONSTANTCONDITION */ 2141544Seschrock } while (1); 2151544Seschrock 2161544Seschrock return (0); 2171544Seschrock } 2181544Seschrock 219789Sahrens static void 220789Sahrens atime_changed_cb(void *arg, uint64_t newval) 221789Sahrens { 222789Sahrens zfsvfs_t *zfsvfs = arg; 223789Sahrens 224789Sahrens if (newval == TRUE) { 225789Sahrens zfsvfs->z_atime = TRUE; 226789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 227789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 228789Sahrens } else { 229789Sahrens zfsvfs->z_atime = FALSE; 230789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 231789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 232789Sahrens } 233789Sahrens } 234789Sahrens 235789Sahrens static void 2363234Sck153898 xattr_changed_cb(void *arg, uint64_t newval) 2373234Sck153898 { 2383234Sck153898 zfsvfs_t *zfsvfs = arg; 2393234Sck153898 2403234Sck153898 if (newval == TRUE) { 2413234Sck153898 /* XXX locking on vfs_flag? */ 2423234Sck153898 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 2433234Sck153898 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 2443234Sck153898 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 2453234Sck153898 } else { 2463234Sck153898 /* XXX locking on vfs_flag? */ 2473234Sck153898 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 2483234Sck153898 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 2493234Sck153898 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 2503234Sck153898 } 2513234Sck153898 } 2523234Sck153898 2533234Sck153898 static void 254789Sahrens blksz_changed_cb(void *arg, uint64_t newval) 255789Sahrens { 256789Sahrens zfsvfs_t *zfsvfs = arg; 257789Sahrens 258789Sahrens if (newval < SPA_MINBLOCKSIZE || 259789Sahrens newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 260789Sahrens newval = SPA_MAXBLOCKSIZE; 261789Sahrens 262789Sahrens zfsvfs->z_max_blksz = newval; 263789Sahrens zfsvfs->z_vfs->vfs_bsize = newval; 264789Sahrens } 265789Sahrens 266789Sahrens static void 267789Sahrens readonly_changed_cb(void *arg, uint64_t newval) 268789Sahrens { 269789Sahrens zfsvfs_t *zfsvfs = arg; 270789Sahrens 271789Sahrens if (newval) { 272789Sahrens /* XXX locking on vfs_flag? */ 273789Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 274789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 275789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 276789Sahrens } else { 277789Sahrens /* XXX locking on vfs_flag? */ 278789Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 279789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 280789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 281789Sahrens } 282789Sahrens } 283789Sahrens 284789Sahrens static void 285789Sahrens devices_changed_cb(void *arg, uint64_t newval) 286789Sahrens { 287789Sahrens zfsvfs_t *zfsvfs = arg; 288789Sahrens 289789Sahrens if (newval == FALSE) { 290789Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES; 291789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES); 292789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0); 293789Sahrens } else { 294789Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES; 295789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES); 296789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0); 297789Sahrens } 298789Sahrens } 299789Sahrens 300789Sahrens static void 301789Sahrens setuid_changed_cb(void *arg, uint64_t newval) 302789Sahrens { 303789Sahrens zfsvfs_t *zfsvfs = arg; 304789Sahrens 305789Sahrens if (newval == FALSE) { 306789Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 307789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 308789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 309789Sahrens } else { 310789Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 311789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 312789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 313789Sahrens } 314789Sahrens } 315789Sahrens 316789Sahrens static void 317789Sahrens exec_changed_cb(void *arg, uint64_t newval) 318789Sahrens { 319789Sahrens zfsvfs_t *zfsvfs = arg; 320789Sahrens 321789Sahrens if (newval == FALSE) { 322789Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 323789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 324789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 325789Sahrens } else { 326789Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 327789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 328789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 329789Sahrens } 330789Sahrens } 331789Sahrens 332789Sahrens static void 333789Sahrens snapdir_changed_cb(void *arg, uint64_t newval) 334789Sahrens { 335789Sahrens zfsvfs_t *zfsvfs = arg; 336789Sahrens 337789Sahrens zfsvfs->z_show_ctldir = newval; 338789Sahrens } 339789Sahrens 340789Sahrens static void 341789Sahrens acl_mode_changed_cb(void *arg, uint64_t newval) 342789Sahrens { 343789Sahrens zfsvfs_t *zfsvfs = arg; 344789Sahrens 345789Sahrens zfsvfs->z_acl_mode = newval; 346789Sahrens } 347789Sahrens 348789Sahrens static void 349789Sahrens acl_inherit_changed_cb(void *arg, uint64_t newval) 350789Sahrens { 351789Sahrens zfsvfs_t *zfsvfs = arg; 352789Sahrens 353789Sahrens zfsvfs->z_acl_inherit = newval; 354789Sahrens } 355789Sahrens 3561544Seschrock static int 3571544Seschrock zfs_refresh_properties(vfs_t *vfsp) 3581544Seschrock { 3591544Seschrock zfsvfs_t *zfsvfs = vfsp->vfs_data; 3601544Seschrock 3612354Stabriz /* 3622354Stabriz * Remount operations default to "rw" unless "ro" is explicitly 3632354Stabriz * specified. 3642354Stabriz */ 3651544Seschrock if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 3661544Seschrock readonly_changed_cb(zfsvfs, B_TRUE); 3672354Stabriz } else { 3682354Stabriz if (!dmu_objset_is_snapshot(zfsvfs->z_os)) 3692354Stabriz readonly_changed_cb(zfsvfs, B_FALSE); 3702354Stabriz else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) 3713912Slling return (EROFS); 3721544Seschrock } 3731544Seschrock 3741544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 3751544Seschrock devices_changed_cb(zfsvfs, B_FALSE); 3761544Seschrock setuid_changed_cb(zfsvfs, B_FALSE); 3771544Seschrock } else { 3781544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 3791544Seschrock devices_changed_cb(zfsvfs, B_FALSE); 3801544Seschrock else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) 3811544Seschrock devices_changed_cb(zfsvfs, B_TRUE); 3821544Seschrock 3831544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 3841544Seschrock setuid_changed_cb(zfsvfs, B_FALSE); 3851544Seschrock else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) 3861544Seschrock setuid_changed_cb(zfsvfs, B_TRUE); 3871544Seschrock } 3881544Seschrock 3891544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) 3901544Seschrock exec_changed_cb(zfsvfs, B_FALSE); 3911544Seschrock else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) 3921544Seschrock exec_changed_cb(zfsvfs, B_TRUE); 3931544Seschrock 3942474Seschrock if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) 3952474Seschrock atime_changed_cb(zfsvfs, B_TRUE); 3962474Seschrock else if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) 3972474Seschrock atime_changed_cb(zfsvfs, B_FALSE); 3982474Seschrock 3993234Sck153898 if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) 4003234Sck153898 xattr_changed_cb(zfsvfs, B_TRUE); 4013234Sck153898 else if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) 4023234Sck153898 xattr_changed_cb(zfsvfs, B_FALSE); 4033234Sck153898 4041544Seschrock return (0); 4051544Seschrock } 4061544Seschrock 4071544Seschrock static int 4081544Seschrock zfs_register_callbacks(vfs_t *vfsp) 4091544Seschrock { 4101544Seschrock struct dsl_dataset *ds = NULL; 4111544Seschrock objset_t *os = NULL; 4121544Seschrock zfsvfs_t *zfsvfs = NULL; 4133265Sahrens int readonly, do_readonly = FALSE; 4143265Sahrens int setuid, do_setuid = FALSE; 4153265Sahrens int exec, do_exec = FALSE; 4163265Sahrens int devices, do_devices = FALSE; 4173265Sahrens int xattr, do_xattr = FALSE; 4181544Seschrock int error = 0; 4191544Seschrock 4201544Seschrock ASSERT(vfsp); 4211544Seschrock zfsvfs = vfsp->vfs_data; 4221544Seschrock ASSERT(zfsvfs); 4231544Seschrock os = zfsvfs->z_os; 4241544Seschrock 4251544Seschrock /* 4261544Seschrock * The act of registering our callbacks will destroy any mount 4271544Seschrock * options we may have. In order to enable temporary overrides 4283234Sck153898 * of mount options, we stash away the current values and 4291544Seschrock * restore them after we register the callbacks. 4301544Seschrock */ 4311544Seschrock if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 4321544Seschrock readonly = B_TRUE; 4331544Seschrock do_readonly = B_TRUE; 4341544Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 4351544Seschrock readonly = B_FALSE; 4361544Seschrock do_readonly = B_TRUE; 4371544Seschrock } 4381544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 4391544Seschrock devices = B_FALSE; 4401544Seschrock setuid = B_FALSE; 4411544Seschrock do_devices = B_TRUE; 4421544Seschrock do_setuid = B_TRUE; 4431544Seschrock } else { 4441544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 4451544Seschrock devices = B_FALSE; 4461544Seschrock do_devices = B_TRUE; 4473912Slling } else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) { 4481544Seschrock devices = B_TRUE; 4491544Seschrock do_devices = B_TRUE; 4501544Seschrock } 4511544Seschrock 4521544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 4531544Seschrock setuid = B_FALSE; 4541544Seschrock do_setuid = B_TRUE; 4551544Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 4561544Seschrock setuid = B_TRUE; 4571544Seschrock do_setuid = B_TRUE; 4581544Seschrock } 4591544Seschrock } 4601544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 4611544Seschrock exec = B_FALSE; 4621544Seschrock do_exec = B_TRUE; 4631544Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 4641544Seschrock exec = B_TRUE; 4651544Seschrock do_exec = B_TRUE; 4661544Seschrock } 4673234Sck153898 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 4683234Sck153898 xattr = B_FALSE; 4693234Sck153898 do_xattr = B_TRUE; 4703234Sck153898 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 4713234Sck153898 xattr = B_TRUE; 4723234Sck153898 do_xattr = B_TRUE; 4733234Sck153898 } 4741544Seschrock 4751544Seschrock /* 4761544Seschrock * Register property callbacks. 4771544Seschrock * 4781544Seschrock * It would probably be fine to just check for i/o error from 4791544Seschrock * the first prop_register(), but I guess I like to go 4801544Seschrock * overboard... 4811544Seschrock */ 4821544Seschrock ds = dmu_objset_ds(os); 4831544Seschrock error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 4841544Seschrock error = error ? error : dsl_prop_register(ds, 4853234Sck153898 "xattr", xattr_changed_cb, zfsvfs); 4863234Sck153898 error = error ? error : dsl_prop_register(ds, 4871544Seschrock "recordsize", blksz_changed_cb, zfsvfs); 4881544Seschrock error = error ? error : dsl_prop_register(ds, 4891544Seschrock "readonly", readonly_changed_cb, zfsvfs); 4901544Seschrock error = error ? error : dsl_prop_register(ds, 4911544Seschrock "devices", devices_changed_cb, zfsvfs); 4921544Seschrock error = error ? error : dsl_prop_register(ds, 4931544Seschrock "setuid", setuid_changed_cb, zfsvfs); 4941544Seschrock error = error ? error : dsl_prop_register(ds, 4951544Seschrock "exec", exec_changed_cb, zfsvfs); 4961544Seschrock error = error ? error : dsl_prop_register(ds, 4971544Seschrock "snapdir", snapdir_changed_cb, zfsvfs); 4981544Seschrock error = error ? error : dsl_prop_register(ds, 4991544Seschrock "aclmode", acl_mode_changed_cb, zfsvfs); 5001544Seschrock error = error ? error : dsl_prop_register(ds, 5011544Seschrock "aclinherit", acl_inherit_changed_cb, zfsvfs); 5021544Seschrock if (error) 5031544Seschrock goto unregister; 5041544Seschrock 5051544Seschrock /* 5061544Seschrock * Invoke our callbacks to restore temporary mount options. 5071544Seschrock */ 5081544Seschrock if (do_readonly) 5091544Seschrock readonly_changed_cb(zfsvfs, readonly); 5101544Seschrock if (do_setuid) 5111544Seschrock setuid_changed_cb(zfsvfs, setuid); 5121544Seschrock if (do_exec) 5131544Seschrock exec_changed_cb(zfsvfs, exec); 5141544Seschrock if (do_devices) 5151544Seschrock devices_changed_cb(zfsvfs, devices); 5163234Sck153898 if (do_xattr) 5173234Sck153898 xattr_changed_cb(zfsvfs, xattr); 5181544Seschrock 5191544Seschrock return (0); 5201544Seschrock 5211544Seschrock unregister: 5221544Seschrock /* 5231544Seschrock * We may attempt to unregister some callbacks that are not 5241544Seschrock * registered, but this is OK; it will simply return ENOMSG, 5251544Seschrock * which we will ignore. 5261544Seschrock */ 5271544Seschrock (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 5283234Sck153898 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 5291544Seschrock (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 5301544Seschrock (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 5311544Seschrock (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs); 5321544Seschrock (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 5331544Seschrock (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 5341544Seschrock (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 5351544Seschrock (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 5361544Seschrock (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 5371544Seschrock zfsvfs); 5381544Seschrock return (error); 5391544Seschrock 5401544Seschrock } 5411544Seschrock 5421544Seschrock static int 5431544Seschrock zfs_domount(vfs_t *vfsp, char *osname, cred_t *cr) 5441544Seschrock { 5451544Seschrock dev_t mount_dev; 5461544Seschrock uint64_t recordsize, readonly; 5471544Seschrock int error = 0; 5481544Seschrock int mode; 5491544Seschrock zfsvfs_t *zfsvfs; 5501544Seschrock znode_t *zp = NULL; 5511544Seschrock 5521544Seschrock ASSERT(vfsp); 5531544Seschrock ASSERT(osname); 5541544Seschrock 5551544Seschrock /* 5561544Seschrock * Initialize the zfs-specific filesystem structure. 5571544Seschrock * Should probably make this a kmem cache, shuffle fields, 5581544Seschrock * and just bzero up to z_hold_mtx[]. 5591544Seschrock */ 5601544Seschrock zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 5611544Seschrock zfsvfs->z_vfs = vfsp; 5621544Seschrock zfsvfs->z_parent = zfsvfs; 5631544Seschrock zfsvfs->z_assign = TXG_NOWAIT; 5641544Seschrock zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 5651544Seschrock zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 5661544Seschrock 5671544Seschrock mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 5681544Seschrock list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 5691544Seschrock offsetof(znode_t, z_link_node)); 5701544Seschrock rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL); 5711544Seschrock 5721544Seschrock /* Initialize the generic filesystem structure. */ 5731544Seschrock vfsp->vfs_bcount = 0; 5741544Seschrock vfsp->vfs_data = NULL; 5751544Seschrock 5761544Seschrock if (zfs_create_unique_device(&mount_dev) == -1) { 5771544Seschrock error = ENODEV; 5781544Seschrock goto out; 5791544Seschrock } 5801544Seschrock ASSERT(vfs_devismounted(mount_dev) == 0); 5811544Seschrock 5821544Seschrock if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 5831544Seschrock NULL)) 5841544Seschrock goto out; 5851544Seschrock 5861544Seschrock vfsp->vfs_dev = mount_dev; 5871544Seschrock vfsp->vfs_fstype = zfsfstype; 5881544Seschrock vfsp->vfs_bsize = recordsize; 5891544Seschrock vfsp->vfs_flag |= VFS_NOTRUNC; 5901544Seschrock vfsp->vfs_data = zfsvfs; 5911544Seschrock 5921544Seschrock if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 5931544Seschrock goto out; 5941544Seschrock 5951544Seschrock if (readonly) 5961544Seschrock mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 5971544Seschrock else 5981544Seschrock mode = DS_MODE_PRIMARY; 5991544Seschrock 6001544Seschrock error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 6011544Seschrock if (error == EROFS) { 6021544Seschrock mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 6031544Seschrock error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 6041544Seschrock &zfsvfs->z_os); 6051544Seschrock } 6061544Seschrock 6071544Seschrock if (error) 6081544Seschrock goto out; 6091544Seschrock 6101544Seschrock if (error = zfs_init_fs(zfsvfs, &zp, cr)) 6111544Seschrock goto out; 6121544Seschrock 6131544Seschrock /* The call to zfs_init_fs leaves the vnode held, release it here. */ 6141544Seschrock VN_RELE(ZTOV(zp)); 6151544Seschrock 6161544Seschrock if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 6173234Sck153898 uint64_t xattr; 6183234Sck153898 6191544Seschrock ASSERT(mode & DS_MODE_READONLY); 6201544Seschrock atime_changed_cb(zfsvfs, B_FALSE); 6211544Seschrock readonly_changed_cb(zfsvfs, B_TRUE); 6223234Sck153898 if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL)) 6233234Sck153898 goto out; 6243234Sck153898 xattr_changed_cb(zfsvfs, xattr); 6251544Seschrock zfsvfs->z_issnap = B_TRUE; 6261544Seschrock } else { 6271544Seschrock error = zfs_register_callbacks(vfsp); 6281544Seschrock if (error) 6291544Seschrock goto out; 6301544Seschrock 6313461Sahrens zfs_unlinked_drain(zfsvfs); 6321544Seschrock 6331544Seschrock /* 6341544Seschrock * Parse and replay the intent log. 6351544Seschrock */ 6361544Seschrock zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 6373461Sahrens zfs_replay_vector); 6381544Seschrock 6391544Seschrock if (!zil_disable) 6401544Seschrock zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 6411544Seschrock } 6421544Seschrock 6431544Seschrock if (!zfsvfs->z_issnap) 6441544Seschrock zfsctl_create(zfsvfs); 6451544Seschrock out: 6461544Seschrock if (error) { 6471544Seschrock if (zfsvfs->z_os) 6481544Seschrock dmu_objset_close(zfsvfs->z_os); 6491544Seschrock kmem_free(zfsvfs, sizeof (zfsvfs_t)); 6501544Seschrock } else { 6511544Seschrock atomic_add_32(&zfs_active_fs_count, 1); 6521544Seschrock } 6531544Seschrock 6541544Seschrock return (error); 6551544Seschrock 6561544Seschrock } 6571544Seschrock 6581544Seschrock void 6591544Seschrock zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 6601544Seschrock { 6611544Seschrock objset_t *os = zfsvfs->z_os; 6621544Seschrock struct dsl_dataset *ds; 6631544Seschrock 6641544Seschrock /* 6651544Seschrock * Unregister properties. 6661544Seschrock */ 6671544Seschrock if (!dmu_objset_is_snapshot(os)) { 6681544Seschrock ds = dmu_objset_ds(os); 6691544Seschrock VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 6701544Seschrock zfsvfs) == 0); 6711544Seschrock 6723234Sck153898 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 6733234Sck153898 zfsvfs) == 0); 6743234Sck153898 6751544Seschrock VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 6761544Seschrock zfsvfs) == 0); 6771544Seschrock 6781544Seschrock VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 6791544Seschrock zfsvfs) == 0); 6801544Seschrock 6811544Seschrock VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, 6821544Seschrock zfsvfs) == 0); 6831544Seschrock 6841544Seschrock VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 6851544Seschrock zfsvfs) == 0); 6861544Seschrock 6871544Seschrock VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 6881544Seschrock zfsvfs) == 0); 6891544Seschrock 6901544Seschrock VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 6911544Seschrock zfsvfs) == 0); 6921544Seschrock 6931544Seschrock VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 6941544Seschrock zfsvfs) == 0); 6951544Seschrock 6961544Seschrock VERIFY(dsl_prop_unregister(ds, "aclinherit", 6971544Seschrock acl_inherit_changed_cb, zfsvfs) == 0); 6981544Seschrock } 6991544Seschrock } 7001544Seschrock 7013912Slling /* 7023912Slling * Convert a decimal digit string to a uint64_t integer. 7033912Slling */ 7043912Slling static int 7053912Slling str_to_uint64(char *str, uint64_t *objnum) 7063912Slling { 7073912Slling uint64_t num = 0; 7083912Slling 7093912Slling while (*str) { 7103912Slling if (*str < '0' || *str > '9') 7113912Slling return (EINVAL); 7123912Slling 7133912Slling num = num*10 + *str++ - '0'; 7143912Slling } 7153912Slling 7163912Slling *objnum = num; 7173912Slling return (0); 7183912Slling } 7193912Slling 7203912Slling 7213912Slling /* 7223912Slling * The boot path passed from the boot loader is in the form of 7233912Slling * "rootpool-name/root-filesystem-object-number'. Convert this 7243912Slling * string to a dataset name: "rootpool-name/root-filesystem-name". 7253912Slling */ 7263912Slling static int 7273912Slling parse_bootpath(char *bpath, char *outpath) 7283912Slling { 7293912Slling char *slashp; 7303912Slling uint64_t objnum; 7313912Slling int error; 7323912Slling 7333912Slling if (*bpath == 0 || *bpath == '/') 7343912Slling return (EINVAL); 7353912Slling 7363912Slling slashp = strchr(bpath, '/'); 7373912Slling 7383912Slling /* if no '/', just return the pool name */ 7393912Slling if (slashp == NULL) { 7403912Slling (void) strcpy(outpath, bpath); 7413912Slling return (0); 7423912Slling } 7433912Slling 7443912Slling if (error = str_to_uint64(slashp+1, &objnum)) 7453912Slling return (error); 7463912Slling 7473912Slling *slashp = '\0'; 7483912Slling error = dsl_dsobj_to_dsname(bpath, objnum, outpath); 7493912Slling *slashp = '/'; 7503912Slling 7513912Slling return (error); 7523912Slling } 7533912Slling 7541544Seschrock static int 7551544Seschrock zfs_mountroot(vfs_t *vfsp, enum whymountroot why) 7561544Seschrock { 7571544Seschrock int error = 0; 7581544Seschrock int ret = 0; 7591544Seschrock static int zfsrootdone = 0; 7601544Seschrock zfsvfs_t *zfsvfs = NULL; 7611544Seschrock znode_t *zp = NULL; 7621544Seschrock vnode_t *vp = NULL; 7633912Slling char *zfs_bootpath; 7641544Seschrock 7651544Seschrock ASSERT(vfsp); 7661544Seschrock 7671544Seschrock /* 7683912Slling * The filesystem that we mount as root is defined in the 7693912Slling * "zfs-bootfs" property. 7701544Seschrock */ 7711544Seschrock if (why == ROOT_INIT) { 7721544Seschrock if (zfsrootdone++) 7731544Seschrock return (EBUSY); 7741544Seschrock 7753912Slling if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), 7763912Slling DDI_PROP_DONTPASS, "zfs-bootfs", &zfs_bootpath) != 7773912Slling DDI_SUCCESS) 7783912Slling return (EIO); 7793912Slling 7803912Slling error = parse_bootpath(zfs_bootpath, rootfs.bo_name); 7813912Slling ddi_prop_free(zfs_bootpath); 7823912Slling 7833912Slling if (error) 7843912Slling return (error); 7851544Seschrock 7861544Seschrock if (error = vfs_lock(vfsp)) 7871544Seschrock return (error); 7881544Seschrock 7893912Slling if (error = zfs_domount(vfsp, rootfs.bo_name, CRED())) 7901544Seschrock goto out; 7911544Seschrock 7921544Seschrock zfsvfs = (zfsvfs_t *)vfsp->vfs_data; 7931544Seschrock ASSERT(zfsvfs); 7941544Seschrock if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) 7951544Seschrock goto out; 7961544Seschrock 7971544Seschrock vp = ZTOV(zp); 7981544Seschrock mutex_enter(&vp->v_lock); 7991544Seschrock vp->v_flag |= VROOT; 8001544Seschrock mutex_exit(&vp->v_lock); 8011544Seschrock rootvp = vp; 8021544Seschrock 8031544Seschrock /* 8041544Seschrock * The zfs_zget call above returns with a hold on vp, we release 8051544Seschrock * it here. 8061544Seschrock */ 8071544Seschrock VN_RELE(vp); 8081544Seschrock 8091544Seschrock /* 8101544Seschrock * Mount root as readonly initially, it will be remouted 8111544Seschrock * read/write by /lib/svc/method/fs-usr. 8121544Seschrock */ 8131544Seschrock readonly_changed_cb(vfsp->vfs_data, B_TRUE); 8141544Seschrock vfs_add((struct vnode *)0, vfsp, 8151544Seschrock (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0); 8161544Seschrock out: 8171544Seschrock vfs_unlock(vfsp); 8181544Seschrock ret = (error) ? error : 0; 8191544Seschrock return (ret); 8201544Seschrock 8211544Seschrock } else if (why == ROOT_REMOUNT) { 8221544Seschrock 8231544Seschrock readonly_changed_cb(vfsp->vfs_data, B_FALSE); 8241544Seschrock vfsp->vfs_flag |= VFS_REMOUNT; 8251544Seschrock return (zfs_refresh_properties(vfsp)); 8261544Seschrock 8271544Seschrock } else if (why == ROOT_UNMOUNT) { 8281544Seschrock zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data); 8291544Seschrock (void) zfs_sync(vfsp, 0, 0); 8301544Seschrock return (0); 8311544Seschrock } 8321544Seschrock 8331544Seschrock /* 8341544Seschrock * if "why" is equal to anything else other than ROOT_INIT, 8351544Seschrock * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it. 8361544Seschrock */ 8371544Seschrock return (ENOTSUP); 8381544Seschrock } 8391544Seschrock 840789Sahrens /*ARGSUSED*/ 841789Sahrens static int 842789Sahrens zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 843789Sahrens { 844789Sahrens char *osname; 845789Sahrens pathname_t spn; 846789Sahrens int error = 0; 847789Sahrens uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ? 8483912Slling UIO_SYSSPACE : UIO_USERSPACE; 849789Sahrens int canwrite; 850789Sahrens 851789Sahrens if (mvp->v_type != VDIR) 852789Sahrens return (ENOTDIR); 853789Sahrens 854789Sahrens mutex_enter(&mvp->v_lock); 855789Sahrens if ((uap->flags & MS_REMOUNT) == 0 && 856789Sahrens (uap->flags & MS_OVERLAY) == 0 && 857789Sahrens (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 858789Sahrens mutex_exit(&mvp->v_lock); 859789Sahrens return (EBUSY); 860789Sahrens } 861789Sahrens mutex_exit(&mvp->v_lock); 862789Sahrens 863789Sahrens /* 864789Sahrens * ZFS does not support passing unparsed data in via MS_DATA. 865789Sahrens * Users should use the MS_OPTIONSTR interface; this means 866789Sahrens * that all option parsing is already done and the options struct 867789Sahrens * can be interrogated. 868789Sahrens */ 869789Sahrens if ((uap->flags & MS_DATA) && uap->datalen > 0) 870789Sahrens return (EINVAL); 871789Sahrens 872789Sahrens /* 873789Sahrens * When doing a remount, we simply refresh our temporary properties 874789Sahrens * according to those options set in the current VFS options. 875789Sahrens */ 876789Sahrens if (uap->flags & MS_REMOUNT) { 8771544Seschrock return (zfs_refresh_properties(vfsp)); 878789Sahrens } 879789Sahrens 880789Sahrens /* 881789Sahrens * Get the objset name (the "special" mount argument). 882789Sahrens */ 883789Sahrens if (error = pn_get(uap->spec, fromspace, &spn)) 884789Sahrens return (error); 885789Sahrens 886789Sahrens osname = spn.pn_path; 887789Sahrens 888*4543Smarks /* 889*4543Smarks * Check for mount privilege? 890*4543Smarks * 891*4543Smarks * If we don't have privilege then see if 892*4543Smarks * we have local permission to allow it 893*4543Smarks */ 894*4543Smarks error = secpolicy_fs_mount(cr, mvp, vfsp); 895*4543Smarks if (error) { 896*4543Smarks error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr); 897*4543Smarks if (error == 0) { 898*4543Smarks vattr_t vattr; 899*4543Smarks 900*4543Smarks /* 901*4543Smarks * Make sure user is the owner of the mount point 902*4543Smarks * or has sufficient privileges. 903*4543Smarks */ 904*4543Smarks 905*4543Smarks vattr.va_mask = AT_UID; 906*4543Smarks 907*4543Smarks if (VOP_GETATTR(mvp, &vattr, 0, cr)) { 908*4543Smarks goto out; 909*4543Smarks } 910*4543Smarks 911*4543Smarks if (error = secpolicy_vnode_owner(cr, vattr.va_uid)) { 912*4543Smarks goto out; 913*4543Smarks } 914*4543Smarks 915*4543Smarks if (error = VOP_ACCESS(mvp, VWRITE, 0, cr)) { 916*4543Smarks goto out; 917*4543Smarks } 918*4543Smarks 919*4543Smarks secpolicy_fs_mount_clearopts(cr, vfsp); 920*4543Smarks } else { 921*4543Smarks goto out; 922*4543Smarks } 923*4543Smarks } 924789Sahrens 925789Sahrens /* 926789Sahrens * Refuse to mount a filesystem if we are in a local zone and the 927789Sahrens * dataset is not visible. 928789Sahrens */ 929789Sahrens if (!INGLOBALZONE(curproc) && 930789Sahrens (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 931789Sahrens error = EPERM; 932789Sahrens goto out; 933789Sahrens } 934789Sahrens 9351544Seschrock error = zfs_domount(vfsp, osname, cr); 936789Sahrens 937789Sahrens out: 938789Sahrens pn_free(&spn); 939789Sahrens return (error); 940789Sahrens } 941789Sahrens 942789Sahrens static int 943789Sahrens zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp) 944789Sahrens { 945789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 946789Sahrens dev32_t d32; 9472885Sahrens uint64_t refdbytes, availbytes, usedobjs, availobjs; 948789Sahrens 949789Sahrens ZFS_ENTER(zfsvfs); 950789Sahrens 9512885Sahrens dmu_objset_space(zfsvfs->z_os, 9522885Sahrens &refdbytes, &availbytes, &usedobjs, &availobjs); 953789Sahrens 954789Sahrens /* 955789Sahrens * The underlying storage pool actually uses multiple block sizes. 956789Sahrens * We report the fragsize as the smallest block size we support, 957789Sahrens * and we report our blocksize as the filesystem's maximum blocksize. 958789Sahrens */ 959789Sahrens statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT; 960789Sahrens statp->f_bsize = zfsvfs->z_max_blksz; 961789Sahrens 962789Sahrens /* 963789Sahrens * The following report "total" blocks of various kinds in the 964789Sahrens * file system, but reported in terms of f_frsize - the 965789Sahrens * "fragment" size. 966789Sahrens */ 967789Sahrens 9682885Sahrens statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 9692885Sahrens statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT; 970789Sahrens statp->f_bavail = statp->f_bfree; /* no root reservation */ 971789Sahrens 972789Sahrens /* 973789Sahrens * statvfs() should really be called statufs(), because it assumes 974789Sahrens * static metadata. ZFS doesn't preallocate files, so the best 975789Sahrens * we can do is report the max that could possibly fit in f_files, 976789Sahrens * and that minus the number actually used in f_ffree. 977789Sahrens * For f_ffree, report the smaller of the number of object available 978789Sahrens * and the number of blocks (each object will take at least a block). 979789Sahrens */ 9802885Sahrens statp->f_ffree = MIN(availobjs, statp->f_bfree); 981789Sahrens statp->f_favail = statp->f_ffree; /* no "root reservation" */ 9822885Sahrens statp->f_files = statp->f_ffree + usedobjs; 983789Sahrens 984789Sahrens (void) cmpldev(&d32, vfsp->vfs_dev); 985789Sahrens statp->f_fsid = d32; 986789Sahrens 987789Sahrens /* 988789Sahrens * We're a zfs filesystem. 989789Sahrens */ 990789Sahrens (void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name); 991789Sahrens 9921123Smarks statp->f_flag = vf_to_stf(vfsp->vfs_flag); 993789Sahrens 994789Sahrens statp->f_namemax = ZFS_MAXNAMELEN; 995789Sahrens 996789Sahrens /* 997789Sahrens * We have all of 32 characters to stuff a string here. 998789Sahrens * Is there anything useful we could/should provide? 999789Sahrens */ 1000789Sahrens bzero(statp->f_fstr, sizeof (statp->f_fstr)); 1001789Sahrens 1002789Sahrens ZFS_EXIT(zfsvfs); 1003789Sahrens return (0); 1004789Sahrens } 1005789Sahrens 1006789Sahrens static int 1007789Sahrens zfs_root(vfs_t *vfsp, vnode_t **vpp) 1008789Sahrens { 1009789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1010789Sahrens znode_t *rootzp; 1011789Sahrens int error; 1012789Sahrens 1013789Sahrens ZFS_ENTER(zfsvfs); 1014789Sahrens 1015789Sahrens error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 1016789Sahrens if (error == 0) 1017789Sahrens *vpp = ZTOV(rootzp); 1018789Sahrens 1019789Sahrens ZFS_EXIT(zfsvfs); 1020789Sahrens return (error); 1021789Sahrens } 1022789Sahrens 1023789Sahrens /*ARGSUSED*/ 1024789Sahrens static int 1025789Sahrens zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) 1026789Sahrens { 1027789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1028789Sahrens int ret; 1029789Sahrens 1030*4543Smarks ret = secpolicy_fs_unmount(cr, vfsp); 1031*4543Smarks if (ret) { 1032*4543Smarks ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), 1033*4543Smarks ZFS_DELEG_PERM_MOUNT, cr); 1034*4543Smarks if (ret) 1035*4543Smarks return (ret); 1036*4543Smarks } 10371484Sek110237 10381484Sek110237 (void) dnlc_purge_vfsp(vfsp, 0); 10391484Sek110237 1040789Sahrens /* 1041789Sahrens * Unmount any snapshots mounted under .zfs before unmounting the 1042789Sahrens * dataset itself. 1043789Sahrens */ 1044789Sahrens if (zfsvfs->z_ctldir != NULL && 1045*4543Smarks (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) { 1046789Sahrens return (ret); 1047*4543Smarks } 1048789Sahrens 1049789Sahrens if (fflag & MS_FORCE) { 1050789Sahrens vfsp->vfs_flag |= VFS_UNMOUNTED; 1051789Sahrens zfsvfs->z_unmounted1 = B_TRUE; 1052789Sahrens 1053789Sahrens /* 10544480Sgw25295 * Ensure that z_unmounted1 reaches global visibility 10554480Sgw25295 * before z_op_cnt. 10564480Sgw25295 */ 10574480Sgw25295 membar_producer(); 10584480Sgw25295 10594480Sgw25295 /* 1060789Sahrens * Wait for all zfs threads to leave zfs. 1061789Sahrens * Grabbing a rwlock as reader in all vops and 1062789Sahrens * as writer here doesn't work because it too easy to get 1063789Sahrens * multiple reader enters as zfs can re-enter itself. 1064789Sahrens * This can lead to deadlock if there is an intervening 1065789Sahrens * rw_enter as writer. 1066789Sahrens * So a file system threads ref count (z_op_cnt) is used. 1067789Sahrens * A polling loop on z_op_cnt may seem inefficient, but 1068789Sahrens * - this saves all threads on exit from having to grab a 1069789Sahrens * mutex in order to cv_signal 1070789Sahrens * - only occurs on forced unmount in the rare case when 1071789Sahrens * there are outstanding threads within the file system. 1072789Sahrens */ 1073789Sahrens while (zfsvfs->z_op_cnt) { 1074789Sahrens delay(1); 1075789Sahrens } 1076789Sahrens 1077789Sahrens zfs_objset_close(zfsvfs); 1078789Sahrens 1079789Sahrens return (0); 1080789Sahrens } 1081789Sahrens /* 1082789Sahrens * Check the number of active vnodes in the file system. 1083789Sahrens * Our count is maintained in the vfs structure, but the number 1084789Sahrens * is off by 1 to indicate a hold on the vfs structure itself. 1085789Sahrens * 1086789Sahrens * The '.zfs' directory maintains a reference of its own, and any active 1087789Sahrens * references underneath are reflected in the vnode count. 1088789Sahrens */ 1089789Sahrens if (zfsvfs->z_ctldir == NULL) { 10903461Sahrens if (vfsp->vfs_count > 1) 1091789Sahrens return (EBUSY); 1092789Sahrens } else { 1093789Sahrens if (vfsp->vfs_count > 2 || 1094789Sahrens (zfsvfs->z_ctldir->v_count > 1 && !(fflag & MS_FORCE))) { 1095789Sahrens return (EBUSY); 1096789Sahrens } 1097789Sahrens } 1098789Sahrens 1099789Sahrens vfsp->vfs_flag |= VFS_UNMOUNTED; 1100789Sahrens zfs_objset_close(zfsvfs); 1101789Sahrens 1102789Sahrens return (0); 1103789Sahrens } 1104789Sahrens 1105789Sahrens static int 1106789Sahrens zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1107789Sahrens { 1108789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1109789Sahrens znode_t *zp; 1110789Sahrens uint64_t object = 0; 1111789Sahrens uint64_t fid_gen = 0; 1112789Sahrens uint64_t gen_mask; 1113789Sahrens uint64_t zp_gen; 1114789Sahrens int i, err; 1115789Sahrens 1116789Sahrens *vpp = NULL; 1117789Sahrens 1118789Sahrens ZFS_ENTER(zfsvfs); 1119789Sahrens 1120789Sahrens if (fidp->fid_len == LONG_FID_LEN) { 1121789Sahrens zfid_long_t *zlfid = (zfid_long_t *)fidp; 1122789Sahrens uint64_t objsetid = 0; 1123789Sahrens uint64_t setgen = 0; 1124789Sahrens 1125789Sahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1126789Sahrens objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1127789Sahrens 1128789Sahrens for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1129789Sahrens setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1130789Sahrens 1131789Sahrens ZFS_EXIT(zfsvfs); 1132789Sahrens 1133789Sahrens err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1134789Sahrens if (err) 1135789Sahrens return (EINVAL); 1136789Sahrens ZFS_ENTER(zfsvfs); 1137789Sahrens } 1138789Sahrens 1139789Sahrens if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1140789Sahrens zfid_short_t *zfid = (zfid_short_t *)fidp; 1141789Sahrens 1142789Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 1143789Sahrens object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1144789Sahrens 1145789Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 1146789Sahrens fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1147789Sahrens } else { 1148789Sahrens ZFS_EXIT(zfsvfs); 1149789Sahrens return (EINVAL); 1150789Sahrens } 1151789Sahrens 1152789Sahrens /* A zero fid_gen means we are in the .zfs control directories */ 1153789Sahrens if (fid_gen == 0 && 1154789Sahrens (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 1155789Sahrens *vpp = zfsvfs->z_ctldir; 1156789Sahrens ASSERT(*vpp != NULL); 1157789Sahrens if (object == ZFSCTL_INO_SNAPDIR) { 1158789Sahrens VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 1159789Sahrens 0, NULL, NULL) == 0); 1160789Sahrens } else { 1161789Sahrens VN_HOLD(*vpp); 1162789Sahrens } 1163789Sahrens ZFS_EXIT(zfsvfs); 1164789Sahrens return (0); 1165789Sahrens } 1166789Sahrens 1167789Sahrens gen_mask = -1ULL >> (64 - 8 * i); 1168789Sahrens 1169789Sahrens dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 1170789Sahrens if (err = zfs_zget(zfsvfs, object, &zp)) { 1171789Sahrens ZFS_EXIT(zfsvfs); 1172789Sahrens return (err); 1173789Sahrens } 1174789Sahrens zp_gen = zp->z_phys->zp_gen & gen_mask; 1175789Sahrens if (zp_gen == 0) 1176789Sahrens zp_gen = 1; 11773461Sahrens if (zp->z_unlinked || zp_gen != fid_gen) { 1178789Sahrens dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 1179789Sahrens VN_RELE(ZTOV(zp)); 1180789Sahrens ZFS_EXIT(zfsvfs); 1181789Sahrens return (EINVAL); 1182789Sahrens } 1183789Sahrens 1184789Sahrens *vpp = ZTOV(zp); 1185789Sahrens ZFS_EXIT(zfsvfs); 1186789Sahrens return (0); 1187789Sahrens } 1188789Sahrens 1189789Sahrens static void 1190789Sahrens zfs_objset_close(zfsvfs_t *zfsvfs) 1191789Sahrens { 1192789Sahrens znode_t *zp, *nextzp; 1193789Sahrens objset_t *os = zfsvfs->z_os; 1194789Sahrens 1195789Sahrens /* 1196789Sahrens * For forced unmount, at this point all vops except zfs_inactive 1197789Sahrens * are erroring EIO. We need to now suspend zfs_inactive threads 1198789Sahrens * while we are freeing dbufs before switching zfs_inactive 1199789Sahrens * to use behaviour without a objset. 1200789Sahrens */ 1201789Sahrens rw_enter(&zfsvfs->z_um_lock, RW_WRITER); 1202789Sahrens 1203789Sahrens /* 1204789Sahrens * Release all holds on dbufs 1205789Sahrens * Note, although we have stopped all other vop threads and 1206789Sahrens * zfs_inactive(), the dmu can callback via znode_pageout_func() 1207789Sahrens * which can zfs_znode_free() the znode. 1208789Sahrens * So we lock z_all_znodes; search the list for a held 1209789Sahrens * dbuf; drop the lock (we know zp can't disappear if we hold 1210789Sahrens * a dbuf lock; then regrab the lock and restart. 1211789Sahrens */ 1212789Sahrens mutex_enter(&zfsvfs->z_znodes_lock); 1213789Sahrens for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { 1214789Sahrens nextzp = list_next(&zfsvfs->z_all_znodes, zp); 1215789Sahrens if (zp->z_dbuf_held) { 1216789Sahrens /* dbufs should only be held when force unmounting */ 1217789Sahrens zp->z_dbuf_held = 0; 1218789Sahrens mutex_exit(&zfsvfs->z_znodes_lock); 12191544Seschrock dmu_buf_rele(zp->z_dbuf, NULL); 1220789Sahrens /* Start again */ 1221789Sahrens mutex_enter(&zfsvfs->z_znodes_lock); 1222789Sahrens nextzp = list_head(&zfsvfs->z_all_znodes); 1223789Sahrens } 1224789Sahrens } 1225789Sahrens mutex_exit(&zfsvfs->z_znodes_lock); 1226789Sahrens 1227789Sahrens /* 1228789Sahrens * Unregister properties. 1229789Sahrens */ 12301544Seschrock if (!dmu_objset_is_snapshot(os)) 12311544Seschrock zfs_unregister_callbacks(zfsvfs); 1232789Sahrens 1233789Sahrens /* 1234789Sahrens * Switch zfs_inactive to behaviour without an objset. 1235789Sahrens * It just tosses cached pages and frees the znode & vnode. 1236789Sahrens * Then re-enable zfs_inactive threads in that new behaviour. 1237789Sahrens */ 1238789Sahrens zfsvfs->z_unmounted2 = B_TRUE; 1239789Sahrens rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads */ 1240789Sahrens 1241789Sahrens /* 1242789Sahrens * Close the zil. Can't close the zil while zfs_inactive 1243789Sahrens * threads are blocked as zil_close can call zfs_inactive. 1244789Sahrens */ 1245789Sahrens if (zfsvfs->z_log) { 1246789Sahrens zil_close(zfsvfs->z_log); 1247789Sahrens zfsvfs->z_log = NULL; 1248789Sahrens } 1249789Sahrens 1250789Sahrens /* 12511544Seschrock * Evict all dbufs so that cached znodes will be freed 12521544Seschrock */ 12531646Sperrin if (dmu_objset_evict_dbufs(os, 1)) { 12541646Sperrin txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 12551646Sperrin (void) dmu_objset_evict_dbufs(os, 0); 12561646Sperrin } 12571544Seschrock 12581544Seschrock /* 1259789Sahrens * Finally close the objset 1260789Sahrens */ 1261789Sahrens dmu_objset_close(os); 1262789Sahrens 12631298Sperrin /* 12641298Sperrin * We can now safely destroy the '.zfs' directory node. 12651298Sperrin */ 12661298Sperrin if (zfsvfs->z_ctldir != NULL) 12671298Sperrin zfsctl_destroy(zfsvfs); 12681298Sperrin 1269789Sahrens } 1270789Sahrens 1271789Sahrens static void 1272789Sahrens zfs_freevfs(vfs_t *vfsp) 1273789Sahrens { 1274789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1275789Sahrens 1276789Sahrens kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1277789Sahrens 1278789Sahrens atomic_add_32(&zfs_active_fs_count, -1); 1279789Sahrens } 1280789Sahrens 1281789Sahrens /* 1282789Sahrens * VFS_INIT() initialization. Note that there is no VFS_FINI(), 1283789Sahrens * so we can't safely do any non-idempotent initialization here. 1284789Sahrens * Leave that to zfs_init() and zfs_fini(), which are called 1285789Sahrens * from the module's _init() and _fini() entry points. 1286789Sahrens */ 1287789Sahrens /*ARGSUSED*/ 1288789Sahrens static int 1289789Sahrens zfs_vfsinit(int fstype, char *name) 1290789Sahrens { 1291789Sahrens int error; 1292789Sahrens 1293789Sahrens zfsfstype = fstype; 1294789Sahrens 1295789Sahrens /* 1296789Sahrens * Setup vfsops and vnodeops tables. 1297789Sahrens */ 1298789Sahrens error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops); 1299789Sahrens if (error != 0) { 1300789Sahrens cmn_err(CE_WARN, "zfs: bad vfs ops template"); 1301789Sahrens } 1302789Sahrens 1303789Sahrens error = zfs_create_op_tables(); 1304789Sahrens if (error) { 1305789Sahrens zfs_remove_op_tables(); 1306789Sahrens cmn_err(CE_WARN, "zfs: bad vnode ops template"); 1307789Sahrens (void) vfs_freevfsops_by_type(zfsfstype); 1308789Sahrens return (error); 1309789Sahrens } 1310789Sahrens 1311789Sahrens mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL); 1312789Sahrens 1313789Sahrens /* 1314849Sbonwick * Unique major number for all zfs mounts. 1315849Sbonwick * If we run out of 32-bit minors, we'll getudev() another major. 1316789Sahrens */ 1317849Sbonwick zfs_major = ddi_name_to_major(ZFS_DRIVER); 1318849Sbonwick zfs_minor = ZFS_MIN_MINOR; 1319789Sahrens 1320789Sahrens return (0); 1321789Sahrens } 1322789Sahrens 1323789Sahrens void 1324789Sahrens zfs_init(void) 1325789Sahrens { 1326789Sahrens /* 1327789Sahrens * Initialize .zfs directory structures 1328789Sahrens */ 1329789Sahrens zfsctl_init(); 1330789Sahrens 1331789Sahrens /* 1332789Sahrens * Initialize znode cache, vnode ops, etc... 1333789Sahrens */ 1334789Sahrens zfs_znode_init(); 1335789Sahrens } 1336789Sahrens 1337789Sahrens void 1338789Sahrens zfs_fini(void) 1339789Sahrens { 1340789Sahrens zfsctl_fini(); 1341789Sahrens zfs_znode_fini(); 1342789Sahrens } 1343789Sahrens 1344789Sahrens int 1345789Sahrens zfs_busy(void) 1346789Sahrens { 1347789Sahrens return (zfs_active_fs_count != 0); 1348789Sahrens } 1349789Sahrens 1350789Sahrens static vfsdef_t vfw = { 1351789Sahrens VFSDEF_VERSION, 1352789Sahrens MNTTYPE_ZFS, 1353789Sahrens zfs_vfsinit, 13541488Srsb VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS, 1355789Sahrens &zfs_mntopts 1356789Sahrens }; 1357789Sahrens 1358789Sahrens struct modlfs zfs_modlfs = { 13592676Seschrock &mod_fsops, "ZFS filesystem version " ZFS_VERSION_STRING, &vfw 1360789Sahrens }; 1361