1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51484Sek110237 * Common Development and Distribution License (the "License"). 61484Sek110237 * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 223461Sahrens * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 263246Sck153898 #pragma ident "%Z%%M% %I% %E% SMI" 27789Sahrens 28789Sahrens #include <sys/types.h> 29789Sahrens #include <sys/param.h> 30789Sahrens #include <sys/systm.h> 31789Sahrens #include <sys/sysmacros.h> 32789Sahrens #include <sys/kmem.h> 33789Sahrens #include <sys/pathname.h> 34789Sahrens #include <sys/vnode.h> 35789Sahrens #include <sys/vfs.h> 363898Srsb #include <sys/vfs_opreg.h> 37789Sahrens #include <sys/mntent.h> 38789Sahrens #include <sys/mount.h> 39789Sahrens #include <sys/cmn_err.h> 40789Sahrens #include "fs/fs_subr.h" 41789Sahrens #include <sys/zfs_znode.h> 423461Sahrens #include <sys/zfs_dir.h> 43789Sahrens #include <sys/zil.h> 44789Sahrens #include <sys/fs/zfs.h> 45789Sahrens #include <sys/dmu.h> 46789Sahrens #include <sys/dsl_prop.h> 473912Slling #include <sys/dsl_dataset.h> 484543Smarks #include <sys/dsl_deleg.h> 49789Sahrens #include <sys/spa.h> 50789Sahrens #include <sys/zap.h> 51789Sahrens #include <sys/varargs.h> 52789Sahrens #include <sys/policy.h> 53789Sahrens #include <sys/atomic.h> 54789Sahrens #include <sys/mkdev.h> 55789Sahrens #include <sys/modctl.h> 564543Smarks #include <sys/refstr.h> 57789Sahrens #include <sys/zfs_ioctl.h> 58789Sahrens #include <sys/zfs_ctldir.h> 591544Seschrock #include <sys/bootconf.h> 60849Sbonwick #include <sys/sunddi.h> 611484Sek110237 #include <sys/dnlc.h> 62789Sahrens 63789Sahrens int zfsfstype; 64789Sahrens vfsops_t *zfs_vfsops = NULL; 65849Sbonwick static major_t zfs_major; 66789Sahrens static minor_t zfs_minor; 67789Sahrens static kmutex_t zfs_dev_mtx; 68789Sahrens 69789Sahrens static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr); 70789Sahrens static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr); 711544Seschrock static int zfs_mountroot(vfs_t *vfsp, enum whymountroot); 72789Sahrens static int zfs_root(vfs_t *vfsp, vnode_t **vpp); 73789Sahrens static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp); 74789Sahrens static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp); 75789Sahrens static void zfs_freevfs(vfs_t *vfsp); 76789Sahrens static void zfs_objset_close(zfsvfs_t *zfsvfs); 77789Sahrens 78789Sahrens static const fs_operation_def_t zfs_vfsops_template[] = { 793898Srsb VFSNAME_MOUNT, { .vfs_mount = zfs_mount }, 803898Srsb VFSNAME_MOUNTROOT, { .vfs_mountroot = zfs_mountroot }, 813898Srsb VFSNAME_UNMOUNT, { .vfs_unmount = zfs_umount }, 823898Srsb VFSNAME_ROOT, { .vfs_root = zfs_root }, 833898Srsb VFSNAME_STATVFS, { .vfs_statvfs = zfs_statvfs }, 843898Srsb VFSNAME_SYNC, { .vfs_sync = zfs_sync }, 853898Srsb VFSNAME_VGET, { .vfs_vget = zfs_vget }, 863898Srsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 873898Srsb NULL, NULL 88789Sahrens }; 89789Sahrens 90789Sahrens static const fs_operation_def_t zfs_vfsops_eio_template[] = { 913898Srsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 923898Srsb NULL, NULL 93789Sahrens }; 94789Sahrens 95789Sahrens /* 96789Sahrens * We need to keep a count of active fs's. 97789Sahrens * This is necessary to prevent our module 98789Sahrens * from being unloaded after a umount -f 99789Sahrens */ 100789Sahrens static uint32_t zfs_active_fs_count = 0; 101789Sahrens 102789Sahrens static char *noatime_cancel[] = { MNTOPT_ATIME, NULL }; 103789Sahrens static char *atime_cancel[] = { MNTOPT_NOATIME, NULL }; 1043234Sck153898 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 1053234Sck153898 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 106789Sahrens 1073234Sck153898 /* 1084596Slling * MO_DEFAULT is not used since the default value is determined 1094596Slling * by the equivalent property. 1103234Sck153898 */ 111789Sahrens static mntopt_t mntopts[] = { 1123234Sck153898 { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, NULL }, 1133234Sck153898 { MNTOPT_XATTR, xattr_cancel, NULL, 0, NULL }, 1144596Slling { MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL }, 115789Sahrens { MNTOPT_ATIME, atime_cancel, NULL, 0, NULL } 116789Sahrens }; 117789Sahrens 118789Sahrens static mntopts_t zfs_mntopts = { 119789Sahrens sizeof (mntopts) / sizeof (mntopt_t), 120789Sahrens mntopts 121789Sahrens }; 122789Sahrens 123789Sahrens /*ARGSUSED*/ 124789Sahrens int 125789Sahrens zfs_sync(vfs_t *vfsp, short flag, cred_t *cr) 126789Sahrens { 127789Sahrens /* 128789Sahrens * Data integrity is job one. We don't want a compromised kernel 129789Sahrens * writing to the storage pool, so we never sync during panic. 130789Sahrens */ 131789Sahrens if (panicstr) 132789Sahrens return (0); 133789Sahrens 134789Sahrens /* 135789Sahrens * SYNC_ATTR is used by fsflush() to force old filesystems like UFS 136789Sahrens * to sync metadata, which they would otherwise cache indefinitely. 137789Sahrens * Semantically, the only requirement is that the sync be initiated. 138789Sahrens * The DMU syncs out txgs frequently, so there's nothing to do. 139789Sahrens */ 140789Sahrens if (flag & SYNC_ATTR) 141789Sahrens return (0); 142789Sahrens 143789Sahrens if (vfsp != NULL) { 144789Sahrens /* 145789Sahrens * Sync a specific filesystem. 146789Sahrens */ 147789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 148789Sahrens 149789Sahrens ZFS_ENTER(zfsvfs); 150789Sahrens if (zfsvfs->z_log != NULL) 1512638Sperrin zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 152789Sahrens else 153789Sahrens txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 154789Sahrens ZFS_EXIT(zfsvfs); 155789Sahrens } else { 156789Sahrens /* 157789Sahrens * Sync all ZFS filesystems. This is what happens when you 158789Sahrens * run sync(1M). Unlike other filesystems, ZFS honors the 159789Sahrens * request by waiting for all pools to commit all dirty data. 160789Sahrens */ 161789Sahrens spa_sync_allpools(); 162789Sahrens } 163789Sahrens 164789Sahrens return (0); 165789Sahrens } 166789Sahrens 1671544Seschrock static int 1681544Seschrock zfs_create_unique_device(dev_t *dev) 1691544Seschrock { 1701544Seschrock major_t new_major; 1711544Seschrock 1721544Seschrock do { 1731544Seschrock ASSERT3U(zfs_minor, <=, MAXMIN32); 1741544Seschrock minor_t start = zfs_minor; 1751544Seschrock do { 1761544Seschrock mutex_enter(&zfs_dev_mtx); 1771544Seschrock if (zfs_minor >= MAXMIN32) { 1781544Seschrock /* 1791544Seschrock * If we're still using the real major 1801544Seschrock * keep out of /dev/zfs and /dev/zvol minor 1811544Seschrock * number space. If we're using a getudev()'ed 1821544Seschrock * major number, we can use all of its minors. 1831544Seschrock */ 1841544Seschrock if (zfs_major == ddi_name_to_major(ZFS_DRIVER)) 1851544Seschrock zfs_minor = ZFS_MIN_MINOR; 1861544Seschrock else 1871544Seschrock zfs_minor = 0; 1881544Seschrock } else { 1891544Seschrock zfs_minor++; 1901544Seschrock } 1911544Seschrock *dev = makedevice(zfs_major, zfs_minor); 1921544Seschrock mutex_exit(&zfs_dev_mtx); 1931544Seschrock } while (vfs_devismounted(*dev) && zfs_minor != start); 1941544Seschrock if (zfs_minor == start) { 1951544Seschrock /* 1961544Seschrock * We are using all ~262,000 minor numbers for the 1971544Seschrock * current major number. Create a new major number. 1981544Seschrock */ 1991544Seschrock if ((new_major = getudev()) == (major_t)-1) { 2001544Seschrock cmn_err(CE_WARN, 2011544Seschrock "zfs_mount: Can't get unique major " 2021544Seschrock "device number."); 2031544Seschrock return (-1); 2041544Seschrock } 2051544Seschrock mutex_enter(&zfs_dev_mtx); 2061544Seschrock zfs_major = new_major; 2071544Seschrock zfs_minor = 0; 2081544Seschrock 2091544Seschrock mutex_exit(&zfs_dev_mtx); 2101544Seschrock } else { 2111544Seschrock break; 2121544Seschrock } 2131544Seschrock /* CONSTANTCONDITION */ 2141544Seschrock } while (1); 2151544Seschrock 2161544Seschrock return (0); 2171544Seschrock } 2181544Seschrock 219789Sahrens static void 220789Sahrens atime_changed_cb(void *arg, uint64_t newval) 221789Sahrens { 222789Sahrens zfsvfs_t *zfsvfs = arg; 223789Sahrens 224789Sahrens if (newval == TRUE) { 225789Sahrens zfsvfs->z_atime = TRUE; 226789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 227789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 228789Sahrens } else { 229789Sahrens zfsvfs->z_atime = FALSE; 230789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 231789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 232789Sahrens } 233789Sahrens } 234789Sahrens 235789Sahrens static void 2363234Sck153898 xattr_changed_cb(void *arg, uint64_t newval) 2373234Sck153898 { 2383234Sck153898 zfsvfs_t *zfsvfs = arg; 2393234Sck153898 2403234Sck153898 if (newval == TRUE) { 2413234Sck153898 /* XXX locking on vfs_flag? */ 2423234Sck153898 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 2433234Sck153898 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 2443234Sck153898 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 2453234Sck153898 } else { 2463234Sck153898 /* XXX locking on vfs_flag? */ 2473234Sck153898 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 2483234Sck153898 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 2493234Sck153898 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 2503234Sck153898 } 2513234Sck153898 } 2523234Sck153898 2533234Sck153898 static void 254789Sahrens blksz_changed_cb(void *arg, uint64_t newval) 255789Sahrens { 256789Sahrens zfsvfs_t *zfsvfs = arg; 257789Sahrens 258789Sahrens if (newval < SPA_MINBLOCKSIZE || 259789Sahrens newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 260789Sahrens newval = SPA_MAXBLOCKSIZE; 261789Sahrens 262789Sahrens zfsvfs->z_max_blksz = newval; 263789Sahrens zfsvfs->z_vfs->vfs_bsize = newval; 264789Sahrens } 265789Sahrens 266789Sahrens static void 267789Sahrens readonly_changed_cb(void *arg, uint64_t newval) 268789Sahrens { 269789Sahrens zfsvfs_t *zfsvfs = arg; 270789Sahrens 271789Sahrens if (newval) { 272789Sahrens /* XXX locking on vfs_flag? */ 273789Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 274789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 275789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 276789Sahrens } else { 277789Sahrens /* XXX locking on vfs_flag? */ 278789Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 279789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 280789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 281789Sahrens } 282789Sahrens } 283789Sahrens 284789Sahrens static void 285789Sahrens devices_changed_cb(void *arg, uint64_t newval) 286789Sahrens { 287789Sahrens zfsvfs_t *zfsvfs = arg; 288789Sahrens 289789Sahrens if (newval == FALSE) { 290789Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES; 291789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES); 292789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0); 293789Sahrens } else { 294789Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES; 295789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES); 296789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0); 297789Sahrens } 298789Sahrens } 299789Sahrens 300789Sahrens static void 301789Sahrens setuid_changed_cb(void *arg, uint64_t newval) 302789Sahrens { 303789Sahrens zfsvfs_t *zfsvfs = arg; 304789Sahrens 305789Sahrens if (newval == FALSE) { 306789Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 307789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 308789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 309789Sahrens } else { 310789Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 311789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 312789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 313789Sahrens } 314789Sahrens } 315789Sahrens 316789Sahrens static void 317789Sahrens exec_changed_cb(void *arg, uint64_t newval) 318789Sahrens { 319789Sahrens zfsvfs_t *zfsvfs = arg; 320789Sahrens 321789Sahrens if (newval == FALSE) { 322789Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 323789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 324789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 325789Sahrens } else { 326789Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 327789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 328789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 329789Sahrens } 330789Sahrens } 331789Sahrens 332789Sahrens static void 333789Sahrens snapdir_changed_cb(void *arg, uint64_t newval) 334789Sahrens { 335789Sahrens zfsvfs_t *zfsvfs = arg; 336789Sahrens 337789Sahrens zfsvfs->z_show_ctldir = newval; 338789Sahrens } 339789Sahrens 340789Sahrens static void 341789Sahrens acl_mode_changed_cb(void *arg, uint64_t newval) 342789Sahrens { 343789Sahrens zfsvfs_t *zfsvfs = arg; 344789Sahrens 345789Sahrens zfsvfs->z_acl_mode = newval; 346789Sahrens } 347789Sahrens 348789Sahrens static void 349789Sahrens acl_inherit_changed_cb(void *arg, uint64_t newval) 350789Sahrens { 351789Sahrens zfsvfs_t *zfsvfs = arg; 352789Sahrens 353789Sahrens zfsvfs->z_acl_inherit = newval; 354789Sahrens } 355789Sahrens 3561544Seschrock static int 3571544Seschrock zfs_register_callbacks(vfs_t *vfsp) 3581544Seschrock { 3591544Seschrock struct dsl_dataset *ds = NULL; 3601544Seschrock objset_t *os = NULL; 3611544Seschrock zfsvfs_t *zfsvfs = NULL; 3623265Sahrens int readonly, do_readonly = FALSE; 3633265Sahrens int setuid, do_setuid = FALSE; 3643265Sahrens int exec, do_exec = FALSE; 3653265Sahrens int devices, do_devices = FALSE; 3663265Sahrens int xattr, do_xattr = FALSE; 3674596Slling int atime, do_atime = FALSE; 3681544Seschrock int error = 0; 3691544Seschrock 3701544Seschrock ASSERT(vfsp); 3711544Seschrock zfsvfs = vfsp->vfs_data; 3721544Seschrock ASSERT(zfsvfs); 3731544Seschrock os = zfsvfs->z_os; 3741544Seschrock 3751544Seschrock /* 3761544Seschrock * The act of registering our callbacks will destroy any mount 3771544Seschrock * options we may have. In order to enable temporary overrides 3783234Sck153898 * of mount options, we stash away the current values and 3791544Seschrock * restore them after we register the callbacks. 3801544Seschrock */ 3811544Seschrock if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 3821544Seschrock readonly = B_TRUE; 3831544Seschrock do_readonly = B_TRUE; 3841544Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 3851544Seschrock readonly = B_FALSE; 3861544Seschrock do_readonly = B_TRUE; 3871544Seschrock } 3881544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 3891544Seschrock devices = B_FALSE; 3901544Seschrock setuid = B_FALSE; 3911544Seschrock do_devices = B_TRUE; 3921544Seschrock do_setuid = B_TRUE; 3931544Seschrock } else { 3941544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 3951544Seschrock devices = B_FALSE; 3961544Seschrock do_devices = B_TRUE; 3973912Slling } else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) { 3981544Seschrock devices = B_TRUE; 3991544Seschrock do_devices = B_TRUE; 4001544Seschrock } 4011544Seschrock 4021544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 4031544Seschrock setuid = B_FALSE; 4041544Seschrock do_setuid = B_TRUE; 4051544Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 4061544Seschrock setuid = B_TRUE; 4071544Seschrock do_setuid = B_TRUE; 4081544Seschrock } 4091544Seschrock } 4101544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 4111544Seschrock exec = B_FALSE; 4121544Seschrock do_exec = B_TRUE; 4131544Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 4141544Seschrock exec = B_TRUE; 4151544Seschrock do_exec = B_TRUE; 4161544Seschrock } 4173234Sck153898 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 4183234Sck153898 xattr = B_FALSE; 4193234Sck153898 do_xattr = B_TRUE; 4203234Sck153898 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 4213234Sck153898 xattr = B_TRUE; 4223234Sck153898 do_xattr = B_TRUE; 4233234Sck153898 } 4244596Slling if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 4254596Slling atime = B_FALSE; 4264596Slling do_atime = B_TRUE; 4274596Slling } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 4284596Slling atime = B_TRUE; 4294596Slling do_atime = B_TRUE; 4304596Slling } 4311544Seschrock 4321544Seschrock /* 4331544Seschrock * Register property callbacks. 4341544Seschrock * 4351544Seschrock * It would probably be fine to just check for i/o error from 4361544Seschrock * the first prop_register(), but I guess I like to go 4371544Seschrock * overboard... 4381544Seschrock */ 4391544Seschrock ds = dmu_objset_ds(os); 4401544Seschrock error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 4411544Seschrock error = error ? error : dsl_prop_register(ds, 4423234Sck153898 "xattr", xattr_changed_cb, zfsvfs); 4433234Sck153898 error = error ? error : dsl_prop_register(ds, 4441544Seschrock "recordsize", blksz_changed_cb, zfsvfs); 4451544Seschrock error = error ? error : dsl_prop_register(ds, 4461544Seschrock "readonly", readonly_changed_cb, zfsvfs); 4471544Seschrock error = error ? error : dsl_prop_register(ds, 4481544Seschrock "devices", devices_changed_cb, zfsvfs); 4491544Seschrock error = error ? error : dsl_prop_register(ds, 4501544Seschrock "setuid", setuid_changed_cb, zfsvfs); 4511544Seschrock error = error ? error : dsl_prop_register(ds, 4521544Seschrock "exec", exec_changed_cb, zfsvfs); 4531544Seschrock error = error ? error : dsl_prop_register(ds, 4541544Seschrock "snapdir", snapdir_changed_cb, zfsvfs); 4551544Seschrock error = error ? error : dsl_prop_register(ds, 4561544Seschrock "aclmode", acl_mode_changed_cb, zfsvfs); 4571544Seschrock error = error ? error : dsl_prop_register(ds, 4581544Seschrock "aclinherit", acl_inherit_changed_cb, zfsvfs); 4591544Seschrock if (error) 4601544Seschrock goto unregister; 4611544Seschrock 4621544Seschrock /* 4631544Seschrock * Invoke our callbacks to restore temporary mount options. 4641544Seschrock */ 4651544Seschrock if (do_readonly) 4661544Seschrock readonly_changed_cb(zfsvfs, readonly); 4671544Seschrock if (do_setuid) 4681544Seschrock setuid_changed_cb(zfsvfs, setuid); 4691544Seschrock if (do_exec) 4701544Seschrock exec_changed_cb(zfsvfs, exec); 4711544Seschrock if (do_devices) 4721544Seschrock devices_changed_cb(zfsvfs, devices); 4733234Sck153898 if (do_xattr) 4743234Sck153898 xattr_changed_cb(zfsvfs, xattr); 4754596Slling if (do_atime) 4764596Slling atime_changed_cb(zfsvfs, atime); 4771544Seschrock 4781544Seschrock return (0); 4791544Seschrock 4801544Seschrock unregister: 4811544Seschrock /* 4821544Seschrock * We may attempt to unregister some callbacks that are not 4831544Seschrock * registered, but this is OK; it will simply return ENOMSG, 4841544Seschrock * which we will ignore. 4851544Seschrock */ 4861544Seschrock (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 4873234Sck153898 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 4881544Seschrock (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 4891544Seschrock (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 4901544Seschrock (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs); 4911544Seschrock (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 4921544Seschrock (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 4931544Seschrock (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 4941544Seschrock (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 4951544Seschrock (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 4961544Seschrock zfsvfs); 4971544Seschrock return (error); 4981544Seschrock 4991544Seschrock } 5001544Seschrock 5011544Seschrock static int 5021544Seschrock zfs_domount(vfs_t *vfsp, char *osname, cred_t *cr) 5031544Seschrock { 5041544Seschrock dev_t mount_dev; 5051544Seschrock uint64_t recordsize, readonly; 5061544Seschrock int error = 0; 5071544Seschrock int mode; 5081544Seschrock zfsvfs_t *zfsvfs; 5091544Seschrock znode_t *zp = NULL; 5101544Seschrock 5111544Seschrock ASSERT(vfsp); 5121544Seschrock ASSERT(osname); 5131544Seschrock 5141544Seschrock /* 5151544Seschrock * Initialize the zfs-specific filesystem structure. 5161544Seschrock * Should probably make this a kmem cache, shuffle fields, 5171544Seschrock * and just bzero up to z_hold_mtx[]. 5181544Seschrock */ 5191544Seschrock zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 5201544Seschrock zfsvfs->z_vfs = vfsp; 5211544Seschrock zfsvfs->z_parent = zfsvfs; 5221544Seschrock zfsvfs->z_assign = TXG_NOWAIT; 5231544Seschrock zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 5241544Seschrock zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 5251544Seschrock 5261544Seschrock mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 5271544Seschrock list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 5281544Seschrock offsetof(znode_t, z_link_node)); 5291544Seschrock rw_init(&zfsvfs->z_um_lock, NULL, RW_DEFAULT, NULL); 5301544Seschrock 5311544Seschrock /* Initialize the generic filesystem structure. */ 5321544Seschrock vfsp->vfs_bcount = 0; 5331544Seschrock vfsp->vfs_data = NULL; 5341544Seschrock 5351544Seschrock if (zfs_create_unique_device(&mount_dev) == -1) { 5361544Seschrock error = ENODEV; 5371544Seschrock goto out; 5381544Seschrock } 5391544Seschrock ASSERT(vfs_devismounted(mount_dev) == 0); 5401544Seschrock 5411544Seschrock if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 5421544Seschrock NULL)) 5431544Seschrock goto out; 5441544Seschrock 5451544Seschrock vfsp->vfs_dev = mount_dev; 5461544Seschrock vfsp->vfs_fstype = zfsfstype; 5471544Seschrock vfsp->vfs_bsize = recordsize; 5481544Seschrock vfsp->vfs_flag |= VFS_NOTRUNC; 5491544Seschrock vfsp->vfs_data = zfsvfs; 5501544Seschrock 5511544Seschrock if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 5521544Seschrock goto out; 5531544Seschrock 5541544Seschrock if (readonly) 5551544Seschrock mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 5561544Seschrock else 5571544Seschrock mode = DS_MODE_PRIMARY; 5581544Seschrock 5591544Seschrock error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 5601544Seschrock if (error == EROFS) { 5611544Seschrock mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 5621544Seschrock error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 5631544Seschrock &zfsvfs->z_os); 5641544Seschrock } 5651544Seschrock 5661544Seschrock if (error) 5671544Seschrock goto out; 5681544Seschrock 5691544Seschrock if (error = zfs_init_fs(zfsvfs, &zp, cr)) 5701544Seschrock goto out; 5711544Seschrock 5721544Seschrock /* The call to zfs_init_fs leaves the vnode held, release it here. */ 5731544Seschrock VN_RELE(ZTOV(zp)); 5741544Seschrock 5751544Seschrock if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 5763234Sck153898 uint64_t xattr; 5773234Sck153898 5781544Seschrock ASSERT(mode & DS_MODE_READONLY); 5791544Seschrock atime_changed_cb(zfsvfs, B_FALSE); 5801544Seschrock readonly_changed_cb(zfsvfs, B_TRUE); 5813234Sck153898 if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL)) 5823234Sck153898 goto out; 5833234Sck153898 xattr_changed_cb(zfsvfs, xattr); 5841544Seschrock zfsvfs->z_issnap = B_TRUE; 5851544Seschrock } else { 5861544Seschrock error = zfs_register_callbacks(vfsp); 5871544Seschrock if (error) 5881544Seschrock goto out; 5891544Seschrock 5904577Sahrens if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY)) 5914577Sahrens zfs_unlinked_drain(zfsvfs); 5921544Seschrock 5931544Seschrock /* 5941544Seschrock * Parse and replay the intent log. 5954577Sahrens * 5964577Sahrens * Because of ziltest, this must be done after 5974577Sahrens * zfs_unlinked_drain(). (Further note: ziltest doesn't 5984577Sahrens * use readonly mounts, where zfs_unlinked_drain() isn't 5994577Sahrens * called.) This is because ziltest causes spa_sync() 6004577Sahrens * to think it's committed, but actually it is not, so 6014577Sahrens * the intent log contains many txg's worth of changes. 6024577Sahrens * 6034577Sahrens * In particular, if object N is in the unlinked set in 6044577Sahrens * the last txg to actually sync, then it could be 6054577Sahrens * actually freed in a later txg and then reallocated in 6064577Sahrens * a yet later txg. This would write a "create object 6074577Sahrens * N" record to the intent log. Normally, this would be 6084577Sahrens * fine because the spa_sync() would have written out 6094577Sahrens * the fact that object N is free, before we could write 6104577Sahrens * the "create object N" intent log record. 6114577Sahrens * 6124577Sahrens * But when we are in ziltest mode, we advance the "open 6134577Sahrens * txg" without actually spa_sync()-ing the changes to 6144577Sahrens * disk. So we would see that object N is still 6154577Sahrens * allocated and in the unlinked set, and there is an 6164577Sahrens * intent log record saying to allocate it. 6171544Seschrock */ 6181544Seschrock zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 6193461Sahrens zfs_replay_vector); 6201544Seschrock 6211544Seschrock if (!zil_disable) 6221544Seschrock zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 6231544Seschrock } 6241544Seschrock 6251544Seschrock if (!zfsvfs->z_issnap) 6261544Seschrock zfsctl_create(zfsvfs); 6271544Seschrock out: 6281544Seschrock if (error) { 6291544Seschrock if (zfsvfs->z_os) 6301544Seschrock dmu_objset_close(zfsvfs->z_os); 6311544Seschrock kmem_free(zfsvfs, sizeof (zfsvfs_t)); 6321544Seschrock } else { 6331544Seschrock atomic_add_32(&zfs_active_fs_count, 1); 6341544Seschrock } 6351544Seschrock 6361544Seschrock return (error); 6371544Seschrock } 6381544Seschrock 6391544Seschrock void 6401544Seschrock zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 6411544Seschrock { 6421544Seschrock objset_t *os = zfsvfs->z_os; 6431544Seschrock struct dsl_dataset *ds; 6441544Seschrock 6451544Seschrock /* 6461544Seschrock * Unregister properties. 6471544Seschrock */ 6481544Seschrock if (!dmu_objset_is_snapshot(os)) { 6491544Seschrock ds = dmu_objset_ds(os); 6501544Seschrock VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 6511544Seschrock zfsvfs) == 0); 6521544Seschrock 6533234Sck153898 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 6543234Sck153898 zfsvfs) == 0); 6553234Sck153898 6561544Seschrock VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 6571544Seschrock zfsvfs) == 0); 6581544Seschrock 6591544Seschrock VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 6601544Seschrock zfsvfs) == 0); 6611544Seschrock 6621544Seschrock VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, 6631544Seschrock zfsvfs) == 0); 6641544Seschrock 6651544Seschrock VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 6661544Seschrock zfsvfs) == 0); 6671544Seschrock 6681544Seschrock VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 6691544Seschrock zfsvfs) == 0); 6701544Seschrock 6711544Seschrock VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 6721544Seschrock zfsvfs) == 0); 6731544Seschrock 6741544Seschrock VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 6751544Seschrock zfsvfs) == 0); 6761544Seschrock 6771544Seschrock VERIFY(dsl_prop_unregister(ds, "aclinherit", 6781544Seschrock acl_inherit_changed_cb, zfsvfs) == 0); 6791544Seschrock } 6801544Seschrock } 6811544Seschrock 6823912Slling /* 6833912Slling * Convert a decimal digit string to a uint64_t integer. 6843912Slling */ 6853912Slling static int 6863912Slling str_to_uint64(char *str, uint64_t *objnum) 6873912Slling { 6883912Slling uint64_t num = 0; 6893912Slling 6903912Slling while (*str) { 6913912Slling if (*str < '0' || *str > '9') 6923912Slling return (EINVAL); 6933912Slling 6943912Slling num = num*10 + *str++ - '0'; 6953912Slling } 6963912Slling 6973912Slling *objnum = num; 6983912Slling return (0); 6993912Slling } 7003912Slling 7013912Slling /* 7023912Slling * The boot path passed from the boot loader is in the form of 7033912Slling * "rootpool-name/root-filesystem-object-number'. Convert this 7043912Slling * string to a dataset name: "rootpool-name/root-filesystem-name". 7053912Slling */ 7063912Slling static int 7073912Slling parse_bootpath(char *bpath, char *outpath) 7083912Slling { 7093912Slling char *slashp; 7103912Slling uint64_t objnum; 7113912Slling int error; 7123912Slling 7133912Slling if (*bpath == 0 || *bpath == '/') 7143912Slling return (EINVAL); 7153912Slling 7163912Slling slashp = strchr(bpath, '/'); 7173912Slling 7183912Slling /* if no '/', just return the pool name */ 7193912Slling if (slashp == NULL) { 7203912Slling (void) strcpy(outpath, bpath); 7213912Slling return (0); 7223912Slling } 7233912Slling 7243912Slling if (error = str_to_uint64(slashp+1, &objnum)) 7253912Slling return (error); 7263912Slling 7273912Slling *slashp = '\0'; 7283912Slling error = dsl_dsobj_to_dsname(bpath, objnum, outpath); 7293912Slling *slashp = '/'; 7303912Slling 7313912Slling return (error); 7323912Slling } 7333912Slling 7341544Seschrock static int 7351544Seschrock zfs_mountroot(vfs_t *vfsp, enum whymountroot why) 7361544Seschrock { 7371544Seschrock int error = 0; 7381544Seschrock int ret = 0; 7391544Seschrock static int zfsrootdone = 0; 7401544Seschrock zfsvfs_t *zfsvfs = NULL; 7411544Seschrock znode_t *zp = NULL; 7421544Seschrock vnode_t *vp = NULL; 7433912Slling char *zfs_bootpath; 7441544Seschrock 7451544Seschrock ASSERT(vfsp); 7461544Seschrock 7471544Seschrock /* 7483912Slling * The filesystem that we mount as root is defined in the 7493912Slling * "zfs-bootfs" property. 7501544Seschrock */ 7511544Seschrock if (why == ROOT_INIT) { 7521544Seschrock if (zfsrootdone++) 7531544Seschrock return (EBUSY); 7541544Seschrock 7553912Slling if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), 7563912Slling DDI_PROP_DONTPASS, "zfs-bootfs", &zfs_bootpath) != 7573912Slling DDI_SUCCESS) 7583912Slling return (EIO); 7593912Slling 7603912Slling error = parse_bootpath(zfs_bootpath, rootfs.bo_name); 7613912Slling ddi_prop_free(zfs_bootpath); 7623912Slling 7633912Slling if (error) 7643912Slling return (error); 7651544Seschrock 7661544Seschrock if (error = vfs_lock(vfsp)) 7671544Seschrock return (error); 7681544Seschrock 7693912Slling if (error = zfs_domount(vfsp, rootfs.bo_name, CRED())) 7701544Seschrock goto out; 7711544Seschrock 7721544Seschrock zfsvfs = (zfsvfs_t *)vfsp->vfs_data; 7731544Seschrock ASSERT(zfsvfs); 7741544Seschrock if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) 7751544Seschrock goto out; 7761544Seschrock 7771544Seschrock vp = ZTOV(zp); 7781544Seschrock mutex_enter(&vp->v_lock); 7791544Seschrock vp->v_flag |= VROOT; 7801544Seschrock mutex_exit(&vp->v_lock); 7811544Seschrock rootvp = vp; 7821544Seschrock 7831544Seschrock /* 7841544Seschrock * The zfs_zget call above returns with a hold on vp, we release 7851544Seschrock * it here. 7861544Seschrock */ 7871544Seschrock VN_RELE(vp); 7881544Seschrock 7891544Seschrock /* 7901544Seschrock * Mount root as readonly initially, it will be remouted 7911544Seschrock * read/write by /lib/svc/method/fs-usr. 7921544Seschrock */ 7931544Seschrock readonly_changed_cb(vfsp->vfs_data, B_TRUE); 7941544Seschrock vfs_add((struct vnode *)0, vfsp, 7951544Seschrock (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0); 7961544Seschrock out: 7971544Seschrock vfs_unlock(vfsp); 7981544Seschrock ret = (error) ? error : 0; 7991544Seschrock return (ret); 8001544Seschrock } else if (why == ROOT_REMOUNT) { 8011544Seschrock readonly_changed_cb(vfsp->vfs_data, B_FALSE); 8021544Seschrock vfsp->vfs_flag |= VFS_REMOUNT; 8034596Slling 8044596Slling /* refresh mount options */ 8054596Slling zfs_unregister_callbacks(vfsp->vfs_data); 8064596Slling return (zfs_register_callbacks(vfsp)); 8074596Slling 8081544Seschrock } else if (why == ROOT_UNMOUNT) { 8091544Seschrock zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data); 8101544Seschrock (void) zfs_sync(vfsp, 0, 0); 8111544Seschrock return (0); 8121544Seschrock } 8131544Seschrock 8141544Seschrock /* 8151544Seschrock * if "why" is equal to anything else other than ROOT_INIT, 8161544Seschrock * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it. 8171544Seschrock */ 8181544Seschrock return (ENOTSUP); 8191544Seschrock } 8201544Seschrock 821789Sahrens /*ARGSUSED*/ 822789Sahrens static int 823789Sahrens zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 824789Sahrens { 825789Sahrens char *osname; 826789Sahrens pathname_t spn; 827789Sahrens int error = 0; 828789Sahrens uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ? 8293912Slling UIO_SYSSPACE : UIO_USERSPACE; 830789Sahrens int canwrite; 831789Sahrens 832789Sahrens if (mvp->v_type != VDIR) 833789Sahrens return (ENOTDIR); 834789Sahrens 835789Sahrens mutex_enter(&mvp->v_lock); 836789Sahrens if ((uap->flags & MS_REMOUNT) == 0 && 837789Sahrens (uap->flags & MS_OVERLAY) == 0 && 838789Sahrens (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 839789Sahrens mutex_exit(&mvp->v_lock); 840789Sahrens return (EBUSY); 841789Sahrens } 842789Sahrens mutex_exit(&mvp->v_lock); 843789Sahrens 844789Sahrens /* 845789Sahrens * ZFS does not support passing unparsed data in via MS_DATA. 846789Sahrens * Users should use the MS_OPTIONSTR interface; this means 847789Sahrens * that all option parsing is already done and the options struct 848789Sahrens * can be interrogated. 849789Sahrens */ 850789Sahrens if ((uap->flags & MS_DATA) && uap->datalen > 0) 851789Sahrens return (EINVAL); 852789Sahrens 853789Sahrens /* 854789Sahrens * Get the objset name (the "special" mount argument). 855789Sahrens */ 856789Sahrens if (error = pn_get(uap->spec, fromspace, &spn)) 857789Sahrens return (error); 858789Sahrens 859789Sahrens osname = spn.pn_path; 860789Sahrens 8614543Smarks /* 8624543Smarks * Check for mount privilege? 8634543Smarks * 8644543Smarks * If we don't have privilege then see if 8654543Smarks * we have local permission to allow it 8664543Smarks */ 8674543Smarks error = secpolicy_fs_mount(cr, mvp, vfsp); 8684543Smarks if (error) { 8694543Smarks error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr); 8704543Smarks if (error == 0) { 8714543Smarks vattr_t vattr; 8724543Smarks 8734543Smarks /* 8744543Smarks * Make sure user is the owner of the mount point 8754543Smarks * or has sufficient privileges. 8764543Smarks */ 8774543Smarks 8784543Smarks vattr.va_mask = AT_UID; 8794543Smarks 8804614Smarks if (error = VOP_GETATTR(mvp, &vattr, 0, cr)) { 8814543Smarks goto out; 8824543Smarks } 8834543Smarks 8844543Smarks if (error = secpolicy_vnode_owner(cr, vattr.va_uid)) { 8854543Smarks goto out; 8864543Smarks } 8874543Smarks 8884543Smarks if (error = VOP_ACCESS(mvp, VWRITE, 0, cr)) { 8894543Smarks goto out; 8904543Smarks } 8914543Smarks 8924543Smarks secpolicy_fs_mount_clearopts(cr, vfsp); 8934543Smarks } else { 8944543Smarks goto out; 8954543Smarks } 8964543Smarks } 897789Sahrens 898789Sahrens /* 899789Sahrens * Refuse to mount a filesystem if we are in a local zone and the 900789Sahrens * dataset is not visible. 901789Sahrens */ 902789Sahrens if (!INGLOBALZONE(curproc) && 903789Sahrens (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 904789Sahrens error = EPERM; 905789Sahrens goto out; 906789Sahrens } 907789Sahrens 9084596Slling /* 9094596Slling * When doing a remount, we simply refresh our temporary properties 9104596Slling * according to those options set in the current VFS options. 9114596Slling */ 9124596Slling if (uap->flags & MS_REMOUNT) { 9134596Slling /* refresh mount options */ 9144596Slling zfs_unregister_callbacks(vfsp->vfs_data); 9154596Slling error = zfs_register_callbacks(vfsp); 9164596Slling goto out; 9174596Slling } 9184596Slling 9191544Seschrock error = zfs_domount(vfsp, osname, cr); 920789Sahrens 921789Sahrens out: 922789Sahrens pn_free(&spn); 923789Sahrens return (error); 924789Sahrens } 925789Sahrens 926789Sahrens static int 927789Sahrens zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp) 928789Sahrens { 929789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 930789Sahrens dev32_t d32; 9312885Sahrens uint64_t refdbytes, availbytes, usedobjs, availobjs; 932789Sahrens 933789Sahrens ZFS_ENTER(zfsvfs); 934789Sahrens 9352885Sahrens dmu_objset_space(zfsvfs->z_os, 9362885Sahrens &refdbytes, &availbytes, &usedobjs, &availobjs); 937789Sahrens 938789Sahrens /* 939789Sahrens * The underlying storage pool actually uses multiple block sizes. 940789Sahrens * We report the fragsize as the smallest block size we support, 941789Sahrens * and we report our blocksize as the filesystem's maximum blocksize. 942789Sahrens */ 943789Sahrens statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT; 944789Sahrens statp->f_bsize = zfsvfs->z_max_blksz; 945789Sahrens 946789Sahrens /* 947789Sahrens * The following report "total" blocks of various kinds in the 948789Sahrens * file system, but reported in terms of f_frsize - the 949789Sahrens * "fragment" size. 950789Sahrens */ 951789Sahrens 9522885Sahrens statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 9532885Sahrens statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT; 954789Sahrens statp->f_bavail = statp->f_bfree; /* no root reservation */ 955789Sahrens 956789Sahrens /* 957789Sahrens * statvfs() should really be called statufs(), because it assumes 958789Sahrens * static metadata. ZFS doesn't preallocate files, so the best 959789Sahrens * we can do is report the max that could possibly fit in f_files, 960789Sahrens * and that minus the number actually used in f_ffree. 961789Sahrens * For f_ffree, report the smaller of the number of object available 962789Sahrens * and the number of blocks (each object will take at least a block). 963789Sahrens */ 9642885Sahrens statp->f_ffree = MIN(availobjs, statp->f_bfree); 965789Sahrens statp->f_favail = statp->f_ffree; /* no "root reservation" */ 9662885Sahrens statp->f_files = statp->f_ffree + usedobjs; 967789Sahrens 968789Sahrens (void) cmpldev(&d32, vfsp->vfs_dev); 969789Sahrens statp->f_fsid = d32; 970789Sahrens 971789Sahrens /* 972789Sahrens * We're a zfs filesystem. 973789Sahrens */ 974789Sahrens (void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name); 975789Sahrens 9761123Smarks statp->f_flag = vf_to_stf(vfsp->vfs_flag); 977789Sahrens 978789Sahrens statp->f_namemax = ZFS_MAXNAMELEN; 979789Sahrens 980789Sahrens /* 981789Sahrens * We have all of 32 characters to stuff a string here. 982789Sahrens * Is there anything useful we could/should provide? 983789Sahrens */ 984789Sahrens bzero(statp->f_fstr, sizeof (statp->f_fstr)); 985789Sahrens 986789Sahrens ZFS_EXIT(zfsvfs); 987789Sahrens return (0); 988789Sahrens } 989789Sahrens 990789Sahrens static int 991789Sahrens zfs_root(vfs_t *vfsp, vnode_t **vpp) 992789Sahrens { 993789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 994789Sahrens znode_t *rootzp; 995789Sahrens int error; 996789Sahrens 997789Sahrens ZFS_ENTER(zfsvfs); 998789Sahrens 999789Sahrens error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 1000789Sahrens if (error == 0) 1001789Sahrens *vpp = ZTOV(rootzp); 1002789Sahrens 1003789Sahrens ZFS_EXIT(zfsvfs); 1004789Sahrens return (error); 1005789Sahrens } 1006789Sahrens 1007789Sahrens /*ARGSUSED*/ 1008789Sahrens static int 1009789Sahrens zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) 1010789Sahrens { 1011789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1012789Sahrens int ret; 1013789Sahrens 10144543Smarks ret = secpolicy_fs_unmount(cr, vfsp); 10154543Smarks if (ret) { 10164543Smarks ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), 10174543Smarks ZFS_DELEG_PERM_MOUNT, cr); 10184543Smarks if (ret) 10194543Smarks return (ret); 10204543Smarks } 10211484Sek110237 1022*4736Sek110237 /* 1023*4736Sek110237 * We purge the parent filesystem's vfsp as the parent filesystem 1024*4736Sek110237 * and all of its snapshots have their vnode's v_vfsp set to the 1025*4736Sek110237 * parent's filesystem's vfsp. Note, 'z_parent' is self 1026*4736Sek110237 * referential for non-snapshots. 1027*4736Sek110237 */ 1028*4736Sek110237 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 10291484Sek110237 1030789Sahrens /* 1031789Sahrens * Unmount any snapshots mounted under .zfs before unmounting the 1032789Sahrens * dataset itself. 1033789Sahrens */ 1034789Sahrens if (zfsvfs->z_ctldir != NULL && 10354543Smarks (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) { 1036789Sahrens return (ret); 10374543Smarks } 1038789Sahrens 1039789Sahrens if (fflag & MS_FORCE) { 1040789Sahrens vfsp->vfs_flag |= VFS_UNMOUNTED; 1041789Sahrens zfsvfs->z_unmounted1 = B_TRUE; 1042789Sahrens 1043789Sahrens /* 10444480Sgw25295 * Ensure that z_unmounted1 reaches global visibility 10454480Sgw25295 * before z_op_cnt. 10464480Sgw25295 */ 10474480Sgw25295 membar_producer(); 10484480Sgw25295 10494480Sgw25295 /* 1050789Sahrens * Wait for all zfs threads to leave zfs. 1051789Sahrens * Grabbing a rwlock as reader in all vops and 1052789Sahrens * as writer here doesn't work because it too easy to get 1053789Sahrens * multiple reader enters as zfs can re-enter itself. 1054789Sahrens * This can lead to deadlock if there is an intervening 1055789Sahrens * rw_enter as writer. 1056789Sahrens * So a file system threads ref count (z_op_cnt) is used. 1057789Sahrens * A polling loop on z_op_cnt may seem inefficient, but 1058789Sahrens * - this saves all threads on exit from having to grab a 1059789Sahrens * mutex in order to cv_signal 1060789Sahrens * - only occurs on forced unmount in the rare case when 1061789Sahrens * there are outstanding threads within the file system. 1062789Sahrens */ 1063789Sahrens while (zfsvfs->z_op_cnt) { 1064789Sahrens delay(1); 1065789Sahrens } 1066789Sahrens 1067789Sahrens zfs_objset_close(zfsvfs); 1068789Sahrens 1069789Sahrens return (0); 1070789Sahrens } 1071789Sahrens /* 1072789Sahrens * Check the number of active vnodes in the file system. 1073789Sahrens * Our count is maintained in the vfs structure, but the number 1074789Sahrens * is off by 1 to indicate a hold on the vfs structure itself. 1075789Sahrens * 1076789Sahrens * The '.zfs' directory maintains a reference of its own, and any active 1077789Sahrens * references underneath are reflected in the vnode count. 1078789Sahrens */ 1079789Sahrens if (zfsvfs->z_ctldir == NULL) { 10803461Sahrens if (vfsp->vfs_count > 1) 1081789Sahrens return (EBUSY); 1082789Sahrens } else { 1083789Sahrens if (vfsp->vfs_count > 2 || 1084789Sahrens (zfsvfs->z_ctldir->v_count > 1 && !(fflag & MS_FORCE))) { 1085789Sahrens return (EBUSY); 1086789Sahrens } 1087789Sahrens } 1088789Sahrens 1089789Sahrens vfsp->vfs_flag |= VFS_UNMOUNTED; 1090789Sahrens zfs_objset_close(zfsvfs); 1091789Sahrens 1092789Sahrens return (0); 1093789Sahrens } 1094789Sahrens 1095789Sahrens static int 1096789Sahrens zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1097789Sahrens { 1098789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1099789Sahrens znode_t *zp; 1100789Sahrens uint64_t object = 0; 1101789Sahrens uint64_t fid_gen = 0; 1102789Sahrens uint64_t gen_mask; 1103789Sahrens uint64_t zp_gen; 1104789Sahrens int i, err; 1105789Sahrens 1106789Sahrens *vpp = NULL; 1107789Sahrens 1108789Sahrens ZFS_ENTER(zfsvfs); 1109789Sahrens 1110789Sahrens if (fidp->fid_len == LONG_FID_LEN) { 1111789Sahrens zfid_long_t *zlfid = (zfid_long_t *)fidp; 1112789Sahrens uint64_t objsetid = 0; 1113789Sahrens uint64_t setgen = 0; 1114789Sahrens 1115789Sahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1116789Sahrens objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1117789Sahrens 1118789Sahrens for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1119789Sahrens setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1120789Sahrens 1121789Sahrens ZFS_EXIT(zfsvfs); 1122789Sahrens 1123789Sahrens err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1124789Sahrens if (err) 1125789Sahrens return (EINVAL); 1126789Sahrens ZFS_ENTER(zfsvfs); 1127789Sahrens } 1128789Sahrens 1129789Sahrens if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1130789Sahrens zfid_short_t *zfid = (zfid_short_t *)fidp; 1131789Sahrens 1132789Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 1133789Sahrens object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1134789Sahrens 1135789Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 1136789Sahrens fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1137789Sahrens } else { 1138789Sahrens ZFS_EXIT(zfsvfs); 1139789Sahrens return (EINVAL); 1140789Sahrens } 1141789Sahrens 1142789Sahrens /* A zero fid_gen means we are in the .zfs control directories */ 1143789Sahrens if (fid_gen == 0 && 1144789Sahrens (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 1145789Sahrens *vpp = zfsvfs->z_ctldir; 1146789Sahrens ASSERT(*vpp != NULL); 1147789Sahrens if (object == ZFSCTL_INO_SNAPDIR) { 1148789Sahrens VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 1149789Sahrens 0, NULL, NULL) == 0); 1150789Sahrens } else { 1151789Sahrens VN_HOLD(*vpp); 1152789Sahrens } 1153789Sahrens ZFS_EXIT(zfsvfs); 1154789Sahrens return (0); 1155789Sahrens } 1156789Sahrens 1157789Sahrens gen_mask = -1ULL >> (64 - 8 * i); 1158789Sahrens 1159789Sahrens dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 1160789Sahrens if (err = zfs_zget(zfsvfs, object, &zp)) { 1161789Sahrens ZFS_EXIT(zfsvfs); 1162789Sahrens return (err); 1163789Sahrens } 1164789Sahrens zp_gen = zp->z_phys->zp_gen & gen_mask; 1165789Sahrens if (zp_gen == 0) 1166789Sahrens zp_gen = 1; 11673461Sahrens if (zp->z_unlinked || zp_gen != fid_gen) { 1168789Sahrens dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 1169789Sahrens VN_RELE(ZTOV(zp)); 1170789Sahrens ZFS_EXIT(zfsvfs); 1171789Sahrens return (EINVAL); 1172789Sahrens } 1173789Sahrens 1174789Sahrens *vpp = ZTOV(zp); 1175789Sahrens ZFS_EXIT(zfsvfs); 1176789Sahrens return (0); 1177789Sahrens } 1178789Sahrens 1179789Sahrens static void 1180789Sahrens zfs_objset_close(zfsvfs_t *zfsvfs) 1181789Sahrens { 1182789Sahrens znode_t *zp, *nextzp; 1183789Sahrens objset_t *os = zfsvfs->z_os; 1184789Sahrens 1185789Sahrens /* 1186789Sahrens * For forced unmount, at this point all vops except zfs_inactive 1187789Sahrens * are erroring EIO. We need to now suspend zfs_inactive threads 1188789Sahrens * while we are freeing dbufs before switching zfs_inactive 1189789Sahrens * to use behaviour without a objset. 1190789Sahrens */ 1191789Sahrens rw_enter(&zfsvfs->z_um_lock, RW_WRITER); 1192789Sahrens 1193789Sahrens /* 1194789Sahrens * Release all holds on dbufs 1195789Sahrens * Note, although we have stopped all other vop threads and 1196789Sahrens * zfs_inactive(), the dmu can callback via znode_pageout_func() 1197789Sahrens * which can zfs_znode_free() the znode. 1198789Sahrens * So we lock z_all_znodes; search the list for a held 1199789Sahrens * dbuf; drop the lock (we know zp can't disappear if we hold 1200789Sahrens * a dbuf lock; then regrab the lock and restart. 1201789Sahrens */ 1202789Sahrens mutex_enter(&zfsvfs->z_znodes_lock); 1203789Sahrens for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { 1204789Sahrens nextzp = list_next(&zfsvfs->z_all_znodes, zp); 1205789Sahrens if (zp->z_dbuf_held) { 1206789Sahrens /* dbufs should only be held when force unmounting */ 1207789Sahrens zp->z_dbuf_held = 0; 1208789Sahrens mutex_exit(&zfsvfs->z_znodes_lock); 12091544Seschrock dmu_buf_rele(zp->z_dbuf, NULL); 1210789Sahrens /* Start again */ 1211789Sahrens mutex_enter(&zfsvfs->z_znodes_lock); 1212789Sahrens nextzp = list_head(&zfsvfs->z_all_znodes); 1213789Sahrens } 1214789Sahrens } 1215789Sahrens mutex_exit(&zfsvfs->z_znodes_lock); 1216789Sahrens 1217789Sahrens /* 1218789Sahrens * Unregister properties. 1219789Sahrens */ 12201544Seschrock if (!dmu_objset_is_snapshot(os)) 12211544Seschrock zfs_unregister_callbacks(zfsvfs); 1222789Sahrens 1223789Sahrens /* 1224789Sahrens * Switch zfs_inactive to behaviour without an objset. 1225789Sahrens * It just tosses cached pages and frees the znode & vnode. 1226789Sahrens * Then re-enable zfs_inactive threads in that new behaviour. 1227789Sahrens */ 1228789Sahrens zfsvfs->z_unmounted2 = B_TRUE; 1229789Sahrens rw_exit(&zfsvfs->z_um_lock); /* re-enable any zfs_inactive threads */ 1230789Sahrens 1231789Sahrens /* 1232789Sahrens * Close the zil. Can't close the zil while zfs_inactive 1233789Sahrens * threads are blocked as zil_close can call zfs_inactive. 1234789Sahrens */ 1235789Sahrens if (zfsvfs->z_log) { 1236789Sahrens zil_close(zfsvfs->z_log); 1237789Sahrens zfsvfs->z_log = NULL; 1238789Sahrens } 1239789Sahrens 1240789Sahrens /* 12411544Seschrock * Evict all dbufs so that cached znodes will be freed 12421544Seschrock */ 12431646Sperrin if (dmu_objset_evict_dbufs(os, 1)) { 12441646Sperrin txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 12451646Sperrin (void) dmu_objset_evict_dbufs(os, 0); 12461646Sperrin } 12471544Seschrock 12481544Seschrock /* 1249789Sahrens * Finally close the objset 1250789Sahrens */ 1251789Sahrens dmu_objset_close(os); 1252789Sahrens 12531298Sperrin /* 12541298Sperrin * We can now safely destroy the '.zfs' directory node. 12551298Sperrin */ 12561298Sperrin if (zfsvfs->z_ctldir != NULL) 12571298Sperrin zfsctl_destroy(zfsvfs); 12581298Sperrin 1259789Sahrens } 1260789Sahrens 1261789Sahrens static void 1262789Sahrens zfs_freevfs(vfs_t *vfsp) 1263789Sahrens { 1264789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1265789Sahrens 1266789Sahrens kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1267789Sahrens 1268789Sahrens atomic_add_32(&zfs_active_fs_count, -1); 1269789Sahrens } 1270789Sahrens 1271789Sahrens /* 1272789Sahrens * VFS_INIT() initialization. Note that there is no VFS_FINI(), 1273789Sahrens * so we can't safely do any non-idempotent initialization here. 1274789Sahrens * Leave that to zfs_init() and zfs_fini(), which are called 1275789Sahrens * from the module's _init() and _fini() entry points. 1276789Sahrens */ 1277789Sahrens /*ARGSUSED*/ 1278789Sahrens static int 1279789Sahrens zfs_vfsinit(int fstype, char *name) 1280789Sahrens { 1281789Sahrens int error; 1282789Sahrens 1283789Sahrens zfsfstype = fstype; 1284789Sahrens 1285789Sahrens /* 1286789Sahrens * Setup vfsops and vnodeops tables. 1287789Sahrens */ 1288789Sahrens error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops); 1289789Sahrens if (error != 0) { 1290789Sahrens cmn_err(CE_WARN, "zfs: bad vfs ops template"); 1291789Sahrens } 1292789Sahrens 1293789Sahrens error = zfs_create_op_tables(); 1294789Sahrens if (error) { 1295789Sahrens zfs_remove_op_tables(); 1296789Sahrens cmn_err(CE_WARN, "zfs: bad vnode ops template"); 1297789Sahrens (void) vfs_freevfsops_by_type(zfsfstype); 1298789Sahrens return (error); 1299789Sahrens } 1300789Sahrens 1301789Sahrens mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL); 1302789Sahrens 1303789Sahrens /* 1304849Sbonwick * Unique major number for all zfs mounts. 1305849Sbonwick * If we run out of 32-bit minors, we'll getudev() another major. 1306789Sahrens */ 1307849Sbonwick zfs_major = ddi_name_to_major(ZFS_DRIVER); 1308849Sbonwick zfs_minor = ZFS_MIN_MINOR; 1309789Sahrens 1310789Sahrens return (0); 1311789Sahrens } 1312789Sahrens 1313789Sahrens void 1314789Sahrens zfs_init(void) 1315789Sahrens { 1316789Sahrens /* 1317789Sahrens * Initialize .zfs directory structures 1318789Sahrens */ 1319789Sahrens zfsctl_init(); 1320789Sahrens 1321789Sahrens /* 1322789Sahrens * Initialize znode cache, vnode ops, etc... 1323789Sahrens */ 1324789Sahrens zfs_znode_init(); 1325789Sahrens } 1326789Sahrens 1327789Sahrens void 1328789Sahrens zfs_fini(void) 1329789Sahrens { 1330789Sahrens zfsctl_fini(); 1331789Sahrens zfs_znode_fini(); 1332789Sahrens } 1333789Sahrens 1334789Sahrens int 1335789Sahrens zfs_busy(void) 1336789Sahrens { 1337789Sahrens return (zfs_active_fs_count != 0); 1338789Sahrens } 1339789Sahrens 13404577Sahrens int 13414577Sahrens zfs_get_stats(objset_t *os, nvlist_t *nv) 13424577Sahrens { 13434577Sahrens int error; 13444577Sahrens uint64_t val; 13454577Sahrens 13464577Sahrens error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, &val); 13474577Sahrens if (error == 0) 13484577Sahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VERSION, val); 13494577Sahrens 13504577Sahrens return (error); 13514577Sahrens } 13524577Sahrens 13534577Sahrens int 13544577Sahrens zfs_set_version(const char *name, uint64_t newvers) 13554577Sahrens { 13564577Sahrens int error; 13574577Sahrens objset_t *os; 13584577Sahrens dmu_tx_t *tx; 13594577Sahrens uint64_t curvers; 13604577Sahrens 13614577Sahrens /* 13624577Sahrens * XXX for now, require that the filesystem be unmounted. Would 13634577Sahrens * be nice to find the zfsvfs_t and just update that if 13644577Sahrens * possible. 13654577Sahrens */ 13664577Sahrens 13674577Sahrens if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 13684577Sahrens return (EINVAL); 13694577Sahrens 13704577Sahrens error = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_PRIMARY, &os); 13714577Sahrens if (error) 13724577Sahrens return (error); 13734577Sahrens 13744577Sahrens error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 13754577Sahrens 8, 1, &curvers); 13764577Sahrens if (error) 13774577Sahrens goto out; 13784577Sahrens if (newvers < curvers) { 13794577Sahrens error = EINVAL; 13804577Sahrens goto out; 13814577Sahrens } 13824577Sahrens 13834577Sahrens tx = dmu_tx_create(os); 13844577Sahrens dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 0, ZPL_VERSION_STR); 13854577Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 13864577Sahrens if (error) { 13874577Sahrens dmu_tx_abort(tx); 13884577Sahrens goto out; 13894577Sahrens } 13904577Sahrens error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, 13914577Sahrens &newvers, tx); 13924577Sahrens 13934577Sahrens spa_history_internal_log(LOG_DS_UPGRADE, 13944577Sahrens dmu_objset_spa(os), tx, CRED(), 13954577Sahrens "oldver=%llu newver=%llu dataset = %llu", curvers, newvers, 13964577Sahrens dmu_objset_id(os)); 13974577Sahrens dmu_tx_commit(tx); 13984577Sahrens 13994577Sahrens out: 14004577Sahrens dmu_objset_close(os); 14014577Sahrens return (error); 14024577Sahrens } 14034577Sahrens 1404789Sahrens static vfsdef_t vfw = { 1405789Sahrens VFSDEF_VERSION, 1406789Sahrens MNTTYPE_ZFS, 1407789Sahrens zfs_vfsinit, 14081488Srsb VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS, 1409789Sahrens &zfs_mntopts 1410789Sahrens }; 1411789Sahrens 1412789Sahrens struct modlfs zfs_modlfs = { 14134577Sahrens &mod_fsops, "ZFS filesystem version " SPA_VERSION_STRING, &vfw 1414789Sahrens }; 1415