1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51484Sek110237 * Common Development and Distribution License (the "License"). 61484Sek110237 * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 223461Sahrens * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 263246Sck153898 #pragma ident "%Z%%M% %I% %E% SMI" 27789Sahrens 28789Sahrens #include <sys/types.h> 29789Sahrens #include <sys/param.h> 30789Sahrens #include <sys/systm.h> 31789Sahrens #include <sys/sysmacros.h> 32789Sahrens #include <sys/kmem.h> 33789Sahrens #include <sys/pathname.h> 34789Sahrens #include <sys/vnode.h> 35789Sahrens #include <sys/vfs.h> 363898Srsb #include <sys/vfs_opreg.h> 37789Sahrens #include <sys/mntent.h> 38789Sahrens #include <sys/mount.h> 39789Sahrens #include <sys/cmn_err.h> 40789Sahrens #include "fs/fs_subr.h" 41789Sahrens #include <sys/zfs_znode.h> 423461Sahrens #include <sys/zfs_dir.h> 43789Sahrens #include <sys/zil.h> 44789Sahrens #include <sys/fs/zfs.h> 45789Sahrens #include <sys/dmu.h> 46789Sahrens #include <sys/dsl_prop.h> 473912Slling #include <sys/dsl_dataset.h> 484543Smarks #include <sys/dsl_deleg.h> 49789Sahrens #include <sys/spa.h> 50789Sahrens #include <sys/zap.h> 51789Sahrens #include <sys/varargs.h> 52789Sahrens #include <sys/policy.h> 53789Sahrens #include <sys/atomic.h> 54789Sahrens #include <sys/mkdev.h> 55789Sahrens #include <sys/modctl.h> 564543Smarks #include <sys/refstr.h> 57789Sahrens #include <sys/zfs_ioctl.h> 58789Sahrens #include <sys/zfs_ctldir.h> 591544Seschrock #include <sys/bootconf.h> 60849Sbonwick #include <sys/sunddi.h> 611484Sek110237 #include <sys/dnlc.h> 62789Sahrens 63789Sahrens int zfsfstype; 64789Sahrens vfsops_t *zfs_vfsops = NULL; 65849Sbonwick static major_t zfs_major; 66789Sahrens static minor_t zfs_minor; 67789Sahrens static kmutex_t zfs_dev_mtx; 68789Sahrens 69789Sahrens static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr); 70789Sahrens static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr); 711544Seschrock static int zfs_mountroot(vfs_t *vfsp, enum whymountroot); 72789Sahrens static int zfs_root(vfs_t *vfsp, vnode_t **vpp); 73789Sahrens static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp); 74789Sahrens static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp); 75789Sahrens static void zfs_freevfs(vfs_t *vfsp); 76789Sahrens 77789Sahrens static const fs_operation_def_t zfs_vfsops_template[] = { 783898Srsb VFSNAME_MOUNT, { .vfs_mount = zfs_mount }, 793898Srsb VFSNAME_MOUNTROOT, { .vfs_mountroot = zfs_mountroot }, 803898Srsb VFSNAME_UNMOUNT, { .vfs_unmount = zfs_umount }, 813898Srsb VFSNAME_ROOT, { .vfs_root = zfs_root }, 823898Srsb VFSNAME_STATVFS, { .vfs_statvfs = zfs_statvfs }, 833898Srsb VFSNAME_SYNC, { .vfs_sync = zfs_sync }, 843898Srsb VFSNAME_VGET, { .vfs_vget = zfs_vget }, 853898Srsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 863898Srsb NULL, NULL 87789Sahrens }; 88789Sahrens 89789Sahrens static const fs_operation_def_t zfs_vfsops_eio_template[] = { 903898Srsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 913898Srsb NULL, NULL 92789Sahrens }; 93789Sahrens 94789Sahrens /* 95789Sahrens * We need to keep a count of active fs's. 96789Sahrens * This is necessary to prevent our module 97789Sahrens * from being unloaded after a umount -f 98789Sahrens */ 99789Sahrens static uint32_t zfs_active_fs_count = 0; 100789Sahrens 101789Sahrens static char *noatime_cancel[] = { MNTOPT_ATIME, NULL }; 102789Sahrens static char *atime_cancel[] = { MNTOPT_NOATIME, NULL }; 1033234Sck153898 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 1043234Sck153898 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 105789Sahrens 1063234Sck153898 /* 1074596Slling * MO_DEFAULT is not used since the default value is determined 1084596Slling * by the equivalent property. 1093234Sck153898 */ 110789Sahrens static mntopt_t mntopts[] = { 1113234Sck153898 { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, NULL }, 1123234Sck153898 { MNTOPT_XATTR, xattr_cancel, NULL, 0, NULL }, 1134596Slling { MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL }, 114789Sahrens { MNTOPT_ATIME, atime_cancel, NULL, 0, NULL } 115789Sahrens }; 116789Sahrens 117789Sahrens static mntopts_t zfs_mntopts = { 118789Sahrens sizeof (mntopts) / sizeof (mntopt_t), 119789Sahrens mntopts 120789Sahrens }; 121789Sahrens 122789Sahrens /*ARGSUSED*/ 123789Sahrens int 124789Sahrens zfs_sync(vfs_t *vfsp, short flag, cred_t *cr) 125789Sahrens { 126789Sahrens /* 127789Sahrens * Data integrity is job one. We don't want a compromised kernel 128789Sahrens * writing to the storage pool, so we never sync during panic. 129789Sahrens */ 130789Sahrens if (panicstr) 131789Sahrens return (0); 132789Sahrens 133789Sahrens /* 134789Sahrens * SYNC_ATTR is used by fsflush() to force old filesystems like UFS 135789Sahrens * to sync metadata, which they would otherwise cache indefinitely. 136789Sahrens * Semantically, the only requirement is that the sync be initiated. 137789Sahrens * The DMU syncs out txgs frequently, so there's nothing to do. 138789Sahrens */ 139789Sahrens if (flag & SYNC_ATTR) 140789Sahrens return (0); 141789Sahrens 142789Sahrens if (vfsp != NULL) { 143789Sahrens /* 144789Sahrens * Sync a specific filesystem. 145789Sahrens */ 146789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 147789Sahrens 148789Sahrens ZFS_ENTER(zfsvfs); 149789Sahrens if (zfsvfs->z_log != NULL) 1502638Sperrin zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 151789Sahrens else 152789Sahrens txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 153789Sahrens ZFS_EXIT(zfsvfs); 154789Sahrens } else { 155789Sahrens /* 156789Sahrens * Sync all ZFS filesystems. This is what happens when you 157789Sahrens * run sync(1M). Unlike other filesystems, ZFS honors the 158789Sahrens * request by waiting for all pools to commit all dirty data. 159789Sahrens */ 160789Sahrens spa_sync_allpools(); 161789Sahrens } 162789Sahrens 163789Sahrens return (0); 164789Sahrens } 165789Sahrens 1661544Seschrock static int 1671544Seschrock zfs_create_unique_device(dev_t *dev) 1681544Seschrock { 1691544Seschrock major_t new_major; 1701544Seschrock 1711544Seschrock do { 1721544Seschrock ASSERT3U(zfs_minor, <=, MAXMIN32); 1731544Seschrock minor_t start = zfs_minor; 1741544Seschrock do { 1751544Seschrock mutex_enter(&zfs_dev_mtx); 1761544Seschrock if (zfs_minor >= MAXMIN32) { 1771544Seschrock /* 1781544Seschrock * If we're still using the real major 1791544Seschrock * keep out of /dev/zfs and /dev/zvol minor 1801544Seschrock * number space. If we're using a getudev()'ed 1811544Seschrock * major number, we can use all of its minors. 1821544Seschrock */ 1831544Seschrock if (zfs_major == ddi_name_to_major(ZFS_DRIVER)) 1841544Seschrock zfs_minor = ZFS_MIN_MINOR; 1851544Seschrock else 1861544Seschrock zfs_minor = 0; 1871544Seschrock } else { 1881544Seschrock zfs_minor++; 1891544Seschrock } 1901544Seschrock *dev = makedevice(zfs_major, zfs_minor); 1911544Seschrock mutex_exit(&zfs_dev_mtx); 1921544Seschrock } while (vfs_devismounted(*dev) && zfs_minor != start); 1931544Seschrock if (zfs_minor == start) { 1941544Seschrock /* 1951544Seschrock * We are using all ~262,000 minor numbers for the 1961544Seschrock * current major number. Create a new major number. 1971544Seschrock */ 1981544Seschrock if ((new_major = getudev()) == (major_t)-1) { 1991544Seschrock cmn_err(CE_WARN, 2001544Seschrock "zfs_mount: Can't get unique major " 2011544Seschrock "device number."); 2021544Seschrock return (-1); 2031544Seschrock } 2041544Seschrock mutex_enter(&zfs_dev_mtx); 2051544Seschrock zfs_major = new_major; 2061544Seschrock zfs_minor = 0; 2071544Seschrock 2081544Seschrock mutex_exit(&zfs_dev_mtx); 2091544Seschrock } else { 2101544Seschrock break; 2111544Seschrock } 2121544Seschrock /* CONSTANTCONDITION */ 2131544Seschrock } while (1); 2141544Seschrock 2151544Seschrock return (0); 2161544Seschrock } 2171544Seschrock 218789Sahrens static void 219789Sahrens atime_changed_cb(void *arg, uint64_t newval) 220789Sahrens { 221789Sahrens zfsvfs_t *zfsvfs = arg; 222789Sahrens 223789Sahrens if (newval == TRUE) { 224789Sahrens zfsvfs->z_atime = TRUE; 225789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 226789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 227789Sahrens } else { 228789Sahrens zfsvfs->z_atime = FALSE; 229789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 230789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 231789Sahrens } 232789Sahrens } 233789Sahrens 234789Sahrens static void 2353234Sck153898 xattr_changed_cb(void *arg, uint64_t newval) 2363234Sck153898 { 2373234Sck153898 zfsvfs_t *zfsvfs = arg; 2383234Sck153898 2393234Sck153898 if (newval == TRUE) { 2403234Sck153898 /* XXX locking on vfs_flag? */ 2413234Sck153898 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 2423234Sck153898 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 2433234Sck153898 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 2443234Sck153898 } else { 2453234Sck153898 /* XXX locking on vfs_flag? */ 2463234Sck153898 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 2473234Sck153898 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 2483234Sck153898 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 2493234Sck153898 } 2503234Sck153898 } 2513234Sck153898 2523234Sck153898 static void 253789Sahrens blksz_changed_cb(void *arg, uint64_t newval) 254789Sahrens { 255789Sahrens zfsvfs_t *zfsvfs = arg; 256789Sahrens 257789Sahrens if (newval < SPA_MINBLOCKSIZE || 258789Sahrens newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 259789Sahrens newval = SPA_MAXBLOCKSIZE; 260789Sahrens 261789Sahrens zfsvfs->z_max_blksz = newval; 262789Sahrens zfsvfs->z_vfs->vfs_bsize = newval; 263789Sahrens } 264789Sahrens 265789Sahrens static void 266789Sahrens readonly_changed_cb(void *arg, uint64_t newval) 267789Sahrens { 268789Sahrens zfsvfs_t *zfsvfs = arg; 269789Sahrens 270789Sahrens if (newval) { 271789Sahrens /* XXX locking on vfs_flag? */ 272789Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 273789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 274789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 275789Sahrens } else { 276789Sahrens /* XXX locking on vfs_flag? */ 277789Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 278789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 279789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 280789Sahrens } 281789Sahrens } 282789Sahrens 283789Sahrens static void 284789Sahrens devices_changed_cb(void *arg, uint64_t newval) 285789Sahrens { 286789Sahrens zfsvfs_t *zfsvfs = arg; 287789Sahrens 288789Sahrens if (newval == FALSE) { 289789Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES; 290789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES); 291789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0); 292789Sahrens } else { 293789Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES; 294789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES); 295789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0); 296789Sahrens } 297789Sahrens } 298789Sahrens 299789Sahrens static void 300789Sahrens setuid_changed_cb(void *arg, uint64_t newval) 301789Sahrens { 302789Sahrens zfsvfs_t *zfsvfs = arg; 303789Sahrens 304789Sahrens if (newval == FALSE) { 305789Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 306789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 307789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 308789Sahrens } else { 309789Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 310789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 311789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 312789Sahrens } 313789Sahrens } 314789Sahrens 315789Sahrens static void 316789Sahrens exec_changed_cb(void *arg, uint64_t newval) 317789Sahrens { 318789Sahrens zfsvfs_t *zfsvfs = arg; 319789Sahrens 320789Sahrens if (newval == FALSE) { 321789Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 322789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 323789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 324789Sahrens } else { 325789Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 326789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 327789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 328789Sahrens } 329789Sahrens } 330789Sahrens 331789Sahrens static void 332789Sahrens snapdir_changed_cb(void *arg, uint64_t newval) 333789Sahrens { 334789Sahrens zfsvfs_t *zfsvfs = arg; 335789Sahrens 336789Sahrens zfsvfs->z_show_ctldir = newval; 337789Sahrens } 338789Sahrens 339789Sahrens static void 340789Sahrens acl_mode_changed_cb(void *arg, uint64_t newval) 341789Sahrens { 342789Sahrens zfsvfs_t *zfsvfs = arg; 343789Sahrens 344789Sahrens zfsvfs->z_acl_mode = newval; 345789Sahrens } 346789Sahrens 347789Sahrens static void 348789Sahrens acl_inherit_changed_cb(void *arg, uint64_t newval) 349789Sahrens { 350789Sahrens zfsvfs_t *zfsvfs = arg; 351789Sahrens 352789Sahrens zfsvfs->z_acl_inherit = newval; 353789Sahrens } 354789Sahrens 3551544Seschrock static int 3561544Seschrock zfs_register_callbacks(vfs_t *vfsp) 3571544Seschrock { 3581544Seschrock struct dsl_dataset *ds = NULL; 3591544Seschrock objset_t *os = NULL; 3601544Seschrock zfsvfs_t *zfsvfs = NULL; 3613265Sahrens int readonly, do_readonly = FALSE; 3623265Sahrens int setuid, do_setuid = FALSE; 3633265Sahrens int exec, do_exec = FALSE; 3643265Sahrens int devices, do_devices = FALSE; 3653265Sahrens int xattr, do_xattr = FALSE; 3664596Slling int atime, do_atime = FALSE; 3671544Seschrock int error = 0; 3681544Seschrock 3691544Seschrock ASSERT(vfsp); 3701544Seschrock zfsvfs = vfsp->vfs_data; 3711544Seschrock ASSERT(zfsvfs); 3721544Seschrock os = zfsvfs->z_os; 3731544Seschrock 3741544Seschrock /* 3751544Seschrock * The act of registering our callbacks will destroy any mount 3761544Seschrock * options we may have. In order to enable temporary overrides 3773234Sck153898 * of mount options, we stash away the current values and 3781544Seschrock * restore them after we register the callbacks. 3791544Seschrock */ 3801544Seschrock if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 3811544Seschrock readonly = B_TRUE; 3821544Seschrock do_readonly = B_TRUE; 3831544Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 3841544Seschrock readonly = B_FALSE; 3851544Seschrock do_readonly = B_TRUE; 3861544Seschrock } 3871544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 3881544Seschrock devices = B_FALSE; 3891544Seschrock setuid = B_FALSE; 3901544Seschrock do_devices = B_TRUE; 3911544Seschrock do_setuid = B_TRUE; 3921544Seschrock } else { 3931544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 3941544Seschrock devices = B_FALSE; 3951544Seschrock do_devices = B_TRUE; 3963912Slling } else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) { 3971544Seschrock devices = B_TRUE; 3981544Seschrock do_devices = B_TRUE; 3991544Seschrock } 4001544Seschrock 4011544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 4021544Seschrock setuid = B_FALSE; 4031544Seschrock do_setuid = B_TRUE; 4041544Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 4051544Seschrock setuid = B_TRUE; 4061544Seschrock do_setuid = B_TRUE; 4071544Seschrock } 4081544Seschrock } 4091544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 4101544Seschrock exec = B_FALSE; 4111544Seschrock do_exec = B_TRUE; 4121544Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 4131544Seschrock exec = B_TRUE; 4141544Seschrock do_exec = B_TRUE; 4151544Seschrock } 4163234Sck153898 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 4173234Sck153898 xattr = B_FALSE; 4183234Sck153898 do_xattr = B_TRUE; 4193234Sck153898 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 4203234Sck153898 xattr = B_TRUE; 4213234Sck153898 do_xattr = B_TRUE; 4223234Sck153898 } 4234596Slling if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 4244596Slling atime = B_FALSE; 4254596Slling do_atime = B_TRUE; 4264596Slling } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 4274596Slling atime = B_TRUE; 4284596Slling do_atime = B_TRUE; 4294596Slling } 4301544Seschrock 4311544Seschrock /* 4321544Seschrock * Register property callbacks. 4331544Seschrock * 4341544Seschrock * It would probably be fine to just check for i/o error from 4351544Seschrock * the first prop_register(), but I guess I like to go 4361544Seschrock * overboard... 4371544Seschrock */ 4381544Seschrock ds = dmu_objset_ds(os); 4391544Seschrock error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 4401544Seschrock error = error ? error : dsl_prop_register(ds, 4413234Sck153898 "xattr", xattr_changed_cb, zfsvfs); 4423234Sck153898 error = error ? error : dsl_prop_register(ds, 4431544Seschrock "recordsize", blksz_changed_cb, zfsvfs); 4441544Seschrock error = error ? error : dsl_prop_register(ds, 4451544Seschrock "readonly", readonly_changed_cb, zfsvfs); 4461544Seschrock error = error ? error : dsl_prop_register(ds, 4471544Seschrock "devices", devices_changed_cb, zfsvfs); 4481544Seschrock error = error ? error : dsl_prop_register(ds, 4491544Seschrock "setuid", setuid_changed_cb, zfsvfs); 4501544Seschrock error = error ? error : dsl_prop_register(ds, 4511544Seschrock "exec", exec_changed_cb, zfsvfs); 4521544Seschrock error = error ? error : dsl_prop_register(ds, 4531544Seschrock "snapdir", snapdir_changed_cb, zfsvfs); 4541544Seschrock error = error ? error : dsl_prop_register(ds, 4551544Seschrock "aclmode", acl_mode_changed_cb, zfsvfs); 4561544Seschrock error = error ? error : dsl_prop_register(ds, 4571544Seschrock "aclinherit", acl_inherit_changed_cb, zfsvfs); 4581544Seschrock if (error) 4591544Seschrock goto unregister; 4601544Seschrock 4611544Seschrock /* 4621544Seschrock * Invoke our callbacks to restore temporary mount options. 4631544Seschrock */ 4641544Seschrock if (do_readonly) 4651544Seschrock readonly_changed_cb(zfsvfs, readonly); 4661544Seschrock if (do_setuid) 4671544Seschrock setuid_changed_cb(zfsvfs, setuid); 4681544Seschrock if (do_exec) 4691544Seschrock exec_changed_cb(zfsvfs, exec); 4701544Seschrock if (do_devices) 4711544Seschrock devices_changed_cb(zfsvfs, devices); 4723234Sck153898 if (do_xattr) 4733234Sck153898 xattr_changed_cb(zfsvfs, xattr); 4744596Slling if (do_atime) 4754596Slling atime_changed_cb(zfsvfs, atime); 4761544Seschrock 4771544Seschrock return (0); 4781544Seschrock 4791544Seschrock unregister: 4801544Seschrock /* 4811544Seschrock * We may attempt to unregister some callbacks that are not 4821544Seschrock * registered, but this is OK; it will simply return ENOMSG, 4831544Seschrock * which we will ignore. 4841544Seschrock */ 4851544Seschrock (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 4863234Sck153898 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 4871544Seschrock (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 4881544Seschrock (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 4891544Seschrock (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs); 4901544Seschrock (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 4911544Seschrock (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 4921544Seschrock (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 4931544Seschrock (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 4941544Seschrock (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 4951544Seschrock zfsvfs); 4961544Seschrock return (error); 4971544Seschrock 4981544Seschrock } 4991544Seschrock 5001544Seschrock static int 5011544Seschrock zfs_domount(vfs_t *vfsp, char *osname, cred_t *cr) 5021544Seschrock { 5031544Seschrock dev_t mount_dev; 5041544Seschrock uint64_t recordsize, readonly; 5051544Seschrock int error = 0; 5061544Seschrock int mode; 5071544Seschrock zfsvfs_t *zfsvfs; 5081544Seschrock znode_t *zp = NULL; 5091544Seschrock 5101544Seschrock ASSERT(vfsp); 5111544Seschrock ASSERT(osname); 5121544Seschrock 5131544Seschrock /* 5141544Seschrock * Initialize the zfs-specific filesystem structure. 5151544Seschrock * Should probably make this a kmem cache, shuffle fields, 5161544Seschrock * and just bzero up to z_hold_mtx[]. 5171544Seschrock */ 5181544Seschrock zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 5191544Seschrock zfsvfs->z_vfs = vfsp; 5201544Seschrock zfsvfs->z_parent = zfsvfs; 5211544Seschrock zfsvfs->z_assign = TXG_NOWAIT; 5221544Seschrock zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 5231544Seschrock zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 5241544Seschrock 5251544Seschrock mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 5261544Seschrock list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 5271544Seschrock offsetof(znode_t, z_link_node)); 5284787Sahrens rw_init(&zfsvfs->z_unmount_lock, NULL, RW_DEFAULT, NULL); 5294787Sahrens rw_init(&zfsvfs->z_unmount_inactive_lock, NULL, RW_DEFAULT, NULL); 5301544Seschrock 5311544Seschrock /* Initialize the generic filesystem structure. */ 5321544Seschrock vfsp->vfs_bcount = 0; 5331544Seschrock vfsp->vfs_data = NULL; 5341544Seschrock 5351544Seschrock if (zfs_create_unique_device(&mount_dev) == -1) { 5361544Seschrock error = ENODEV; 5371544Seschrock goto out; 5381544Seschrock } 5391544Seschrock ASSERT(vfs_devismounted(mount_dev) == 0); 5401544Seschrock 5411544Seschrock if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 5421544Seschrock NULL)) 5431544Seschrock goto out; 5441544Seschrock 5451544Seschrock vfsp->vfs_dev = mount_dev; 5461544Seschrock vfsp->vfs_fstype = zfsfstype; 5471544Seschrock vfsp->vfs_bsize = recordsize; 5481544Seschrock vfsp->vfs_flag |= VFS_NOTRUNC; 5491544Seschrock vfsp->vfs_data = zfsvfs; 5501544Seschrock 5511544Seschrock if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 5521544Seschrock goto out; 5531544Seschrock 5541544Seschrock if (readonly) 5551544Seschrock mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 5561544Seschrock else 5571544Seschrock mode = DS_MODE_PRIMARY; 5581544Seschrock 5591544Seschrock error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 5601544Seschrock if (error == EROFS) { 5611544Seschrock mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 5621544Seschrock error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 5631544Seschrock &zfsvfs->z_os); 5641544Seschrock } 5651544Seschrock 5661544Seschrock if (error) 5671544Seschrock goto out; 5681544Seschrock 5691544Seschrock if (error = zfs_init_fs(zfsvfs, &zp, cr)) 5701544Seschrock goto out; 5711544Seschrock 5721544Seschrock /* The call to zfs_init_fs leaves the vnode held, release it here. */ 5731544Seschrock VN_RELE(ZTOV(zp)); 5741544Seschrock 5751544Seschrock if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 5763234Sck153898 uint64_t xattr; 5773234Sck153898 5781544Seschrock ASSERT(mode & DS_MODE_READONLY); 5791544Seschrock atime_changed_cb(zfsvfs, B_FALSE); 5801544Seschrock readonly_changed_cb(zfsvfs, B_TRUE); 5813234Sck153898 if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL)) 5823234Sck153898 goto out; 5833234Sck153898 xattr_changed_cb(zfsvfs, xattr); 5841544Seschrock zfsvfs->z_issnap = B_TRUE; 5851544Seschrock } else { 5864935Sperrin uint_t readonly; 5874935Sperrin 5881544Seschrock error = zfs_register_callbacks(vfsp); 5891544Seschrock if (error) 5901544Seschrock goto out; 5911544Seschrock 5924935Sperrin /* 5934935Sperrin * During replay we remove the read only flag to 5944935Sperrin * allow replays to succeed. 5954935Sperrin */ 5964935Sperrin readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY; 5974935Sperrin if (readonly != 0) 5984935Sperrin zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 5994935Sperrin else 6004577Sahrens zfs_unlinked_drain(zfsvfs); 6011544Seschrock 6021544Seschrock /* 6031544Seschrock * Parse and replay the intent log. 6044577Sahrens * 6054577Sahrens * Because of ziltest, this must be done after 6064577Sahrens * zfs_unlinked_drain(). (Further note: ziltest doesn't 6074577Sahrens * use readonly mounts, where zfs_unlinked_drain() isn't 6084577Sahrens * called.) This is because ziltest causes spa_sync() 6094577Sahrens * to think it's committed, but actually it is not, so 6104577Sahrens * the intent log contains many txg's worth of changes. 6114577Sahrens * 6124577Sahrens * In particular, if object N is in the unlinked set in 6134577Sahrens * the last txg to actually sync, then it could be 6144577Sahrens * actually freed in a later txg and then reallocated in 6154577Sahrens * a yet later txg. This would write a "create object 6164577Sahrens * N" record to the intent log. Normally, this would be 6174577Sahrens * fine because the spa_sync() would have written out 6184577Sahrens * the fact that object N is free, before we could write 6194577Sahrens * the "create object N" intent log record. 6204577Sahrens * 6214577Sahrens * But when we are in ziltest mode, we advance the "open 6224577Sahrens * txg" without actually spa_sync()-ing the changes to 6234577Sahrens * disk. So we would see that object N is still 6244577Sahrens * allocated and in the unlinked set, and there is an 6254577Sahrens * intent log record saying to allocate it. 6261544Seschrock */ 6271544Seschrock zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 6283461Sahrens zfs_replay_vector); 6291544Seschrock 6304935Sperrin zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */ 6314935Sperrin 6321544Seschrock if (!zil_disable) 6331544Seschrock zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 6341544Seschrock } 6351544Seschrock 6361544Seschrock if (!zfsvfs->z_issnap) 6371544Seschrock zfsctl_create(zfsvfs); 6381544Seschrock out: 6391544Seschrock if (error) { 6401544Seschrock if (zfsvfs->z_os) 6411544Seschrock dmu_objset_close(zfsvfs->z_os); 6424831Sgw25295 mutex_destroy(&zfsvfs->z_znodes_lock); 6434831Sgw25295 list_destroy(&zfsvfs->z_all_znodes); 6444831Sgw25295 rw_destroy(&zfsvfs->z_unmount_lock); 6454831Sgw25295 rw_destroy(&zfsvfs->z_unmount_inactive_lock); 6461544Seschrock kmem_free(zfsvfs, sizeof (zfsvfs_t)); 6471544Seschrock } else { 6481544Seschrock atomic_add_32(&zfs_active_fs_count, 1); 6491544Seschrock } 6501544Seschrock 6511544Seschrock return (error); 6521544Seschrock } 6531544Seschrock 6541544Seschrock void 6551544Seschrock zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 6561544Seschrock { 6571544Seschrock objset_t *os = zfsvfs->z_os; 6581544Seschrock struct dsl_dataset *ds; 6591544Seschrock 6601544Seschrock /* 6611544Seschrock * Unregister properties. 6621544Seschrock */ 6631544Seschrock if (!dmu_objset_is_snapshot(os)) { 6641544Seschrock ds = dmu_objset_ds(os); 6651544Seschrock VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 6661544Seschrock zfsvfs) == 0); 6671544Seschrock 6683234Sck153898 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 6693234Sck153898 zfsvfs) == 0); 6703234Sck153898 6711544Seschrock VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 6721544Seschrock zfsvfs) == 0); 6731544Seschrock 6741544Seschrock VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 6751544Seschrock zfsvfs) == 0); 6761544Seschrock 6771544Seschrock VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, 6781544Seschrock zfsvfs) == 0); 6791544Seschrock 6801544Seschrock VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 6811544Seschrock zfsvfs) == 0); 6821544Seschrock 6831544Seschrock VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 6841544Seschrock zfsvfs) == 0); 6851544Seschrock 6861544Seschrock VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 6871544Seschrock zfsvfs) == 0); 6881544Seschrock 6891544Seschrock VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 6901544Seschrock zfsvfs) == 0); 6911544Seschrock 6921544Seschrock VERIFY(dsl_prop_unregister(ds, "aclinherit", 6931544Seschrock acl_inherit_changed_cb, zfsvfs) == 0); 6941544Seschrock } 6951544Seschrock } 6961544Seschrock 6973912Slling /* 6983912Slling * Convert a decimal digit string to a uint64_t integer. 6993912Slling */ 7003912Slling static int 7013912Slling str_to_uint64(char *str, uint64_t *objnum) 7023912Slling { 7033912Slling uint64_t num = 0; 7043912Slling 7053912Slling while (*str) { 7063912Slling if (*str < '0' || *str > '9') 7073912Slling return (EINVAL); 7083912Slling 7093912Slling num = num*10 + *str++ - '0'; 7103912Slling } 7113912Slling 7123912Slling *objnum = num; 7133912Slling return (0); 7143912Slling } 7153912Slling 7163912Slling /* 7173912Slling * The boot path passed from the boot loader is in the form of 7183912Slling * "rootpool-name/root-filesystem-object-number'. Convert this 7193912Slling * string to a dataset name: "rootpool-name/root-filesystem-name". 7203912Slling */ 7213912Slling static int 7223912Slling parse_bootpath(char *bpath, char *outpath) 7233912Slling { 7243912Slling char *slashp; 7253912Slling uint64_t objnum; 7263912Slling int error; 7273912Slling 7283912Slling if (*bpath == 0 || *bpath == '/') 7293912Slling return (EINVAL); 7303912Slling 7313912Slling slashp = strchr(bpath, '/'); 7323912Slling 7333912Slling /* if no '/', just return the pool name */ 7343912Slling if (slashp == NULL) { 7353912Slling (void) strcpy(outpath, bpath); 7363912Slling return (0); 7373912Slling } 7383912Slling 7393912Slling if (error = str_to_uint64(slashp+1, &objnum)) 7403912Slling return (error); 7413912Slling 7423912Slling *slashp = '\0'; 7433912Slling error = dsl_dsobj_to_dsname(bpath, objnum, outpath); 7443912Slling *slashp = '/'; 7453912Slling 7463912Slling return (error); 7473912Slling } 7483912Slling 7491544Seschrock static int 7501544Seschrock zfs_mountroot(vfs_t *vfsp, enum whymountroot why) 7511544Seschrock { 7521544Seschrock int error = 0; 7531544Seschrock int ret = 0; 7541544Seschrock static int zfsrootdone = 0; 7551544Seschrock zfsvfs_t *zfsvfs = NULL; 7561544Seschrock znode_t *zp = NULL; 7571544Seschrock vnode_t *vp = NULL; 7583912Slling char *zfs_bootpath; 7591544Seschrock 7601544Seschrock ASSERT(vfsp); 7611544Seschrock 7621544Seschrock /* 7633912Slling * The filesystem that we mount as root is defined in the 7643912Slling * "zfs-bootfs" property. 7651544Seschrock */ 7661544Seschrock if (why == ROOT_INIT) { 7671544Seschrock if (zfsrootdone++) 7681544Seschrock return (EBUSY); 7691544Seschrock 7703912Slling if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), 7713912Slling DDI_PROP_DONTPASS, "zfs-bootfs", &zfs_bootpath) != 7723912Slling DDI_SUCCESS) 7733912Slling return (EIO); 7743912Slling 7753912Slling error = parse_bootpath(zfs_bootpath, rootfs.bo_name); 7763912Slling ddi_prop_free(zfs_bootpath); 7773912Slling 7783912Slling if (error) 7793912Slling return (error); 7801544Seschrock 7811544Seschrock if (error = vfs_lock(vfsp)) 7821544Seschrock return (error); 7831544Seschrock 7843912Slling if (error = zfs_domount(vfsp, rootfs.bo_name, CRED())) 7851544Seschrock goto out; 7861544Seschrock 7871544Seschrock zfsvfs = (zfsvfs_t *)vfsp->vfs_data; 7881544Seschrock ASSERT(zfsvfs); 7891544Seschrock if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) 7901544Seschrock goto out; 7911544Seschrock 7921544Seschrock vp = ZTOV(zp); 7931544Seschrock mutex_enter(&vp->v_lock); 7941544Seschrock vp->v_flag |= VROOT; 7951544Seschrock mutex_exit(&vp->v_lock); 7961544Seschrock rootvp = vp; 7971544Seschrock 7981544Seschrock /* 7991544Seschrock * The zfs_zget call above returns with a hold on vp, we release 8001544Seschrock * it here. 8011544Seschrock */ 8021544Seschrock VN_RELE(vp); 8031544Seschrock 8041544Seschrock /* 8051544Seschrock * Mount root as readonly initially, it will be remouted 8061544Seschrock * read/write by /lib/svc/method/fs-usr. 8071544Seschrock */ 8081544Seschrock readonly_changed_cb(vfsp->vfs_data, B_TRUE); 8091544Seschrock vfs_add((struct vnode *)0, vfsp, 8101544Seschrock (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0); 8111544Seschrock out: 8121544Seschrock vfs_unlock(vfsp); 8131544Seschrock ret = (error) ? error : 0; 8141544Seschrock return (ret); 8151544Seschrock } else if (why == ROOT_REMOUNT) { 8161544Seschrock readonly_changed_cb(vfsp->vfs_data, B_FALSE); 8171544Seschrock vfsp->vfs_flag |= VFS_REMOUNT; 8184596Slling 8194596Slling /* refresh mount options */ 8204596Slling zfs_unregister_callbacks(vfsp->vfs_data); 8214596Slling return (zfs_register_callbacks(vfsp)); 8224596Slling 8231544Seschrock } else if (why == ROOT_UNMOUNT) { 8241544Seschrock zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data); 8251544Seschrock (void) zfs_sync(vfsp, 0, 0); 8261544Seschrock return (0); 8271544Seschrock } 8281544Seschrock 8291544Seschrock /* 8301544Seschrock * if "why" is equal to anything else other than ROOT_INIT, 8311544Seschrock * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it. 8321544Seschrock */ 8331544Seschrock return (ENOTSUP); 8341544Seschrock } 8351544Seschrock 836789Sahrens /*ARGSUSED*/ 837789Sahrens static int 838789Sahrens zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 839789Sahrens { 840789Sahrens char *osname; 841789Sahrens pathname_t spn; 842789Sahrens int error = 0; 843789Sahrens uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ? 8443912Slling UIO_SYSSPACE : UIO_USERSPACE; 845789Sahrens int canwrite; 846789Sahrens 847789Sahrens if (mvp->v_type != VDIR) 848789Sahrens return (ENOTDIR); 849789Sahrens 850789Sahrens mutex_enter(&mvp->v_lock); 851789Sahrens if ((uap->flags & MS_REMOUNT) == 0 && 852789Sahrens (uap->flags & MS_OVERLAY) == 0 && 853789Sahrens (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 854789Sahrens mutex_exit(&mvp->v_lock); 855789Sahrens return (EBUSY); 856789Sahrens } 857789Sahrens mutex_exit(&mvp->v_lock); 858789Sahrens 859789Sahrens /* 860789Sahrens * ZFS does not support passing unparsed data in via MS_DATA. 861789Sahrens * Users should use the MS_OPTIONSTR interface; this means 862789Sahrens * that all option parsing is already done and the options struct 863789Sahrens * can be interrogated. 864789Sahrens */ 865789Sahrens if ((uap->flags & MS_DATA) && uap->datalen > 0) 866789Sahrens return (EINVAL); 867789Sahrens 868789Sahrens /* 869789Sahrens * Get the objset name (the "special" mount argument). 870789Sahrens */ 871789Sahrens if (error = pn_get(uap->spec, fromspace, &spn)) 872789Sahrens return (error); 873789Sahrens 874789Sahrens osname = spn.pn_path; 875789Sahrens 8764543Smarks /* 8774543Smarks * Check for mount privilege? 8784543Smarks * 8794543Smarks * If we don't have privilege then see if 8804543Smarks * we have local permission to allow it 8814543Smarks */ 8824543Smarks error = secpolicy_fs_mount(cr, mvp, vfsp); 8834543Smarks if (error) { 8844543Smarks error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr); 8854543Smarks if (error == 0) { 8864543Smarks vattr_t vattr; 8874543Smarks 8884543Smarks /* 8894543Smarks * Make sure user is the owner of the mount point 8904543Smarks * or has sufficient privileges. 8914543Smarks */ 8924543Smarks 8934543Smarks vattr.va_mask = AT_UID; 8944543Smarks 8954614Smarks if (error = VOP_GETATTR(mvp, &vattr, 0, cr)) { 8964543Smarks goto out; 8974543Smarks } 8984543Smarks 8994543Smarks if (error = secpolicy_vnode_owner(cr, vattr.va_uid)) { 9004543Smarks goto out; 9014543Smarks } 9024543Smarks 9034543Smarks if (error = VOP_ACCESS(mvp, VWRITE, 0, cr)) { 9044543Smarks goto out; 9054543Smarks } 9064543Smarks 9074543Smarks secpolicy_fs_mount_clearopts(cr, vfsp); 9084543Smarks } else { 9094543Smarks goto out; 9104543Smarks } 9114543Smarks } 912789Sahrens 913789Sahrens /* 914789Sahrens * Refuse to mount a filesystem if we are in a local zone and the 915789Sahrens * dataset is not visible. 916789Sahrens */ 917789Sahrens if (!INGLOBALZONE(curproc) && 918789Sahrens (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 919789Sahrens error = EPERM; 920789Sahrens goto out; 921789Sahrens } 922789Sahrens 9234596Slling /* 9244596Slling * When doing a remount, we simply refresh our temporary properties 9254596Slling * according to those options set in the current VFS options. 9264596Slling */ 9274596Slling if (uap->flags & MS_REMOUNT) { 9284596Slling /* refresh mount options */ 9294596Slling zfs_unregister_callbacks(vfsp->vfs_data); 9304596Slling error = zfs_register_callbacks(vfsp); 9314596Slling goto out; 9324596Slling } 9334596Slling 9341544Seschrock error = zfs_domount(vfsp, osname, cr); 935789Sahrens 936789Sahrens out: 937789Sahrens pn_free(&spn); 938789Sahrens return (error); 939789Sahrens } 940789Sahrens 941789Sahrens static int 942789Sahrens zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp) 943789Sahrens { 944789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 945789Sahrens dev32_t d32; 9462885Sahrens uint64_t refdbytes, availbytes, usedobjs, availobjs; 947789Sahrens 948789Sahrens ZFS_ENTER(zfsvfs); 949789Sahrens 9502885Sahrens dmu_objset_space(zfsvfs->z_os, 9512885Sahrens &refdbytes, &availbytes, &usedobjs, &availobjs); 952789Sahrens 953789Sahrens /* 954789Sahrens * The underlying storage pool actually uses multiple block sizes. 955789Sahrens * We report the fragsize as the smallest block size we support, 956789Sahrens * and we report our blocksize as the filesystem's maximum blocksize. 957789Sahrens */ 958789Sahrens statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT; 959789Sahrens statp->f_bsize = zfsvfs->z_max_blksz; 960789Sahrens 961789Sahrens /* 962789Sahrens * The following report "total" blocks of various kinds in the 963789Sahrens * file system, but reported in terms of f_frsize - the 964789Sahrens * "fragment" size. 965789Sahrens */ 966789Sahrens 9672885Sahrens statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 9682885Sahrens statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT; 969789Sahrens statp->f_bavail = statp->f_bfree; /* no root reservation */ 970789Sahrens 971789Sahrens /* 972789Sahrens * statvfs() should really be called statufs(), because it assumes 973789Sahrens * static metadata. ZFS doesn't preallocate files, so the best 974789Sahrens * we can do is report the max that could possibly fit in f_files, 975789Sahrens * and that minus the number actually used in f_ffree. 976789Sahrens * For f_ffree, report the smaller of the number of object available 977789Sahrens * and the number of blocks (each object will take at least a block). 978789Sahrens */ 9792885Sahrens statp->f_ffree = MIN(availobjs, statp->f_bfree); 980789Sahrens statp->f_favail = statp->f_ffree; /* no "root reservation" */ 9812885Sahrens statp->f_files = statp->f_ffree + usedobjs; 982789Sahrens 983789Sahrens (void) cmpldev(&d32, vfsp->vfs_dev); 984789Sahrens statp->f_fsid = d32; 985789Sahrens 986789Sahrens /* 987789Sahrens * We're a zfs filesystem. 988789Sahrens */ 989789Sahrens (void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name); 990789Sahrens 9911123Smarks statp->f_flag = vf_to_stf(vfsp->vfs_flag); 992789Sahrens 993789Sahrens statp->f_namemax = ZFS_MAXNAMELEN; 994789Sahrens 995789Sahrens /* 996789Sahrens * We have all of 32 characters to stuff a string here. 997789Sahrens * Is there anything useful we could/should provide? 998789Sahrens */ 999789Sahrens bzero(statp->f_fstr, sizeof (statp->f_fstr)); 1000789Sahrens 1001789Sahrens ZFS_EXIT(zfsvfs); 1002789Sahrens return (0); 1003789Sahrens } 1004789Sahrens 1005789Sahrens static int 1006789Sahrens zfs_root(vfs_t *vfsp, vnode_t **vpp) 1007789Sahrens { 1008789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1009789Sahrens znode_t *rootzp; 1010789Sahrens int error; 1011789Sahrens 1012789Sahrens ZFS_ENTER(zfsvfs); 1013789Sahrens 1014789Sahrens error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 1015789Sahrens if (error == 0) 1016789Sahrens *vpp = ZTOV(rootzp); 1017789Sahrens 1018789Sahrens ZFS_EXIT(zfsvfs); 1019789Sahrens return (error); 1020789Sahrens } 1021789Sahrens 1022789Sahrens /*ARGSUSED*/ 1023789Sahrens static int 1024789Sahrens zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) 1025789Sahrens { 1026789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 10274787Sahrens objset_t *os = zfsvfs->z_os; 10284787Sahrens znode_t *zp, *nextzp; 1029789Sahrens int ret; 1030789Sahrens 10314543Smarks ret = secpolicy_fs_unmount(cr, vfsp); 10324543Smarks if (ret) { 10334543Smarks ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), 10344543Smarks ZFS_DELEG_PERM_MOUNT, cr); 10354543Smarks if (ret) 10364543Smarks return (ret); 10374543Smarks } 10381484Sek110237 10394736Sek110237 /* 10404736Sek110237 * We purge the parent filesystem's vfsp as the parent filesystem 10414736Sek110237 * and all of its snapshots have their vnode's v_vfsp set to the 10424736Sek110237 * parent's filesystem's vfsp. Note, 'z_parent' is self 10434736Sek110237 * referential for non-snapshots. 10444736Sek110237 */ 10454736Sek110237 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 10461484Sek110237 1047789Sahrens /* 1048789Sahrens * Unmount any snapshots mounted under .zfs before unmounting the 1049789Sahrens * dataset itself. 1050789Sahrens */ 1051789Sahrens if (zfsvfs->z_ctldir != NULL && 10524543Smarks (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) { 1053789Sahrens return (ret); 10544543Smarks } 1055789Sahrens 10564787Sahrens if (!(fflag & MS_FORCE)) { 10574480Sgw25295 /* 10584787Sahrens * Check the number of active vnodes in the file system. 10594787Sahrens * Our count is maintained in the vfs structure, but the 10604787Sahrens * number is off by 1 to indicate a hold on the vfs 10614787Sahrens * structure itself. 10624787Sahrens * 10634787Sahrens * The '.zfs' directory maintains a reference of its 10644787Sahrens * own, and any active references underneath are 10654787Sahrens * reflected in the vnode count. 1066789Sahrens */ 10674787Sahrens if (zfsvfs->z_ctldir == NULL) { 10684787Sahrens if (vfsp->vfs_count > 1) 10694787Sahrens return (EBUSY); 10704787Sahrens } else { 10714787Sahrens if (vfsp->vfs_count > 2 || 10724787Sahrens zfsvfs->z_ctldir->v_count > 1) { 10734787Sahrens return (EBUSY); 10744787Sahrens } 1075789Sahrens } 1076789Sahrens } 1077789Sahrens 1078789Sahrens vfsp->vfs_flag |= VFS_UNMOUNTED; 10794787Sahrens 10804787Sahrens rw_enter(&zfsvfs->z_unmount_lock, RW_WRITER); 10814787Sahrens rw_enter(&zfsvfs->z_unmount_inactive_lock, RW_WRITER); 10824787Sahrens 10834787Sahrens /* 10844787Sahrens * At this point there are no vops active, and any new vops will 10854787Sahrens * fail with EIO since we have z_unmount_lock for writer (only 10864787Sahrens * relavent for forced unmount). 10874787Sahrens * 10884787Sahrens * Release all holds on dbufs. 10894787Sahrens * Note, the dmu can still callback via znode_pageout_func() 10904787Sahrens * which can zfs_znode_free() the znode. So we lock 10914787Sahrens * z_all_znodes; search the list for a held dbuf; drop the lock 10924787Sahrens * (we know zp can't disappear if we hold a dbuf lock) then 10934787Sahrens * regrab the lock and restart. 10944787Sahrens */ 10954787Sahrens mutex_enter(&zfsvfs->z_znodes_lock); 10964787Sahrens for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { 10974787Sahrens nextzp = list_next(&zfsvfs->z_all_znodes, zp); 10984787Sahrens if (zp->z_dbuf_held) { 10994787Sahrens /* dbufs should only be held when force unmounting */ 11004787Sahrens zp->z_dbuf_held = 0; 11014787Sahrens mutex_exit(&zfsvfs->z_znodes_lock); 11024787Sahrens dmu_buf_rele(zp->z_dbuf, NULL); 11034787Sahrens /* Start again */ 11044787Sahrens mutex_enter(&zfsvfs->z_znodes_lock); 11054787Sahrens nextzp = list_head(&zfsvfs->z_all_znodes); 11064787Sahrens } 11074787Sahrens } 11084787Sahrens mutex_exit(&zfsvfs->z_znodes_lock); 11094787Sahrens 11104787Sahrens /* 11114787Sahrens * Set the unmounted flag and let new vops unblock. 11124787Sahrens * zfs_inactive will have the unmounted behavior, and all other 11134787Sahrens * vops will fail with EIO. 11144787Sahrens */ 11154787Sahrens zfsvfs->z_unmounted = B_TRUE; 11164787Sahrens rw_exit(&zfsvfs->z_unmount_lock); 11174787Sahrens rw_exit(&zfsvfs->z_unmount_inactive_lock); 11184787Sahrens 11194787Sahrens /* 11204787Sahrens * Unregister properties. 11214787Sahrens */ 11224787Sahrens if (!dmu_objset_is_snapshot(os)) 11234787Sahrens zfs_unregister_callbacks(zfsvfs); 11244787Sahrens 11254787Sahrens /* 11264787Sahrens * Close the zil. NB: Can't close the zil while zfs_inactive 11274787Sahrens * threads are blocked as zil_close can call zfs_inactive. 11284787Sahrens */ 11294787Sahrens if (zfsvfs->z_log) { 11304787Sahrens zil_close(zfsvfs->z_log); 11314787Sahrens zfsvfs->z_log = NULL; 11324787Sahrens } 11334787Sahrens 11344787Sahrens /* 1135*4944Smaybee * Evict cached data 11364787Sahrens */ 1137*4944Smaybee (void) dmu_objset_evict_dbufs(os); 11384787Sahrens 11394787Sahrens /* 11404787Sahrens * Finally close the objset 11414787Sahrens */ 11424787Sahrens dmu_objset_close(os); 11434787Sahrens 11444787Sahrens /* 11454787Sahrens * We can now safely destroy the '.zfs' directory node. 11464787Sahrens */ 11474787Sahrens if (zfsvfs->z_ctldir != NULL) 11484787Sahrens zfsctl_destroy(zfsvfs); 1149789Sahrens 1150789Sahrens return (0); 1151789Sahrens } 1152789Sahrens 1153789Sahrens static int 1154789Sahrens zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1155789Sahrens { 1156789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1157789Sahrens znode_t *zp; 1158789Sahrens uint64_t object = 0; 1159789Sahrens uint64_t fid_gen = 0; 1160789Sahrens uint64_t gen_mask; 1161789Sahrens uint64_t zp_gen; 1162789Sahrens int i, err; 1163789Sahrens 1164789Sahrens *vpp = NULL; 1165789Sahrens 1166789Sahrens ZFS_ENTER(zfsvfs); 1167789Sahrens 1168789Sahrens if (fidp->fid_len == LONG_FID_LEN) { 1169789Sahrens zfid_long_t *zlfid = (zfid_long_t *)fidp; 1170789Sahrens uint64_t objsetid = 0; 1171789Sahrens uint64_t setgen = 0; 1172789Sahrens 1173789Sahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1174789Sahrens objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1175789Sahrens 1176789Sahrens for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1177789Sahrens setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1178789Sahrens 1179789Sahrens ZFS_EXIT(zfsvfs); 1180789Sahrens 1181789Sahrens err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1182789Sahrens if (err) 1183789Sahrens return (EINVAL); 1184789Sahrens ZFS_ENTER(zfsvfs); 1185789Sahrens } 1186789Sahrens 1187789Sahrens if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1188789Sahrens zfid_short_t *zfid = (zfid_short_t *)fidp; 1189789Sahrens 1190789Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 1191789Sahrens object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1192789Sahrens 1193789Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 1194789Sahrens fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1195789Sahrens } else { 1196789Sahrens ZFS_EXIT(zfsvfs); 1197789Sahrens return (EINVAL); 1198789Sahrens } 1199789Sahrens 1200789Sahrens /* A zero fid_gen means we are in the .zfs control directories */ 1201789Sahrens if (fid_gen == 0 && 1202789Sahrens (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 1203789Sahrens *vpp = zfsvfs->z_ctldir; 1204789Sahrens ASSERT(*vpp != NULL); 1205789Sahrens if (object == ZFSCTL_INO_SNAPDIR) { 1206789Sahrens VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 1207789Sahrens 0, NULL, NULL) == 0); 1208789Sahrens } else { 1209789Sahrens VN_HOLD(*vpp); 1210789Sahrens } 1211789Sahrens ZFS_EXIT(zfsvfs); 1212789Sahrens return (0); 1213789Sahrens } 1214789Sahrens 1215789Sahrens gen_mask = -1ULL >> (64 - 8 * i); 1216789Sahrens 1217789Sahrens dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 1218789Sahrens if (err = zfs_zget(zfsvfs, object, &zp)) { 1219789Sahrens ZFS_EXIT(zfsvfs); 1220789Sahrens return (err); 1221789Sahrens } 1222789Sahrens zp_gen = zp->z_phys->zp_gen & gen_mask; 1223789Sahrens if (zp_gen == 0) 1224789Sahrens zp_gen = 1; 12253461Sahrens if (zp->z_unlinked || zp_gen != fid_gen) { 1226789Sahrens dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 1227789Sahrens VN_RELE(ZTOV(zp)); 1228789Sahrens ZFS_EXIT(zfsvfs); 1229789Sahrens return (EINVAL); 1230789Sahrens } 1231789Sahrens 1232789Sahrens *vpp = ZTOV(zp); 1233789Sahrens ZFS_EXIT(zfsvfs); 1234789Sahrens return (0); 1235789Sahrens } 1236789Sahrens 1237789Sahrens static void 1238789Sahrens zfs_freevfs(vfs_t *vfsp) 1239789Sahrens { 1240789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 12414831Sgw25295 int i; 12424831Sgw25295 12434831Sgw25295 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 12444831Sgw25295 mutex_destroy(&zfsvfs->z_hold_mtx[i]); 1245789Sahrens 12464787Sahrens mutex_destroy(&zfsvfs->z_znodes_lock); 12474831Sgw25295 list_destroy(&zfsvfs->z_all_znodes); 12484787Sahrens rw_destroy(&zfsvfs->z_unmount_lock); 12494787Sahrens rw_destroy(&zfsvfs->z_unmount_inactive_lock); 1250789Sahrens kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1251789Sahrens 1252789Sahrens atomic_add_32(&zfs_active_fs_count, -1); 1253789Sahrens } 1254789Sahrens 1255789Sahrens /* 1256789Sahrens * VFS_INIT() initialization. Note that there is no VFS_FINI(), 1257789Sahrens * so we can't safely do any non-idempotent initialization here. 1258789Sahrens * Leave that to zfs_init() and zfs_fini(), which are called 1259789Sahrens * from the module's _init() and _fini() entry points. 1260789Sahrens */ 1261789Sahrens /*ARGSUSED*/ 1262789Sahrens static int 1263789Sahrens zfs_vfsinit(int fstype, char *name) 1264789Sahrens { 1265789Sahrens int error; 1266789Sahrens 1267789Sahrens zfsfstype = fstype; 1268789Sahrens 1269789Sahrens /* 1270789Sahrens * Setup vfsops and vnodeops tables. 1271789Sahrens */ 1272789Sahrens error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops); 1273789Sahrens if (error != 0) { 1274789Sahrens cmn_err(CE_WARN, "zfs: bad vfs ops template"); 1275789Sahrens } 1276789Sahrens 1277789Sahrens error = zfs_create_op_tables(); 1278789Sahrens if (error) { 1279789Sahrens zfs_remove_op_tables(); 1280789Sahrens cmn_err(CE_WARN, "zfs: bad vnode ops template"); 1281789Sahrens (void) vfs_freevfsops_by_type(zfsfstype); 1282789Sahrens return (error); 1283789Sahrens } 1284789Sahrens 1285789Sahrens mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL); 1286789Sahrens 1287789Sahrens /* 1288849Sbonwick * Unique major number for all zfs mounts. 1289849Sbonwick * If we run out of 32-bit minors, we'll getudev() another major. 1290789Sahrens */ 1291849Sbonwick zfs_major = ddi_name_to_major(ZFS_DRIVER); 1292849Sbonwick zfs_minor = ZFS_MIN_MINOR; 1293789Sahrens 1294789Sahrens return (0); 1295789Sahrens } 1296789Sahrens 1297789Sahrens void 1298789Sahrens zfs_init(void) 1299789Sahrens { 1300789Sahrens /* 1301789Sahrens * Initialize .zfs directory structures 1302789Sahrens */ 1303789Sahrens zfsctl_init(); 1304789Sahrens 1305789Sahrens /* 1306789Sahrens * Initialize znode cache, vnode ops, etc... 1307789Sahrens */ 1308789Sahrens zfs_znode_init(); 1309789Sahrens } 1310789Sahrens 1311789Sahrens void 1312789Sahrens zfs_fini(void) 1313789Sahrens { 1314789Sahrens zfsctl_fini(); 1315789Sahrens zfs_znode_fini(); 1316789Sahrens } 1317789Sahrens 1318789Sahrens int 1319789Sahrens zfs_busy(void) 1320789Sahrens { 1321789Sahrens return (zfs_active_fs_count != 0); 1322789Sahrens } 1323789Sahrens 13244577Sahrens int 13254577Sahrens zfs_get_stats(objset_t *os, nvlist_t *nv) 13264577Sahrens { 13274577Sahrens int error; 13284577Sahrens uint64_t val; 13294577Sahrens 13304577Sahrens error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, &val); 13314577Sahrens if (error == 0) 13324577Sahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VERSION, val); 13334577Sahrens 13344577Sahrens return (error); 13354577Sahrens } 13364577Sahrens 13374577Sahrens int 13384577Sahrens zfs_set_version(const char *name, uint64_t newvers) 13394577Sahrens { 13404577Sahrens int error; 13414577Sahrens objset_t *os; 13424577Sahrens dmu_tx_t *tx; 13434577Sahrens uint64_t curvers; 13444577Sahrens 13454577Sahrens /* 13464577Sahrens * XXX for now, require that the filesystem be unmounted. Would 13474577Sahrens * be nice to find the zfsvfs_t and just update that if 13484577Sahrens * possible. 13494577Sahrens */ 13504577Sahrens 13514577Sahrens if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 13524577Sahrens return (EINVAL); 13534577Sahrens 13544577Sahrens error = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_PRIMARY, &os); 13554577Sahrens if (error) 13564577Sahrens return (error); 13574577Sahrens 13584577Sahrens error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 13594577Sahrens 8, 1, &curvers); 13604577Sahrens if (error) 13614577Sahrens goto out; 13624577Sahrens if (newvers < curvers) { 13634577Sahrens error = EINVAL; 13644577Sahrens goto out; 13654577Sahrens } 13664577Sahrens 13674577Sahrens tx = dmu_tx_create(os); 13684577Sahrens dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 0, ZPL_VERSION_STR); 13694577Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 13704577Sahrens if (error) { 13714577Sahrens dmu_tx_abort(tx); 13724577Sahrens goto out; 13734577Sahrens } 13744577Sahrens error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, 13754577Sahrens &newvers, tx); 13764577Sahrens 13774577Sahrens spa_history_internal_log(LOG_DS_UPGRADE, 13784577Sahrens dmu_objset_spa(os), tx, CRED(), 13794577Sahrens "oldver=%llu newver=%llu dataset = %llu", curvers, newvers, 13804577Sahrens dmu_objset_id(os)); 13814577Sahrens dmu_tx_commit(tx); 13824577Sahrens 13834577Sahrens out: 13844577Sahrens dmu_objset_close(os); 13854577Sahrens return (error); 13864577Sahrens } 13874577Sahrens 1388789Sahrens static vfsdef_t vfw = { 1389789Sahrens VFSDEF_VERSION, 1390789Sahrens MNTTYPE_ZFS, 1391789Sahrens zfs_vfsinit, 13921488Srsb VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS, 1393789Sahrens &zfs_mntopts 1394789Sahrens }; 1395789Sahrens 1396789Sahrens struct modlfs zfs_modlfs = { 13974577Sahrens &mod_fsops, "ZFS filesystem version " SPA_VERSION_STRING, &vfw 1398789Sahrens }; 1399