1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51484Sek110237 * Common Development and Distribution License (the "License"). 61484Sek110237 * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 223461Sahrens * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 
24789Sahrens */ 25789Sahrens 263246Sck153898 #pragma ident "%Z%%M% %I% %E% SMI" 27789Sahrens 28789Sahrens #include <sys/types.h> 29789Sahrens #include <sys/param.h> 30789Sahrens #include <sys/systm.h> 31789Sahrens #include <sys/sysmacros.h> 32789Sahrens #include <sys/kmem.h> 33789Sahrens #include <sys/pathname.h> 34789Sahrens #include <sys/vnode.h> 35789Sahrens #include <sys/vfs.h> 363898Srsb #include <sys/vfs_opreg.h> 37789Sahrens #include <sys/mntent.h> 38789Sahrens #include <sys/mount.h> 39789Sahrens #include <sys/cmn_err.h> 40789Sahrens #include "fs/fs_subr.h" 41789Sahrens #include <sys/zfs_znode.h> 423461Sahrens #include <sys/zfs_dir.h> 43789Sahrens #include <sys/zil.h> 44789Sahrens #include <sys/fs/zfs.h> 45789Sahrens #include <sys/dmu.h> 46789Sahrens #include <sys/dsl_prop.h> 473912Slling #include <sys/dsl_dataset.h> 484543Smarks #include <sys/dsl_deleg.h> 49789Sahrens #include <sys/spa.h> 50789Sahrens #include <sys/zap.h> 51789Sahrens #include <sys/varargs.h> 52789Sahrens #include <sys/policy.h> 53789Sahrens #include <sys/atomic.h> 54789Sahrens #include <sys/mkdev.h> 55789Sahrens #include <sys/modctl.h> 564543Smarks #include <sys/refstr.h> 57789Sahrens #include <sys/zfs_ioctl.h> 58789Sahrens #include <sys/zfs_ctldir.h> 591544Seschrock #include <sys/bootconf.h> 60849Sbonwick #include <sys/sunddi.h> 611484Sek110237 #include <sys/dnlc.h> 62789Sahrens 63789Sahrens int zfsfstype; 64789Sahrens vfsops_t *zfs_vfsops = NULL; 65849Sbonwick static major_t zfs_major; 66789Sahrens static minor_t zfs_minor; 67789Sahrens static kmutex_t zfs_dev_mtx; 68789Sahrens 69789Sahrens static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr); 70789Sahrens static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr); 711544Seschrock static int zfs_mountroot(vfs_t *vfsp, enum whymountroot); 72789Sahrens static int zfs_root(vfs_t *vfsp, vnode_t **vpp); 73789Sahrens static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp); 74789Sahrens static int zfs_vget(vfs_t 
*vfsp, vnode_t **vpp, fid_t *fidp); 75789Sahrens static void zfs_freevfs(vfs_t *vfsp); 76789Sahrens 77789Sahrens static const fs_operation_def_t zfs_vfsops_template[] = { 783898Srsb VFSNAME_MOUNT, { .vfs_mount = zfs_mount }, 793898Srsb VFSNAME_MOUNTROOT, { .vfs_mountroot = zfs_mountroot }, 803898Srsb VFSNAME_UNMOUNT, { .vfs_unmount = zfs_umount }, 813898Srsb VFSNAME_ROOT, { .vfs_root = zfs_root }, 823898Srsb VFSNAME_STATVFS, { .vfs_statvfs = zfs_statvfs }, 833898Srsb VFSNAME_SYNC, { .vfs_sync = zfs_sync }, 843898Srsb VFSNAME_VGET, { .vfs_vget = zfs_vget }, 853898Srsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 863898Srsb NULL, NULL 87789Sahrens }; 88789Sahrens 89789Sahrens static const fs_operation_def_t zfs_vfsops_eio_template[] = { 903898Srsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 913898Srsb NULL, NULL 92789Sahrens }; 93789Sahrens 94789Sahrens /* 95789Sahrens * We need to keep a count of active fs's. 96789Sahrens * This is necessary to prevent our module 97789Sahrens * from being unloaded after a umount -f 98789Sahrens */ 99789Sahrens static uint32_t zfs_active_fs_count = 0; 100789Sahrens 101789Sahrens static char *noatime_cancel[] = { MNTOPT_ATIME, NULL }; 102789Sahrens static char *atime_cancel[] = { MNTOPT_NOATIME, NULL }; 1033234Sck153898 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 1043234Sck153898 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 105789Sahrens 1063234Sck153898 /* 1074596Slling * MO_DEFAULT is not used since the default value is determined 1084596Slling * by the equivalent property. 
1093234Sck153898 */ 110789Sahrens static mntopt_t mntopts[] = { 1113234Sck153898 { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, NULL }, 1123234Sck153898 { MNTOPT_XATTR, xattr_cancel, NULL, 0, NULL }, 1134596Slling { MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL }, 114789Sahrens { MNTOPT_ATIME, atime_cancel, NULL, 0, NULL } 115789Sahrens }; 116789Sahrens 117789Sahrens static mntopts_t zfs_mntopts = { 118789Sahrens sizeof (mntopts) / sizeof (mntopt_t), 119789Sahrens mntopts 120789Sahrens }; 121789Sahrens 122789Sahrens /*ARGSUSED*/ 123789Sahrens int 124789Sahrens zfs_sync(vfs_t *vfsp, short flag, cred_t *cr) 125789Sahrens { 126789Sahrens /* 127789Sahrens * Data integrity is job one. We don't want a compromised kernel 128789Sahrens * writing to the storage pool, so we never sync during panic. 129789Sahrens */ 130789Sahrens if (panicstr) 131789Sahrens return (0); 132789Sahrens 133789Sahrens /* 134789Sahrens * SYNC_ATTR is used by fsflush() to force old filesystems like UFS 135789Sahrens * to sync metadata, which they would otherwise cache indefinitely. 136789Sahrens * Semantically, the only requirement is that the sync be initiated. 137789Sahrens * The DMU syncs out txgs frequently, so there's nothing to do. 138789Sahrens */ 139789Sahrens if (flag & SYNC_ATTR) 140789Sahrens return (0); 141789Sahrens 142789Sahrens if (vfsp != NULL) { 143789Sahrens /* 144789Sahrens * Sync a specific filesystem. 145789Sahrens */ 146789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 147789Sahrens 148789Sahrens ZFS_ENTER(zfsvfs); 149789Sahrens if (zfsvfs->z_log != NULL) 1502638Sperrin zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 151789Sahrens else 152789Sahrens txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 153789Sahrens ZFS_EXIT(zfsvfs); 154789Sahrens } else { 155789Sahrens /* 156789Sahrens * Sync all ZFS filesystems. This is what happens when you 157789Sahrens * run sync(1M). Unlike other filesystems, ZFS honors the 158789Sahrens * request by waiting for all pools to commit all dirty data. 
159789Sahrens */ 160789Sahrens spa_sync_allpools(); 161789Sahrens } 162789Sahrens 163789Sahrens return (0); 164789Sahrens } 165789Sahrens 1661544Seschrock static int 1671544Seschrock zfs_create_unique_device(dev_t *dev) 1681544Seschrock { 1691544Seschrock major_t new_major; 1701544Seschrock 1711544Seschrock do { 1721544Seschrock ASSERT3U(zfs_minor, <=, MAXMIN32); 1731544Seschrock minor_t start = zfs_minor; 1741544Seschrock do { 1751544Seschrock mutex_enter(&zfs_dev_mtx); 1761544Seschrock if (zfs_minor >= MAXMIN32) { 1771544Seschrock /* 1781544Seschrock * If we're still using the real major 1791544Seschrock * keep out of /dev/zfs and /dev/zvol minor 1801544Seschrock * number space. If we're using a getudev()'ed 1811544Seschrock * major number, we can use all of its minors. 1821544Seschrock */ 1831544Seschrock if (zfs_major == ddi_name_to_major(ZFS_DRIVER)) 1841544Seschrock zfs_minor = ZFS_MIN_MINOR; 1851544Seschrock else 1861544Seschrock zfs_minor = 0; 1871544Seschrock } else { 1881544Seschrock zfs_minor++; 1891544Seschrock } 1901544Seschrock *dev = makedevice(zfs_major, zfs_minor); 1911544Seschrock mutex_exit(&zfs_dev_mtx); 1921544Seschrock } while (vfs_devismounted(*dev) && zfs_minor != start); 1931544Seschrock if (zfs_minor == start) { 1941544Seschrock /* 1951544Seschrock * We are using all ~262,000 minor numbers for the 1961544Seschrock * current major number. Create a new major number. 
1971544Seschrock */ 1981544Seschrock if ((new_major = getudev()) == (major_t)-1) { 1991544Seschrock cmn_err(CE_WARN, 2001544Seschrock "zfs_mount: Can't get unique major " 2011544Seschrock "device number."); 2021544Seschrock return (-1); 2031544Seschrock } 2041544Seschrock mutex_enter(&zfs_dev_mtx); 2051544Seschrock zfs_major = new_major; 2061544Seschrock zfs_minor = 0; 2071544Seschrock 2081544Seschrock mutex_exit(&zfs_dev_mtx); 2091544Seschrock } else { 2101544Seschrock break; 2111544Seschrock } 2121544Seschrock /* CONSTANTCONDITION */ 2131544Seschrock } while (1); 2141544Seschrock 2151544Seschrock return (0); 2161544Seschrock } 2171544Seschrock 218789Sahrens static void 219789Sahrens atime_changed_cb(void *arg, uint64_t newval) 220789Sahrens { 221789Sahrens zfsvfs_t *zfsvfs = arg; 222789Sahrens 223789Sahrens if (newval == TRUE) { 224789Sahrens zfsvfs->z_atime = TRUE; 225789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 226789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 227789Sahrens } else { 228789Sahrens zfsvfs->z_atime = FALSE; 229789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 230789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 231789Sahrens } 232789Sahrens } 233789Sahrens 234789Sahrens static void 2353234Sck153898 xattr_changed_cb(void *arg, uint64_t newval) 2363234Sck153898 { 2373234Sck153898 zfsvfs_t *zfsvfs = arg; 2383234Sck153898 2393234Sck153898 if (newval == TRUE) { 2403234Sck153898 /* XXX locking on vfs_flag? */ 2413234Sck153898 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 2423234Sck153898 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 2433234Sck153898 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 2443234Sck153898 } else { 2453234Sck153898 /* XXX locking on vfs_flag? 
*/ 2463234Sck153898 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 2473234Sck153898 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 2483234Sck153898 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 2493234Sck153898 } 2503234Sck153898 } 2513234Sck153898 2523234Sck153898 static void 253789Sahrens blksz_changed_cb(void *arg, uint64_t newval) 254789Sahrens { 255789Sahrens zfsvfs_t *zfsvfs = arg; 256789Sahrens 257789Sahrens if (newval < SPA_MINBLOCKSIZE || 258789Sahrens newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 259789Sahrens newval = SPA_MAXBLOCKSIZE; 260789Sahrens 261789Sahrens zfsvfs->z_max_blksz = newval; 262789Sahrens zfsvfs->z_vfs->vfs_bsize = newval; 263789Sahrens } 264789Sahrens 265789Sahrens static void 266789Sahrens readonly_changed_cb(void *arg, uint64_t newval) 267789Sahrens { 268789Sahrens zfsvfs_t *zfsvfs = arg; 269789Sahrens 270789Sahrens if (newval) { 271789Sahrens /* XXX locking on vfs_flag? */ 272789Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 273789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 274789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 275789Sahrens } else { 276789Sahrens /* XXX locking on vfs_flag? 
*/ 277789Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 278789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 279789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 280789Sahrens } 281789Sahrens } 282789Sahrens 283789Sahrens static void 284789Sahrens devices_changed_cb(void *arg, uint64_t newval) 285789Sahrens { 286789Sahrens zfsvfs_t *zfsvfs = arg; 287789Sahrens 288789Sahrens if (newval == FALSE) { 289789Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES; 290789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES); 291789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0); 292789Sahrens } else { 293789Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES; 294789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES); 295789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0); 296789Sahrens } 297789Sahrens } 298789Sahrens 299789Sahrens static void 300789Sahrens setuid_changed_cb(void *arg, uint64_t newval) 301789Sahrens { 302789Sahrens zfsvfs_t *zfsvfs = arg; 303789Sahrens 304789Sahrens if (newval == FALSE) { 305789Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 306789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 307789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 308789Sahrens } else { 309789Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 310789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 311789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 312789Sahrens } 313789Sahrens } 314789Sahrens 315789Sahrens static void 316789Sahrens exec_changed_cb(void *arg, uint64_t newval) 317789Sahrens { 318789Sahrens zfsvfs_t *zfsvfs = arg; 319789Sahrens 320789Sahrens if (newval == FALSE) { 321789Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 322789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 323789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 324789Sahrens } else { 325789Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 326789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, 
MNTOPT_NOEXEC); 327789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 328789Sahrens } 329789Sahrens } 330789Sahrens 331789Sahrens static void 332789Sahrens snapdir_changed_cb(void *arg, uint64_t newval) 333789Sahrens { 334789Sahrens zfsvfs_t *zfsvfs = arg; 335789Sahrens 336789Sahrens zfsvfs->z_show_ctldir = newval; 337789Sahrens } 338789Sahrens 339789Sahrens static void 340789Sahrens acl_mode_changed_cb(void *arg, uint64_t newval) 341789Sahrens { 342789Sahrens zfsvfs_t *zfsvfs = arg; 343789Sahrens 344789Sahrens zfsvfs->z_acl_mode = newval; 345789Sahrens } 346789Sahrens 347789Sahrens static void 348789Sahrens acl_inherit_changed_cb(void *arg, uint64_t newval) 349789Sahrens { 350789Sahrens zfsvfs_t *zfsvfs = arg; 351789Sahrens 352789Sahrens zfsvfs->z_acl_inherit = newval; 353789Sahrens } 354789Sahrens 3551544Seschrock static int 3561544Seschrock zfs_register_callbacks(vfs_t *vfsp) 3571544Seschrock { 3581544Seschrock struct dsl_dataset *ds = NULL; 3591544Seschrock objset_t *os = NULL; 3601544Seschrock zfsvfs_t *zfsvfs = NULL; 3613265Sahrens int readonly, do_readonly = FALSE; 3623265Sahrens int setuid, do_setuid = FALSE; 3633265Sahrens int exec, do_exec = FALSE; 3643265Sahrens int devices, do_devices = FALSE; 3653265Sahrens int xattr, do_xattr = FALSE; 3664596Slling int atime, do_atime = FALSE; 3671544Seschrock int error = 0; 3681544Seschrock 3691544Seschrock ASSERT(vfsp); 3701544Seschrock zfsvfs = vfsp->vfs_data; 3711544Seschrock ASSERT(zfsvfs); 3721544Seschrock os = zfsvfs->z_os; 3731544Seschrock 3741544Seschrock /* 3751544Seschrock * The act of registering our callbacks will destroy any mount 3761544Seschrock * options we may have. In order to enable temporary overrides 3773234Sck153898 * of mount options, we stash away the current values and 3781544Seschrock * restore them after we register the callbacks. 
3791544Seschrock */ 3801544Seschrock if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 3811544Seschrock readonly = B_TRUE; 3821544Seschrock do_readonly = B_TRUE; 3831544Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 3841544Seschrock readonly = B_FALSE; 3851544Seschrock do_readonly = B_TRUE; 3861544Seschrock } 3871544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 3881544Seschrock devices = B_FALSE; 3891544Seschrock setuid = B_FALSE; 3901544Seschrock do_devices = B_TRUE; 3911544Seschrock do_setuid = B_TRUE; 3921544Seschrock } else { 3931544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 3941544Seschrock devices = B_FALSE; 3951544Seschrock do_devices = B_TRUE; 3963912Slling } else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) { 3971544Seschrock devices = B_TRUE; 3981544Seschrock do_devices = B_TRUE; 3991544Seschrock } 4001544Seschrock 4011544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 4021544Seschrock setuid = B_FALSE; 4031544Seschrock do_setuid = B_TRUE; 4041544Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 4051544Seschrock setuid = B_TRUE; 4061544Seschrock do_setuid = B_TRUE; 4071544Seschrock } 4081544Seschrock } 4091544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 4101544Seschrock exec = B_FALSE; 4111544Seschrock do_exec = B_TRUE; 4121544Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 4131544Seschrock exec = B_TRUE; 4141544Seschrock do_exec = B_TRUE; 4151544Seschrock } 4163234Sck153898 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 4173234Sck153898 xattr = B_FALSE; 4183234Sck153898 do_xattr = B_TRUE; 4193234Sck153898 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 4203234Sck153898 xattr = B_TRUE; 4213234Sck153898 do_xattr = B_TRUE; 4223234Sck153898 } 4234596Slling if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 4244596Slling atime = B_FALSE; 4254596Slling do_atime = B_TRUE; 4264596Slling } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) 
{ 4274596Slling atime = B_TRUE; 4284596Slling do_atime = B_TRUE; 4294596Slling } 4301544Seschrock 4311544Seschrock /* 4321544Seschrock * Register property callbacks. 4331544Seschrock * 4341544Seschrock * It would probably be fine to just check for i/o error from 4351544Seschrock * the first prop_register(), but I guess I like to go 4361544Seschrock * overboard... 4371544Seschrock */ 4381544Seschrock ds = dmu_objset_ds(os); 4391544Seschrock error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 4401544Seschrock error = error ? error : dsl_prop_register(ds, 4413234Sck153898 "xattr", xattr_changed_cb, zfsvfs); 4423234Sck153898 error = error ? error : dsl_prop_register(ds, 4431544Seschrock "recordsize", blksz_changed_cb, zfsvfs); 4441544Seschrock error = error ? error : dsl_prop_register(ds, 4451544Seschrock "readonly", readonly_changed_cb, zfsvfs); 4461544Seschrock error = error ? error : dsl_prop_register(ds, 4471544Seschrock "devices", devices_changed_cb, zfsvfs); 4481544Seschrock error = error ? error : dsl_prop_register(ds, 4491544Seschrock "setuid", setuid_changed_cb, zfsvfs); 4501544Seschrock error = error ? error : dsl_prop_register(ds, 4511544Seschrock "exec", exec_changed_cb, zfsvfs); 4521544Seschrock error = error ? error : dsl_prop_register(ds, 4531544Seschrock "snapdir", snapdir_changed_cb, zfsvfs); 4541544Seschrock error = error ? error : dsl_prop_register(ds, 4551544Seschrock "aclmode", acl_mode_changed_cb, zfsvfs); 4561544Seschrock error = error ? error : dsl_prop_register(ds, 4571544Seschrock "aclinherit", acl_inherit_changed_cb, zfsvfs); 4581544Seschrock if (error) 4591544Seschrock goto unregister; 4601544Seschrock 4611544Seschrock /* 4621544Seschrock * Invoke our callbacks to restore temporary mount options. 
4631544Seschrock */ 4641544Seschrock if (do_readonly) 4651544Seschrock readonly_changed_cb(zfsvfs, readonly); 4661544Seschrock if (do_setuid) 4671544Seschrock setuid_changed_cb(zfsvfs, setuid); 4681544Seschrock if (do_exec) 4691544Seschrock exec_changed_cb(zfsvfs, exec); 4701544Seschrock if (do_devices) 4711544Seschrock devices_changed_cb(zfsvfs, devices); 4723234Sck153898 if (do_xattr) 4733234Sck153898 xattr_changed_cb(zfsvfs, xattr); 4744596Slling if (do_atime) 4754596Slling atime_changed_cb(zfsvfs, atime); 4761544Seschrock 4771544Seschrock return (0); 4781544Seschrock 4791544Seschrock unregister: 4801544Seschrock /* 4811544Seschrock * We may attempt to unregister some callbacks that are not 4821544Seschrock * registered, but this is OK; it will simply return ENOMSG, 4831544Seschrock * which we will ignore. 4841544Seschrock */ 4851544Seschrock (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 4863234Sck153898 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 4871544Seschrock (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 4881544Seschrock (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 4891544Seschrock (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs); 4901544Seschrock (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 4911544Seschrock (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 4921544Seschrock (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 4931544Seschrock (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 4941544Seschrock (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 4951544Seschrock zfsvfs); 4961544Seschrock return (error); 4971544Seschrock 4981544Seschrock } 4991544Seschrock 5001544Seschrock static int 5011544Seschrock zfs_domount(vfs_t *vfsp, char *osname, cred_t *cr) 5021544Seschrock { 5031544Seschrock dev_t mount_dev; 5041544Seschrock uint64_t 
recordsize, readonly; 5051544Seschrock int error = 0; 5061544Seschrock int mode; 5071544Seschrock zfsvfs_t *zfsvfs; 5081544Seschrock znode_t *zp = NULL; 5091544Seschrock 5101544Seschrock ASSERT(vfsp); 5111544Seschrock ASSERT(osname); 5121544Seschrock 5131544Seschrock /* 5141544Seschrock * Initialize the zfs-specific filesystem structure. 5151544Seschrock * Should probably make this a kmem cache, shuffle fields, 5161544Seschrock * and just bzero up to z_hold_mtx[]. 5171544Seschrock */ 5181544Seschrock zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 5191544Seschrock zfsvfs->z_vfs = vfsp; 5201544Seschrock zfsvfs->z_parent = zfsvfs; 5211544Seschrock zfsvfs->z_assign = TXG_NOWAIT; 5221544Seschrock zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 5231544Seschrock zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 5241544Seschrock 5251544Seschrock mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 5261544Seschrock list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 5271544Seschrock offsetof(znode_t, z_link_node)); 528*4787Sahrens rw_init(&zfsvfs->z_unmount_lock, NULL, RW_DEFAULT, NULL); 529*4787Sahrens rw_init(&zfsvfs->z_unmount_inactive_lock, NULL, RW_DEFAULT, NULL); 5301544Seschrock 5311544Seschrock /* Initialize the generic filesystem structure. 
*/ 5321544Seschrock vfsp->vfs_bcount = 0; 5331544Seschrock vfsp->vfs_data = NULL; 5341544Seschrock 5351544Seschrock if (zfs_create_unique_device(&mount_dev) == -1) { 5361544Seschrock error = ENODEV; 5371544Seschrock goto out; 5381544Seschrock } 5391544Seschrock ASSERT(vfs_devismounted(mount_dev) == 0); 5401544Seschrock 5411544Seschrock if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 5421544Seschrock NULL)) 5431544Seschrock goto out; 5441544Seschrock 5451544Seschrock vfsp->vfs_dev = mount_dev; 5461544Seschrock vfsp->vfs_fstype = zfsfstype; 5471544Seschrock vfsp->vfs_bsize = recordsize; 5481544Seschrock vfsp->vfs_flag |= VFS_NOTRUNC; 5491544Seschrock vfsp->vfs_data = zfsvfs; 5501544Seschrock 5511544Seschrock if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 5521544Seschrock goto out; 5531544Seschrock 5541544Seschrock if (readonly) 5551544Seschrock mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 5561544Seschrock else 5571544Seschrock mode = DS_MODE_PRIMARY; 5581544Seschrock 5591544Seschrock error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 5601544Seschrock if (error == EROFS) { 5611544Seschrock mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 5621544Seschrock error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 5631544Seschrock &zfsvfs->z_os); 5641544Seschrock } 5651544Seschrock 5661544Seschrock if (error) 5671544Seschrock goto out; 5681544Seschrock 5691544Seschrock if (error = zfs_init_fs(zfsvfs, &zp, cr)) 5701544Seschrock goto out; 5711544Seschrock 5721544Seschrock /* The call to zfs_init_fs leaves the vnode held, release it here. 
*/ 5731544Seschrock VN_RELE(ZTOV(zp)); 5741544Seschrock 5751544Seschrock if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 5763234Sck153898 uint64_t xattr; 5773234Sck153898 5781544Seschrock ASSERT(mode & DS_MODE_READONLY); 5791544Seschrock atime_changed_cb(zfsvfs, B_FALSE); 5801544Seschrock readonly_changed_cb(zfsvfs, B_TRUE); 5813234Sck153898 if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL)) 5823234Sck153898 goto out; 5833234Sck153898 xattr_changed_cb(zfsvfs, xattr); 5841544Seschrock zfsvfs->z_issnap = B_TRUE; 5851544Seschrock } else { 5861544Seschrock error = zfs_register_callbacks(vfsp); 5871544Seschrock if (error) 5881544Seschrock goto out; 5891544Seschrock 5904577Sahrens if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY)) 5914577Sahrens zfs_unlinked_drain(zfsvfs); 5921544Seschrock 5931544Seschrock /* 5941544Seschrock * Parse and replay the intent log. 5954577Sahrens * 5964577Sahrens * Because of ziltest, this must be done after 5974577Sahrens * zfs_unlinked_drain(). (Further note: ziltest doesn't 5984577Sahrens * use readonly mounts, where zfs_unlinked_drain() isn't 5994577Sahrens * called.) This is because ziltest causes spa_sync() 6004577Sahrens * to think it's committed, but actually it is not, so 6014577Sahrens * the intent log contains many txg's worth of changes. 6024577Sahrens * 6034577Sahrens * In particular, if object N is in the unlinked set in 6044577Sahrens * the last txg to actually sync, then it could be 6054577Sahrens * actually freed in a later txg and then reallocated in 6064577Sahrens * a yet later txg. This would write a "create object 6074577Sahrens * N" record to the intent log. Normally, this would be 6084577Sahrens * fine because the spa_sync() would have written out 6094577Sahrens * the fact that object N is free, before we could write 6104577Sahrens * the "create object N" intent log record. 
6114577Sahrens * 6124577Sahrens * But when we are in ziltest mode, we advance the "open 6134577Sahrens * txg" without actually spa_sync()-ing the changes to 6144577Sahrens * disk. So we would see that object N is still 6154577Sahrens * allocated and in the unlinked set, and there is an 6164577Sahrens * intent log record saying to allocate it. 6171544Seschrock */ 6181544Seschrock zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 6193461Sahrens zfs_replay_vector); 6201544Seschrock 6211544Seschrock if (!zil_disable) 6221544Seschrock zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 6231544Seschrock } 6241544Seschrock 6251544Seschrock if (!zfsvfs->z_issnap) 6261544Seschrock zfsctl_create(zfsvfs); 6271544Seschrock out: 6281544Seschrock if (error) { 6291544Seschrock if (zfsvfs->z_os) 6301544Seschrock dmu_objset_close(zfsvfs->z_os); 6311544Seschrock kmem_free(zfsvfs, sizeof (zfsvfs_t)); 6321544Seschrock } else { 6331544Seschrock atomic_add_32(&zfs_active_fs_count, 1); 6341544Seschrock } 6351544Seschrock 6361544Seschrock return (error); 6371544Seschrock } 6381544Seschrock 6391544Seschrock void 6401544Seschrock zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 6411544Seschrock { 6421544Seschrock objset_t *os = zfsvfs->z_os; 6431544Seschrock struct dsl_dataset *ds; 6441544Seschrock 6451544Seschrock /* 6461544Seschrock * Unregister properties. 
6471544Seschrock */ 6481544Seschrock if (!dmu_objset_is_snapshot(os)) { 6491544Seschrock ds = dmu_objset_ds(os); 6501544Seschrock VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 6511544Seschrock zfsvfs) == 0); 6521544Seschrock 6533234Sck153898 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 6543234Sck153898 zfsvfs) == 0); 6553234Sck153898 6561544Seschrock VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 6571544Seschrock zfsvfs) == 0); 6581544Seschrock 6591544Seschrock VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 6601544Seschrock zfsvfs) == 0); 6611544Seschrock 6621544Seschrock VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, 6631544Seschrock zfsvfs) == 0); 6641544Seschrock 6651544Seschrock VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 6661544Seschrock zfsvfs) == 0); 6671544Seschrock 6681544Seschrock VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 6691544Seschrock zfsvfs) == 0); 6701544Seschrock 6711544Seschrock VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 6721544Seschrock zfsvfs) == 0); 6731544Seschrock 6741544Seschrock VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 6751544Seschrock zfsvfs) == 0); 6761544Seschrock 6771544Seschrock VERIFY(dsl_prop_unregister(ds, "aclinherit", 6781544Seschrock acl_inherit_changed_cb, zfsvfs) == 0); 6791544Seschrock } 6801544Seschrock } 6811544Seschrock 6823912Slling /* 6833912Slling * Convert a decimal digit string to a uint64_t integer. 
/*
 * Convert a decimal digit string to a uint64_t integer.
 *
 * str		NUL-terminated string of decimal digits.
 * objnum	receives the converted value on success.
 *
 * Returns 0 on success.  Returns EINVAL, leaving *objnum untouched, if
 * str is empty, contains a character other than '0'-'9', or denotes a
 * value that does not fit in a uint64_t.
 */
static int
str_to_uint64(char *str, uint64_t *objnum)
{
	uint64_t num = 0;

	/* An empty string is not a number; do not quietly report 0. */
	if (*str == '\0')
		return (EINVAL);

	while (*str) {
		uint64_t digit;

		if (*str < '0' || *str > '9')
			return (EINVAL);

		digit = (uint64_t)(*str++ - '0');

		/* Refuse input for which num * 10 + digit would wrap. */
		if (num > (UINT64_MAX - digit) / 10)
			return (EINVAL);

		num = num * 10 + digit;
	}

	*objnum = num;
	return (0);
}
/* 7483912Slling * The filesystem that we mount as root is defined in the 7493912Slling * "zfs-bootfs" property. 7501544Seschrock */ 7511544Seschrock if (why == ROOT_INIT) { 7521544Seschrock if (zfsrootdone++) 7531544Seschrock return (EBUSY); 7541544Seschrock 7553912Slling if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), 7563912Slling DDI_PROP_DONTPASS, "zfs-bootfs", &zfs_bootpath) != 7573912Slling DDI_SUCCESS) 7583912Slling return (EIO); 7593912Slling 7603912Slling error = parse_bootpath(zfs_bootpath, rootfs.bo_name); 7613912Slling ddi_prop_free(zfs_bootpath); 7623912Slling 7633912Slling if (error) 7643912Slling return (error); 7651544Seschrock 7661544Seschrock if (error = vfs_lock(vfsp)) 7671544Seschrock return (error); 7681544Seschrock 7693912Slling if (error = zfs_domount(vfsp, rootfs.bo_name, CRED())) 7701544Seschrock goto out; 7711544Seschrock 7721544Seschrock zfsvfs = (zfsvfs_t *)vfsp->vfs_data; 7731544Seschrock ASSERT(zfsvfs); 7741544Seschrock if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) 7751544Seschrock goto out; 7761544Seschrock 7771544Seschrock vp = ZTOV(zp); 7781544Seschrock mutex_enter(&vp->v_lock); 7791544Seschrock vp->v_flag |= VROOT; 7801544Seschrock mutex_exit(&vp->v_lock); 7811544Seschrock rootvp = vp; 7821544Seschrock 7831544Seschrock /* 7841544Seschrock * The zfs_zget call above returns with a hold on vp, we release 7851544Seschrock * it here. 7861544Seschrock */ 7871544Seschrock VN_RELE(vp); 7881544Seschrock 7891544Seschrock /* 7901544Seschrock * Mount root as readonly initially, it will be remouted 7911544Seschrock * read/write by /lib/svc/method/fs-usr. 7921544Seschrock */ 7931544Seschrock readonly_changed_cb(vfsp->vfs_data, B_TRUE); 7941544Seschrock vfs_add((struct vnode *)0, vfsp, 7951544Seschrock (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0); 7961544Seschrock out: 7971544Seschrock vfs_unlock(vfsp); 7981544Seschrock ret = (error) ? 
error : 0; 7991544Seschrock return (ret); 8001544Seschrock } else if (why == ROOT_REMOUNT) { 8011544Seschrock readonly_changed_cb(vfsp->vfs_data, B_FALSE); 8021544Seschrock vfsp->vfs_flag |= VFS_REMOUNT; 8034596Slling 8044596Slling /* refresh mount options */ 8054596Slling zfs_unregister_callbacks(vfsp->vfs_data); 8064596Slling return (zfs_register_callbacks(vfsp)); 8074596Slling 8081544Seschrock } else if (why == ROOT_UNMOUNT) { 8091544Seschrock zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data); 8101544Seschrock (void) zfs_sync(vfsp, 0, 0); 8111544Seschrock return (0); 8121544Seschrock } 8131544Seschrock 8141544Seschrock /* 8151544Seschrock * if "why" is equal to anything else other than ROOT_INIT, 8161544Seschrock * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it. 8171544Seschrock */ 8181544Seschrock return (ENOTSUP); 8191544Seschrock } 8201544Seschrock 821789Sahrens /*ARGSUSED*/ 822789Sahrens static int 823789Sahrens zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 824789Sahrens { 825789Sahrens char *osname; 826789Sahrens pathname_t spn; 827789Sahrens int error = 0; 828789Sahrens uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ? 8293912Slling UIO_SYSSPACE : UIO_USERSPACE; 830789Sahrens int canwrite; 831789Sahrens 832789Sahrens if (mvp->v_type != VDIR) 833789Sahrens return (ENOTDIR); 834789Sahrens 835789Sahrens mutex_enter(&mvp->v_lock); 836789Sahrens if ((uap->flags & MS_REMOUNT) == 0 && 837789Sahrens (uap->flags & MS_OVERLAY) == 0 && 838789Sahrens (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 839789Sahrens mutex_exit(&mvp->v_lock); 840789Sahrens return (EBUSY); 841789Sahrens } 842789Sahrens mutex_exit(&mvp->v_lock); 843789Sahrens 844789Sahrens /* 845789Sahrens * ZFS does not support passing unparsed data in via MS_DATA. 846789Sahrens * Users should use the MS_OPTIONSTR interface; this means 847789Sahrens * that all option parsing is already done and the options struct 848789Sahrens * can be interrogated. 
849789Sahrens */ 850789Sahrens if ((uap->flags & MS_DATA) && uap->datalen > 0) 851789Sahrens return (EINVAL); 852789Sahrens 853789Sahrens /* 854789Sahrens * Get the objset name (the "special" mount argument). 855789Sahrens */ 856789Sahrens if (error = pn_get(uap->spec, fromspace, &spn)) 857789Sahrens return (error); 858789Sahrens 859789Sahrens osname = spn.pn_path; 860789Sahrens 8614543Smarks /* 8624543Smarks * Check for mount privilege? 8634543Smarks * 8644543Smarks * If we don't have privilege then see if 8654543Smarks * we have local permission to allow it 8664543Smarks */ 8674543Smarks error = secpolicy_fs_mount(cr, mvp, vfsp); 8684543Smarks if (error) { 8694543Smarks error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr); 8704543Smarks if (error == 0) { 8714543Smarks vattr_t vattr; 8724543Smarks 8734543Smarks /* 8744543Smarks * Make sure user is the owner of the mount point 8754543Smarks * or has sufficient privileges. 8764543Smarks */ 8774543Smarks 8784543Smarks vattr.va_mask = AT_UID; 8794543Smarks 8804614Smarks if (error = VOP_GETATTR(mvp, &vattr, 0, cr)) { 8814543Smarks goto out; 8824543Smarks } 8834543Smarks 8844543Smarks if (error = secpolicy_vnode_owner(cr, vattr.va_uid)) { 8854543Smarks goto out; 8864543Smarks } 8874543Smarks 8884543Smarks if (error = VOP_ACCESS(mvp, VWRITE, 0, cr)) { 8894543Smarks goto out; 8904543Smarks } 8914543Smarks 8924543Smarks secpolicy_fs_mount_clearopts(cr, vfsp); 8934543Smarks } else { 8944543Smarks goto out; 8954543Smarks } 8964543Smarks } 897789Sahrens 898789Sahrens /* 899789Sahrens * Refuse to mount a filesystem if we are in a local zone and the 900789Sahrens * dataset is not visible. 
901789Sahrens */ 902789Sahrens if (!INGLOBALZONE(curproc) && 903789Sahrens (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 904789Sahrens error = EPERM; 905789Sahrens goto out; 906789Sahrens } 907789Sahrens 9084596Slling /* 9094596Slling * When doing a remount, we simply refresh our temporary properties 9104596Slling * according to those options set in the current VFS options. 9114596Slling */ 9124596Slling if (uap->flags & MS_REMOUNT) { 9134596Slling /* refresh mount options */ 9144596Slling zfs_unregister_callbacks(vfsp->vfs_data); 9154596Slling error = zfs_register_callbacks(vfsp); 9164596Slling goto out; 9174596Slling } 9184596Slling 9191544Seschrock error = zfs_domount(vfsp, osname, cr); 920789Sahrens 921789Sahrens out: 922789Sahrens pn_free(&spn); 923789Sahrens return (error); 924789Sahrens } 925789Sahrens 926789Sahrens static int 927789Sahrens zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp) 928789Sahrens { 929789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 930789Sahrens dev32_t d32; 9312885Sahrens uint64_t refdbytes, availbytes, usedobjs, availobjs; 932789Sahrens 933789Sahrens ZFS_ENTER(zfsvfs); 934789Sahrens 9352885Sahrens dmu_objset_space(zfsvfs->z_os, 9362885Sahrens &refdbytes, &availbytes, &usedobjs, &availobjs); 937789Sahrens 938789Sahrens /* 939789Sahrens * The underlying storage pool actually uses multiple block sizes. 940789Sahrens * We report the fragsize as the smallest block size we support, 941789Sahrens * and we report our blocksize as the filesystem's maximum blocksize. 942789Sahrens */ 943789Sahrens statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT; 944789Sahrens statp->f_bsize = zfsvfs->z_max_blksz; 945789Sahrens 946789Sahrens /* 947789Sahrens * The following report "total" blocks of various kinds in the 948789Sahrens * file system, but reported in terms of f_frsize - the 949789Sahrens * "fragment" size. 
950789Sahrens */ 951789Sahrens 9522885Sahrens statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 9532885Sahrens statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT; 954789Sahrens statp->f_bavail = statp->f_bfree; /* no root reservation */ 955789Sahrens 956789Sahrens /* 957789Sahrens * statvfs() should really be called statufs(), because it assumes 958789Sahrens * static metadata. ZFS doesn't preallocate files, so the best 959789Sahrens * we can do is report the max that could possibly fit in f_files, 960789Sahrens * and that minus the number actually used in f_ffree. 961789Sahrens * For f_ffree, report the smaller of the number of object available 962789Sahrens * and the number of blocks (each object will take at least a block). 963789Sahrens */ 9642885Sahrens statp->f_ffree = MIN(availobjs, statp->f_bfree); 965789Sahrens statp->f_favail = statp->f_ffree; /* no "root reservation" */ 9662885Sahrens statp->f_files = statp->f_ffree + usedobjs; 967789Sahrens 968789Sahrens (void) cmpldev(&d32, vfsp->vfs_dev); 969789Sahrens statp->f_fsid = d32; 970789Sahrens 971789Sahrens /* 972789Sahrens * We're a zfs filesystem. 973789Sahrens */ 974789Sahrens (void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name); 975789Sahrens 9761123Smarks statp->f_flag = vf_to_stf(vfsp->vfs_flag); 977789Sahrens 978789Sahrens statp->f_namemax = ZFS_MAXNAMELEN; 979789Sahrens 980789Sahrens /* 981789Sahrens * We have all of 32 characters to stuff a string here. 982789Sahrens * Is there anything useful we could/should provide? 
983789Sahrens */ 984789Sahrens bzero(statp->f_fstr, sizeof (statp->f_fstr)); 985789Sahrens 986789Sahrens ZFS_EXIT(zfsvfs); 987789Sahrens return (0); 988789Sahrens } 989789Sahrens 990789Sahrens static int 991789Sahrens zfs_root(vfs_t *vfsp, vnode_t **vpp) 992789Sahrens { 993789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 994789Sahrens znode_t *rootzp; 995789Sahrens int error; 996789Sahrens 997789Sahrens ZFS_ENTER(zfsvfs); 998789Sahrens 999789Sahrens error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 1000789Sahrens if (error == 0) 1001789Sahrens *vpp = ZTOV(rootzp); 1002789Sahrens 1003789Sahrens ZFS_EXIT(zfsvfs); 1004789Sahrens return (error); 1005789Sahrens } 1006789Sahrens 1007789Sahrens /*ARGSUSED*/ 1008789Sahrens static int 1009789Sahrens zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) 1010789Sahrens { 1011789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1012*4787Sahrens objset_t *os = zfsvfs->z_os; 1013*4787Sahrens znode_t *zp, *nextzp; 1014789Sahrens int ret; 1015789Sahrens 10164543Smarks ret = secpolicy_fs_unmount(cr, vfsp); 10174543Smarks if (ret) { 10184543Smarks ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), 10194543Smarks ZFS_DELEG_PERM_MOUNT, cr); 10204543Smarks if (ret) 10214543Smarks return (ret); 10224543Smarks } 10231484Sek110237 10244736Sek110237 /* 10254736Sek110237 * We purge the parent filesystem's vfsp as the parent filesystem 10264736Sek110237 * and all of its snapshots have their vnode's v_vfsp set to the 10274736Sek110237 * parent's filesystem's vfsp. Note, 'z_parent' is self 10284736Sek110237 * referential for non-snapshots. 10294736Sek110237 */ 10304736Sek110237 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 10311484Sek110237 1032789Sahrens /* 1033789Sahrens * Unmount any snapshots mounted under .zfs before unmounting the 1034789Sahrens * dataset itself. 
1035789Sahrens */ 1036789Sahrens if (zfsvfs->z_ctldir != NULL && 10374543Smarks (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) { 1038789Sahrens return (ret); 10394543Smarks } 1040789Sahrens 1041*4787Sahrens if (!(fflag & MS_FORCE)) { 10424480Sgw25295 /* 1043*4787Sahrens * Check the number of active vnodes in the file system. 1044*4787Sahrens * Our count is maintained in the vfs structure, but the 1045*4787Sahrens * number is off by 1 to indicate a hold on the vfs 1046*4787Sahrens * structure itself. 1047*4787Sahrens * 1048*4787Sahrens * The '.zfs' directory maintains a reference of its 1049*4787Sahrens * own, and any active references underneath are 1050*4787Sahrens * reflected in the vnode count. 1051789Sahrens */ 1052*4787Sahrens if (zfsvfs->z_ctldir == NULL) { 1053*4787Sahrens if (vfsp->vfs_count > 1) 1054*4787Sahrens return (EBUSY); 1055*4787Sahrens } else { 1056*4787Sahrens if (vfsp->vfs_count > 2 || 1057*4787Sahrens zfsvfs->z_ctldir->v_count > 1) { 1058*4787Sahrens return (EBUSY); 1059*4787Sahrens } 1060789Sahrens } 1061789Sahrens } 1062789Sahrens 1063789Sahrens vfsp->vfs_flag |= VFS_UNMOUNTED; 1064*4787Sahrens 1065*4787Sahrens rw_enter(&zfsvfs->z_unmount_lock, RW_WRITER); 1066*4787Sahrens rw_enter(&zfsvfs->z_unmount_inactive_lock, RW_WRITER); 1067*4787Sahrens 1068*4787Sahrens /* 1069*4787Sahrens * At this point there are no vops active, and any new vops will 1070*4787Sahrens * fail with EIO since we have z_unmount_lock for writer (only 1071*4787Sahrens * relavent for forced unmount). 1072*4787Sahrens * 1073*4787Sahrens * Release all holds on dbufs. 1074*4787Sahrens * Note, the dmu can still callback via znode_pageout_func() 1075*4787Sahrens * which can zfs_znode_free() the znode. So we lock 1076*4787Sahrens * z_all_znodes; search the list for a held dbuf; drop the lock 1077*4787Sahrens * (we know zp can't disappear if we hold a dbuf lock) then 1078*4787Sahrens * regrab the lock and restart. 
1079*4787Sahrens */ 1080*4787Sahrens mutex_enter(&zfsvfs->z_znodes_lock); 1081*4787Sahrens for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { 1082*4787Sahrens nextzp = list_next(&zfsvfs->z_all_znodes, zp); 1083*4787Sahrens if (zp->z_dbuf_held) { 1084*4787Sahrens /* dbufs should only be held when force unmounting */ 1085*4787Sahrens zp->z_dbuf_held = 0; 1086*4787Sahrens mutex_exit(&zfsvfs->z_znodes_lock); 1087*4787Sahrens dmu_buf_rele(zp->z_dbuf, NULL); 1088*4787Sahrens /* Start again */ 1089*4787Sahrens mutex_enter(&zfsvfs->z_znodes_lock); 1090*4787Sahrens nextzp = list_head(&zfsvfs->z_all_znodes); 1091*4787Sahrens } 1092*4787Sahrens } 1093*4787Sahrens mutex_exit(&zfsvfs->z_znodes_lock); 1094*4787Sahrens 1095*4787Sahrens /* 1096*4787Sahrens * Set the unmounted flag and let new vops unblock. 1097*4787Sahrens * zfs_inactive will have the unmounted behavior, and all other 1098*4787Sahrens * vops will fail with EIO. 1099*4787Sahrens */ 1100*4787Sahrens zfsvfs->z_unmounted = B_TRUE; 1101*4787Sahrens rw_exit(&zfsvfs->z_unmount_lock); 1102*4787Sahrens rw_exit(&zfsvfs->z_unmount_inactive_lock); 1103*4787Sahrens 1104*4787Sahrens /* 1105*4787Sahrens * Unregister properties. 1106*4787Sahrens */ 1107*4787Sahrens if (!dmu_objset_is_snapshot(os)) 1108*4787Sahrens zfs_unregister_callbacks(zfsvfs); 1109*4787Sahrens 1110*4787Sahrens /* 1111*4787Sahrens * Close the zil. NB: Can't close the zil while zfs_inactive 1112*4787Sahrens * threads are blocked as zil_close can call zfs_inactive. 
1113*4787Sahrens */ 1114*4787Sahrens if (zfsvfs->z_log) { 1115*4787Sahrens zil_close(zfsvfs->z_log); 1116*4787Sahrens zfsvfs->z_log = NULL; 1117*4787Sahrens } 1118*4787Sahrens 1119*4787Sahrens /* 1120*4787Sahrens * Evict all dbufs so that cached znodes will be freed 1121*4787Sahrens */ 1122*4787Sahrens if (dmu_objset_evict_dbufs(os, B_TRUE)) { 1123*4787Sahrens txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 1124*4787Sahrens (void) dmu_objset_evict_dbufs(os, B_FALSE); 1125*4787Sahrens } 1126*4787Sahrens 1127*4787Sahrens /* 1128*4787Sahrens * Finally close the objset 1129*4787Sahrens */ 1130*4787Sahrens dmu_objset_close(os); 1131*4787Sahrens 1132*4787Sahrens /* 1133*4787Sahrens * We can now safely destroy the '.zfs' directory node. 1134*4787Sahrens */ 1135*4787Sahrens if (zfsvfs->z_ctldir != NULL) 1136*4787Sahrens zfsctl_destroy(zfsvfs); 1137789Sahrens 1138789Sahrens return (0); 1139789Sahrens } 1140789Sahrens 1141789Sahrens static int 1142789Sahrens zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1143789Sahrens { 1144789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1145789Sahrens znode_t *zp; 1146789Sahrens uint64_t object = 0; 1147789Sahrens uint64_t fid_gen = 0; 1148789Sahrens uint64_t gen_mask; 1149789Sahrens uint64_t zp_gen; 1150789Sahrens int i, err; 1151789Sahrens 1152789Sahrens *vpp = NULL; 1153789Sahrens 1154789Sahrens ZFS_ENTER(zfsvfs); 1155789Sahrens 1156789Sahrens if (fidp->fid_len == LONG_FID_LEN) { 1157789Sahrens zfid_long_t *zlfid = (zfid_long_t *)fidp; 1158789Sahrens uint64_t objsetid = 0; 1159789Sahrens uint64_t setgen = 0; 1160789Sahrens 1161789Sahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1162789Sahrens objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1163789Sahrens 1164789Sahrens for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1165789Sahrens setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1166789Sahrens 1167789Sahrens ZFS_EXIT(zfsvfs); 1168789Sahrens 1169789Sahrens err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 
1170789Sahrens if (err) 1171789Sahrens return (EINVAL); 1172789Sahrens ZFS_ENTER(zfsvfs); 1173789Sahrens } 1174789Sahrens 1175789Sahrens if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1176789Sahrens zfid_short_t *zfid = (zfid_short_t *)fidp; 1177789Sahrens 1178789Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 1179789Sahrens object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1180789Sahrens 1181789Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 1182789Sahrens fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1183789Sahrens } else { 1184789Sahrens ZFS_EXIT(zfsvfs); 1185789Sahrens return (EINVAL); 1186789Sahrens } 1187789Sahrens 1188789Sahrens /* A zero fid_gen means we are in the .zfs control directories */ 1189789Sahrens if (fid_gen == 0 && 1190789Sahrens (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 1191789Sahrens *vpp = zfsvfs->z_ctldir; 1192789Sahrens ASSERT(*vpp != NULL); 1193789Sahrens if (object == ZFSCTL_INO_SNAPDIR) { 1194789Sahrens VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 1195789Sahrens 0, NULL, NULL) == 0); 1196789Sahrens } else { 1197789Sahrens VN_HOLD(*vpp); 1198789Sahrens } 1199789Sahrens ZFS_EXIT(zfsvfs); 1200789Sahrens return (0); 1201789Sahrens } 1202789Sahrens 1203789Sahrens gen_mask = -1ULL >> (64 - 8 * i); 1204789Sahrens 1205789Sahrens dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 1206789Sahrens if (err = zfs_zget(zfsvfs, object, &zp)) { 1207789Sahrens ZFS_EXIT(zfsvfs); 1208789Sahrens return (err); 1209789Sahrens } 1210789Sahrens zp_gen = zp->z_phys->zp_gen & gen_mask; 1211789Sahrens if (zp_gen == 0) 1212789Sahrens zp_gen = 1; 12133461Sahrens if (zp->z_unlinked || zp_gen != fid_gen) { 1214789Sahrens dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 1215789Sahrens VN_RELE(ZTOV(zp)); 1216789Sahrens ZFS_EXIT(zfsvfs); 1217789Sahrens return (EINVAL); 1218789Sahrens } 1219789Sahrens 1220789Sahrens *vpp = ZTOV(zp); 1221789Sahrens ZFS_EXIT(zfsvfs); 
1222789Sahrens return (0); 1223789Sahrens } 1224789Sahrens 1225789Sahrens static void 1226789Sahrens zfs_freevfs(vfs_t *vfsp) 1227789Sahrens { 1228789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1229789Sahrens 1230*4787Sahrens mutex_destroy(&zfsvfs->z_znodes_lock); 1231*4787Sahrens rw_destroy(&zfsvfs->z_unmount_lock); 1232*4787Sahrens rw_destroy(&zfsvfs->z_unmount_inactive_lock); 1233789Sahrens kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1234789Sahrens 1235789Sahrens atomic_add_32(&zfs_active_fs_count, -1); 1236789Sahrens } 1237789Sahrens 1238789Sahrens /* 1239789Sahrens * VFS_INIT() initialization. Note that there is no VFS_FINI(), 1240789Sahrens * so we can't safely do any non-idempotent initialization here. 1241789Sahrens * Leave that to zfs_init() and zfs_fini(), which are called 1242789Sahrens * from the module's _init() and _fini() entry points. 1243789Sahrens */ 1244789Sahrens /*ARGSUSED*/ 1245789Sahrens static int 1246789Sahrens zfs_vfsinit(int fstype, char *name) 1247789Sahrens { 1248789Sahrens int error; 1249789Sahrens 1250789Sahrens zfsfstype = fstype; 1251789Sahrens 1252789Sahrens /* 1253789Sahrens * Setup vfsops and vnodeops tables. 1254789Sahrens */ 1255789Sahrens error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops); 1256789Sahrens if (error != 0) { 1257789Sahrens cmn_err(CE_WARN, "zfs: bad vfs ops template"); 1258789Sahrens } 1259789Sahrens 1260789Sahrens error = zfs_create_op_tables(); 1261789Sahrens if (error) { 1262789Sahrens zfs_remove_op_tables(); 1263789Sahrens cmn_err(CE_WARN, "zfs: bad vnode ops template"); 1264789Sahrens (void) vfs_freevfsops_by_type(zfsfstype); 1265789Sahrens return (error); 1266789Sahrens } 1267789Sahrens 1268789Sahrens mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL); 1269789Sahrens 1270789Sahrens /* 1271849Sbonwick * Unique major number for all zfs mounts. 1272849Sbonwick * If we run out of 32-bit minors, we'll getudev() another major. 
1273789Sahrens */ 1274849Sbonwick zfs_major = ddi_name_to_major(ZFS_DRIVER); 1275849Sbonwick zfs_minor = ZFS_MIN_MINOR; 1276789Sahrens 1277789Sahrens return (0); 1278789Sahrens } 1279789Sahrens 1280789Sahrens void 1281789Sahrens zfs_init(void) 1282789Sahrens { 1283789Sahrens /* 1284789Sahrens * Initialize .zfs directory structures 1285789Sahrens */ 1286789Sahrens zfsctl_init(); 1287789Sahrens 1288789Sahrens /* 1289789Sahrens * Initialize znode cache, vnode ops, etc... 1290789Sahrens */ 1291789Sahrens zfs_znode_init(); 1292789Sahrens } 1293789Sahrens 1294789Sahrens void 1295789Sahrens zfs_fini(void) 1296789Sahrens { 1297789Sahrens zfsctl_fini(); 1298789Sahrens zfs_znode_fini(); 1299789Sahrens } 1300789Sahrens 1301789Sahrens int 1302789Sahrens zfs_busy(void) 1303789Sahrens { 1304789Sahrens return (zfs_active_fs_count != 0); 1305789Sahrens } 1306789Sahrens 13074577Sahrens int 13084577Sahrens zfs_get_stats(objset_t *os, nvlist_t *nv) 13094577Sahrens { 13104577Sahrens int error; 13114577Sahrens uint64_t val; 13124577Sahrens 13134577Sahrens error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, &val); 13144577Sahrens if (error == 0) 13154577Sahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VERSION, val); 13164577Sahrens 13174577Sahrens return (error); 13184577Sahrens } 13194577Sahrens 13204577Sahrens int 13214577Sahrens zfs_set_version(const char *name, uint64_t newvers) 13224577Sahrens { 13234577Sahrens int error; 13244577Sahrens objset_t *os; 13254577Sahrens dmu_tx_t *tx; 13264577Sahrens uint64_t curvers; 13274577Sahrens 13284577Sahrens /* 13294577Sahrens * XXX for now, require that the filesystem be unmounted. Would 13304577Sahrens * be nice to find the zfsvfs_t and just update that if 13314577Sahrens * possible. 
13324577Sahrens */ 13334577Sahrens 13344577Sahrens if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 13354577Sahrens return (EINVAL); 13364577Sahrens 13374577Sahrens error = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_PRIMARY, &os); 13384577Sahrens if (error) 13394577Sahrens return (error); 13404577Sahrens 13414577Sahrens error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 13424577Sahrens 8, 1, &curvers); 13434577Sahrens if (error) 13444577Sahrens goto out; 13454577Sahrens if (newvers < curvers) { 13464577Sahrens error = EINVAL; 13474577Sahrens goto out; 13484577Sahrens } 13494577Sahrens 13504577Sahrens tx = dmu_tx_create(os); 13514577Sahrens dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 0, ZPL_VERSION_STR); 13524577Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 13534577Sahrens if (error) { 13544577Sahrens dmu_tx_abort(tx); 13554577Sahrens goto out; 13564577Sahrens } 13574577Sahrens error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, 13584577Sahrens &newvers, tx); 13594577Sahrens 13604577Sahrens spa_history_internal_log(LOG_DS_UPGRADE, 13614577Sahrens dmu_objset_spa(os), tx, CRED(), 13624577Sahrens "oldver=%llu newver=%llu dataset = %llu", curvers, newvers, 13634577Sahrens dmu_objset_id(os)); 13644577Sahrens dmu_tx_commit(tx); 13654577Sahrens 13664577Sahrens out: 13674577Sahrens dmu_objset_close(os); 13684577Sahrens return (error); 13694577Sahrens } 13704577Sahrens 1371789Sahrens static vfsdef_t vfw = { 1372789Sahrens VFSDEF_VERSION, 1373789Sahrens MNTTYPE_ZFS, 1374789Sahrens zfs_vfsinit, 13751488Srsb VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS, 1376789Sahrens &zfs_mntopts 1377789Sahrens }; 1378789Sahrens 1379789Sahrens struct modlfs zfs_modlfs = { 13804577Sahrens &mod_fsops, "ZFS filesystem version " SPA_VERSION_STRING, &vfw 1381789Sahrens }; 1382