1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51484Sek110237 * Common Development and Distribution License (the "License"). 61484Sek110237 * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 223461Sahrens * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 263246Sck153898 #pragma ident "%Z%%M% %I% %E% SMI" 27789Sahrens 28789Sahrens #include <sys/types.h> 29789Sahrens #include <sys/param.h> 30789Sahrens #include <sys/systm.h> 31789Sahrens #include <sys/sysmacros.h> 32789Sahrens #include <sys/kmem.h> 33789Sahrens #include <sys/pathname.h> 34789Sahrens #include <sys/vnode.h> 35789Sahrens #include <sys/vfs.h> 363898Srsb #include <sys/vfs_opreg.h> 37789Sahrens #include <sys/mntent.h> 38789Sahrens #include <sys/mount.h> 39789Sahrens #include <sys/cmn_err.h> 40789Sahrens #include "fs/fs_subr.h" 41789Sahrens #include <sys/zfs_znode.h> 423461Sahrens #include <sys/zfs_dir.h> 43789Sahrens #include <sys/zil.h> 44789Sahrens #include <sys/fs/zfs.h> 45789Sahrens #include <sys/dmu.h> 46789Sahrens #include <sys/dsl_prop.h> 473912Slling #include <sys/dsl_dataset.h> 484543Smarks #include <sys/dsl_deleg.h> 49789Sahrens #include <sys/spa.h> 50789Sahrens #include <sys/zap.h> 51789Sahrens #include <sys/varargs.h> 52789Sahrens #include <sys/policy.h> 53789Sahrens #include <sys/atomic.h> 54789Sahrens #include <sys/mkdev.h> 55789Sahrens #include <sys/modctl.h> 564543Smarks #include <sys/refstr.h> 57789Sahrens #include <sys/zfs_ioctl.h> 58789Sahrens #include <sys/zfs_ctldir.h> 591544Seschrock #include <sys/bootconf.h> 60849Sbonwick #include <sys/sunddi.h> 611484Sek110237 #include <sys/dnlc.h> 62789Sahrens 63789Sahrens int zfsfstype; 64789Sahrens vfsops_t *zfs_vfsops = NULL; 65849Sbonwick static major_t zfs_major; 66789Sahrens static minor_t zfs_minor; 67789Sahrens static kmutex_t zfs_dev_mtx; 68789Sahrens 69789Sahrens static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr); 70789Sahrens static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr); 711544Seschrock static int zfs_mountroot(vfs_t *vfsp, enum whymountroot); 72789Sahrens static int zfs_root(vfs_t *vfsp, vnode_t **vpp); 73789Sahrens static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp); 74789Sahrens static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp); 75789Sahrens static void zfs_freevfs(vfs_t *vfsp); 76789Sahrens 77789Sahrens static const fs_operation_def_t zfs_vfsops_template[] = { 783898Srsb VFSNAME_MOUNT, { .vfs_mount = zfs_mount }, 793898Srsb VFSNAME_MOUNTROOT, { .vfs_mountroot = zfs_mountroot }, 803898Srsb VFSNAME_UNMOUNT, { .vfs_unmount = zfs_umount }, 813898Srsb VFSNAME_ROOT, { .vfs_root = zfs_root }, 823898Srsb VFSNAME_STATVFS, { .vfs_statvfs = zfs_statvfs }, 833898Srsb VFSNAME_SYNC, { .vfs_sync = zfs_sync }, 843898Srsb VFSNAME_VGET, { .vfs_vget = zfs_vget }, 853898Srsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 863898Srsb NULL, NULL 87789Sahrens }; 88789Sahrens 89789Sahrens static const fs_operation_def_t zfs_vfsops_eio_template[] = { 903898Srsb VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs }, 913898Srsb NULL, NULL 92789Sahrens }; 93789Sahrens 94789Sahrens /* 95789Sahrens * We need to keep a count of active fs's. 96789Sahrens * This is necessary to prevent our module 97789Sahrens * from being unloaded after a umount -f 98789Sahrens */ 99789Sahrens static uint32_t zfs_active_fs_count = 0; 100789Sahrens 101789Sahrens static char *noatime_cancel[] = { MNTOPT_ATIME, NULL }; 102789Sahrens static char *atime_cancel[] = { MNTOPT_NOATIME, NULL }; 1033234Sck153898 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 1043234Sck153898 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 105789Sahrens 1063234Sck153898 /* 1074596Slling * MO_DEFAULT is not used since the default value is determined 1084596Slling * by the equivalent property. 1093234Sck153898 */ 110789Sahrens static mntopt_t mntopts[] = { 1113234Sck153898 { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, NULL }, 1123234Sck153898 { MNTOPT_XATTR, xattr_cancel, NULL, 0, NULL }, 1134596Slling { MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL }, 114789Sahrens { MNTOPT_ATIME, atime_cancel, NULL, 0, NULL } 115789Sahrens }; 116789Sahrens 117789Sahrens static mntopts_t zfs_mntopts = { 118789Sahrens sizeof (mntopts) / sizeof (mntopt_t), 119789Sahrens mntopts 120789Sahrens }; 121789Sahrens 122789Sahrens /*ARGSUSED*/ 123789Sahrens int 124789Sahrens zfs_sync(vfs_t *vfsp, short flag, cred_t *cr) 125789Sahrens { 126789Sahrens /* 127789Sahrens * Data integrity is job one. We don't want a compromised kernel 128789Sahrens * writing to the storage pool, so we never sync during panic. 129789Sahrens */ 130789Sahrens if (panicstr) 131789Sahrens return (0); 132789Sahrens 133789Sahrens /* 134789Sahrens * SYNC_ATTR is used by fsflush() to force old filesystems like UFS 135789Sahrens * to sync metadata, which they would otherwise cache indefinitely. 136789Sahrens * Semantically, the only requirement is that the sync be initiated. 137789Sahrens * The DMU syncs out txgs frequently, so there's nothing to do. 138789Sahrens */ 139789Sahrens if (flag & SYNC_ATTR) 140789Sahrens return (0); 141789Sahrens 142789Sahrens if (vfsp != NULL) { 143789Sahrens /* 144789Sahrens * Sync a specific filesystem. 145789Sahrens */ 146789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 147789Sahrens 148789Sahrens ZFS_ENTER(zfsvfs); 149789Sahrens if (zfsvfs->z_log != NULL) 1502638Sperrin zil_commit(zfsvfs->z_log, UINT64_MAX, 0); 151789Sahrens else 152789Sahrens txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 153789Sahrens ZFS_EXIT(zfsvfs); 154789Sahrens } else { 155789Sahrens /* 156789Sahrens * Sync all ZFS filesystems. This is what happens when you 157789Sahrens * run sync(1M). Unlike other filesystems, ZFS honors the 158789Sahrens * request by waiting for all pools to commit all dirty data. 159789Sahrens */ 160789Sahrens spa_sync_allpools(); 161789Sahrens } 162789Sahrens 163789Sahrens return (0); 164789Sahrens } 165789Sahrens 1661544Seschrock static int 1671544Seschrock zfs_create_unique_device(dev_t *dev) 1681544Seschrock { 1691544Seschrock major_t new_major; 1701544Seschrock 1711544Seschrock do { 1721544Seschrock ASSERT3U(zfs_minor, <=, MAXMIN32); 1731544Seschrock minor_t start = zfs_minor; 1741544Seschrock do { 1751544Seschrock mutex_enter(&zfs_dev_mtx); 1761544Seschrock if (zfs_minor >= MAXMIN32) { 1771544Seschrock /* 1781544Seschrock * If we're still using the real major 1791544Seschrock * keep out of /dev/zfs and /dev/zvol minor 1801544Seschrock * number space. If we're using a getudev()'ed 1811544Seschrock * major number, we can use all of its minors. 1821544Seschrock */ 1831544Seschrock if (zfs_major == ddi_name_to_major(ZFS_DRIVER)) 1841544Seschrock zfs_minor = ZFS_MIN_MINOR; 1851544Seschrock else 1861544Seschrock zfs_minor = 0; 1871544Seschrock } else { 1881544Seschrock zfs_minor++; 1891544Seschrock } 1901544Seschrock *dev = makedevice(zfs_major, zfs_minor); 1911544Seschrock mutex_exit(&zfs_dev_mtx); 1921544Seschrock } while (vfs_devismounted(*dev) && zfs_minor != start); 1931544Seschrock if (zfs_minor == start) { 1941544Seschrock /* 1951544Seschrock * We are using all ~262,000 minor numbers for the 1961544Seschrock * current major number. Create a new major number. 1971544Seschrock */ 1981544Seschrock if ((new_major = getudev()) == (major_t)-1) { 1991544Seschrock cmn_err(CE_WARN, 2001544Seschrock "zfs_mount: Can't get unique major " 2011544Seschrock "device number."); 2021544Seschrock return (-1); 2031544Seschrock } 2041544Seschrock mutex_enter(&zfs_dev_mtx); 2051544Seschrock zfs_major = new_major; 2061544Seschrock zfs_minor = 0; 2071544Seschrock 2081544Seschrock mutex_exit(&zfs_dev_mtx); 2091544Seschrock } else { 2101544Seschrock break; 2111544Seschrock } 2121544Seschrock /* CONSTANTCONDITION */ 2131544Seschrock } while (1); 2141544Seschrock 2151544Seschrock return (0); 2161544Seschrock } 2171544Seschrock 218789Sahrens static void 219789Sahrens atime_changed_cb(void *arg, uint64_t newval) 220789Sahrens { 221789Sahrens zfsvfs_t *zfsvfs = arg; 222789Sahrens 223789Sahrens if (newval == TRUE) { 224789Sahrens zfsvfs->z_atime = TRUE; 225789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME); 226789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0); 227789Sahrens } else { 228789Sahrens zfsvfs->z_atime = FALSE; 229789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME); 230789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0); 231789Sahrens } 232789Sahrens } 233789Sahrens 234789Sahrens static void 2353234Sck153898 xattr_changed_cb(void *arg, uint64_t newval) 2363234Sck153898 { 2373234Sck153898 zfsvfs_t *zfsvfs = arg; 2383234Sck153898 2393234Sck153898 if (newval == TRUE) { 2403234Sck153898 /* XXX locking on vfs_flag? */ 2413234Sck153898 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR; 2423234Sck153898 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR); 2433234Sck153898 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0); 2443234Sck153898 } else { 2453234Sck153898 /* XXX locking on vfs_flag? */ 2463234Sck153898 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR; 2473234Sck153898 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR); 2483234Sck153898 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0); 2493234Sck153898 } 2503234Sck153898 } 2513234Sck153898 2523234Sck153898 static void 253789Sahrens blksz_changed_cb(void *arg, uint64_t newval) 254789Sahrens { 255789Sahrens zfsvfs_t *zfsvfs = arg; 256789Sahrens 257789Sahrens if (newval < SPA_MINBLOCKSIZE || 258789Sahrens newval > SPA_MAXBLOCKSIZE || !ISP2(newval)) 259789Sahrens newval = SPA_MAXBLOCKSIZE; 260789Sahrens 261789Sahrens zfsvfs->z_max_blksz = newval; 262789Sahrens zfsvfs->z_vfs->vfs_bsize = newval; 263789Sahrens } 264789Sahrens 265789Sahrens static void 266789Sahrens readonly_changed_cb(void *arg, uint64_t newval) 267789Sahrens { 268789Sahrens zfsvfs_t *zfsvfs = arg; 269789Sahrens 270789Sahrens if (newval) { 271789Sahrens /* XXX locking on vfs_flag? */ 272789Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY; 273789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW); 274789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0); 275789Sahrens } else { 276789Sahrens /* XXX locking on vfs_flag? */ 277789Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY; 278789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO); 279789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0); 280789Sahrens } 281789Sahrens } 282789Sahrens 283789Sahrens static void 284789Sahrens devices_changed_cb(void *arg, uint64_t newval) 285789Sahrens { 286789Sahrens zfsvfs_t *zfsvfs = arg; 287789Sahrens 288789Sahrens if (newval == FALSE) { 289789Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES; 290789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES); 291789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0); 292789Sahrens } else { 293789Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES; 294789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES); 295789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0); 296789Sahrens } 297789Sahrens } 298789Sahrens 299789Sahrens static void 300789Sahrens setuid_changed_cb(void *arg, uint64_t newval) 301789Sahrens { 302789Sahrens zfsvfs_t *zfsvfs = arg; 303789Sahrens 304789Sahrens if (newval == FALSE) { 305789Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID; 306789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID); 307789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0); 308789Sahrens } else { 309789Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID; 310789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID); 311789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0); 312789Sahrens } 313789Sahrens } 314789Sahrens 315789Sahrens static void 316789Sahrens exec_changed_cb(void *arg, uint64_t newval) 317789Sahrens { 318789Sahrens zfsvfs_t *zfsvfs = arg; 319789Sahrens 320789Sahrens if (newval == FALSE) { 321789Sahrens zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC; 322789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC); 323789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0); 324789Sahrens } else { 325789Sahrens zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC; 326789Sahrens vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC); 327789Sahrens vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0); 328789Sahrens } 329789Sahrens } 330789Sahrens 331789Sahrens static void 332789Sahrens snapdir_changed_cb(void *arg, uint64_t newval) 333789Sahrens { 334789Sahrens zfsvfs_t *zfsvfs = arg; 335789Sahrens 336789Sahrens zfsvfs->z_show_ctldir = newval; 337789Sahrens } 338789Sahrens 339789Sahrens static void 340789Sahrens acl_mode_changed_cb(void *arg, uint64_t newval) 341789Sahrens { 342789Sahrens zfsvfs_t *zfsvfs = arg; 343789Sahrens 344789Sahrens zfsvfs->z_acl_mode = newval; 345789Sahrens } 346789Sahrens 347789Sahrens static void 348789Sahrens acl_inherit_changed_cb(void *arg, uint64_t newval) 349789Sahrens { 350789Sahrens zfsvfs_t *zfsvfs = arg; 351789Sahrens 352789Sahrens zfsvfs->z_acl_inherit = newval; 353789Sahrens } 354789Sahrens 3551544Seschrock static int 3561544Seschrock zfs_register_callbacks(vfs_t *vfsp) 3571544Seschrock { 3581544Seschrock struct dsl_dataset *ds = NULL; 3591544Seschrock objset_t *os = NULL; 3601544Seschrock zfsvfs_t *zfsvfs = NULL; 3613265Sahrens int readonly, do_readonly = FALSE; 3623265Sahrens int setuid, do_setuid = FALSE; 3633265Sahrens int exec, do_exec = FALSE; 3643265Sahrens int devices, do_devices = FALSE; 3653265Sahrens int xattr, do_xattr = FALSE; 3664596Slling int atime, do_atime = FALSE; 3671544Seschrock int error = 0; 3681544Seschrock 3691544Seschrock ASSERT(vfsp); 3701544Seschrock zfsvfs = vfsp->vfs_data; 3711544Seschrock ASSERT(zfsvfs); 3721544Seschrock os = zfsvfs->z_os; 3731544Seschrock 3741544Seschrock /* 3751544Seschrock * The act of registering our callbacks will destroy any mount 3761544Seschrock * options we may have. In order to enable temporary overrides 3773234Sck153898 * of mount options, we stash away the current values and 3781544Seschrock * restore them after we register the callbacks. 3791544Seschrock */ 3801544Seschrock if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 3811544Seschrock readonly = B_TRUE; 3821544Seschrock do_readonly = B_TRUE; 3831544Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) { 3841544Seschrock readonly = B_FALSE; 3851544Seschrock do_readonly = B_TRUE; 3861544Seschrock } 3871544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 3881544Seschrock devices = B_FALSE; 3891544Seschrock setuid = B_FALSE; 3901544Seschrock do_devices = B_TRUE; 3911544Seschrock do_setuid = B_TRUE; 3921544Seschrock } else { 3931544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 3941544Seschrock devices = B_FALSE; 3951544Seschrock do_devices = B_TRUE; 3963912Slling } else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) { 3971544Seschrock devices = B_TRUE; 3981544Seschrock do_devices = B_TRUE; 3991544Seschrock } 4001544Seschrock 4011544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 4021544Seschrock setuid = B_FALSE; 4031544Seschrock do_setuid = B_TRUE; 4041544Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) { 4051544Seschrock setuid = B_TRUE; 4061544Seschrock do_setuid = B_TRUE; 4071544Seschrock } 4081544Seschrock } 4091544Seschrock if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) { 4101544Seschrock exec = B_FALSE; 4111544Seschrock do_exec = B_TRUE; 4121544Seschrock } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) { 4131544Seschrock exec = B_TRUE; 4141544Seschrock do_exec = B_TRUE; 4151544Seschrock } 4163234Sck153898 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 4173234Sck153898 xattr = B_FALSE; 4183234Sck153898 do_xattr = B_TRUE; 4193234Sck153898 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) { 4203234Sck153898 xattr = B_TRUE; 4213234Sck153898 do_xattr = B_TRUE; 4223234Sck153898 } 4234596Slling if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) { 4244596Slling atime = B_FALSE; 4254596Slling do_atime = B_TRUE; 4264596Slling } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) { 4274596Slling atime = B_TRUE; 4284596Slling do_atime = B_TRUE; 4294596Slling } 4301544Seschrock 4311544Seschrock /* 4321544Seschrock * Register property callbacks. 4331544Seschrock * 4341544Seschrock * It would probably be fine to just check for i/o error from 4351544Seschrock * the first prop_register(), but I guess I like to go 4361544Seschrock * overboard... 4371544Seschrock */ 4381544Seschrock ds = dmu_objset_ds(os); 4391544Seschrock error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs); 4401544Seschrock error = error ? error : dsl_prop_register(ds, 4413234Sck153898 "xattr", xattr_changed_cb, zfsvfs); 4423234Sck153898 error = error ? error : dsl_prop_register(ds, 4431544Seschrock "recordsize", blksz_changed_cb, zfsvfs); 4441544Seschrock error = error ? error : dsl_prop_register(ds, 4451544Seschrock "readonly", readonly_changed_cb, zfsvfs); 4461544Seschrock error = error ? error : dsl_prop_register(ds, 4471544Seschrock "devices", devices_changed_cb, zfsvfs); 4481544Seschrock error = error ? error : dsl_prop_register(ds, 4491544Seschrock "setuid", setuid_changed_cb, zfsvfs); 4501544Seschrock error = error ? error : dsl_prop_register(ds, 4511544Seschrock "exec", exec_changed_cb, zfsvfs); 4521544Seschrock error = error ? error : dsl_prop_register(ds, 4531544Seschrock "snapdir", snapdir_changed_cb, zfsvfs); 4541544Seschrock error = error ? error : dsl_prop_register(ds, 4551544Seschrock "aclmode", acl_mode_changed_cb, zfsvfs); 4561544Seschrock error = error ? error : dsl_prop_register(ds, 4571544Seschrock "aclinherit", acl_inherit_changed_cb, zfsvfs); 4581544Seschrock if (error) 4591544Seschrock goto unregister; 4601544Seschrock 4611544Seschrock /* 4621544Seschrock * Invoke our callbacks to restore temporary mount options. 4631544Seschrock */ 4641544Seschrock if (do_readonly) 4651544Seschrock readonly_changed_cb(zfsvfs, readonly); 4661544Seschrock if (do_setuid) 4671544Seschrock setuid_changed_cb(zfsvfs, setuid); 4681544Seschrock if (do_exec) 4691544Seschrock exec_changed_cb(zfsvfs, exec); 4701544Seschrock if (do_devices) 4711544Seschrock devices_changed_cb(zfsvfs, devices); 4723234Sck153898 if (do_xattr) 4733234Sck153898 xattr_changed_cb(zfsvfs, xattr); 4744596Slling if (do_atime) 4754596Slling atime_changed_cb(zfsvfs, atime); 4761544Seschrock 4771544Seschrock return (0); 4781544Seschrock 4791544Seschrock unregister: 4801544Seschrock /* 4811544Seschrock * We may attempt to unregister some callbacks that are not 4821544Seschrock * registered, but this is OK; it will simply return ENOMSG, 4831544Seschrock * which we will ignore. 4841544Seschrock */ 4851544Seschrock (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs); 4863234Sck153898 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs); 4871544Seschrock (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs); 4881544Seschrock (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs); 4891544Seschrock (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs); 4901544Seschrock (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs); 4911544Seschrock (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs); 4921544Seschrock (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs); 4931544Seschrock (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs); 4941544Seschrock (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb, 4951544Seschrock zfsvfs); 4961544Seschrock return (error); 4971544Seschrock 4981544Seschrock } 4991544Seschrock 5001544Seschrock static int 5011544Seschrock zfs_domount(vfs_t *vfsp, char *osname, cred_t *cr) 5021544Seschrock { 5031544Seschrock dev_t mount_dev; 5041544Seschrock uint64_t recordsize, readonly; 5051544Seschrock int error = 0; 5061544Seschrock int mode; 5071544Seschrock zfsvfs_t *zfsvfs; 5081544Seschrock znode_t *zp = NULL; 5091544Seschrock 5101544Seschrock ASSERT(vfsp); 5111544Seschrock ASSERT(osname); 5121544Seschrock 5131544Seschrock /* 5141544Seschrock * Initialize the zfs-specific filesystem structure. 5151544Seschrock * Should probably make this a kmem cache, shuffle fields, 5161544Seschrock * and just bzero up to z_hold_mtx[]. 5171544Seschrock */ 5181544Seschrock zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 5191544Seschrock zfsvfs->z_vfs = vfsp; 5201544Seschrock zfsvfs->z_parent = zfsvfs; 5211544Seschrock zfsvfs->z_assign = TXG_NOWAIT; 5221544Seschrock zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE; 5231544Seschrock zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE; 5241544Seschrock 5251544Seschrock mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 5261544Seschrock list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 5271544Seschrock offsetof(znode_t, z_link_node)); 5284787Sahrens rw_init(&zfsvfs->z_unmount_lock, NULL, RW_DEFAULT, NULL); 5294787Sahrens rw_init(&zfsvfs->z_unmount_inactive_lock, NULL, RW_DEFAULT, NULL); 5301544Seschrock 5311544Seschrock /* Initialize the generic filesystem structure. */ 5321544Seschrock vfsp->vfs_bcount = 0; 5331544Seschrock vfsp->vfs_data = NULL; 5341544Seschrock 5351544Seschrock if (zfs_create_unique_device(&mount_dev) == -1) { 5361544Seschrock error = ENODEV; 5371544Seschrock goto out; 5381544Seschrock } 5391544Seschrock ASSERT(vfs_devismounted(mount_dev) == 0); 5401544Seschrock 5411544Seschrock if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize, 5421544Seschrock NULL)) 5431544Seschrock goto out; 5441544Seschrock 5451544Seschrock vfsp->vfs_dev = mount_dev; 5461544Seschrock vfsp->vfs_fstype = zfsfstype; 5471544Seschrock vfsp->vfs_bsize = recordsize; 5481544Seschrock vfsp->vfs_flag |= VFS_NOTRUNC; 5491544Seschrock vfsp->vfs_data = zfsvfs; 5501544Seschrock 5511544Seschrock if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL)) 5521544Seschrock goto out; 5531544Seschrock 5541544Seschrock if (readonly) 5551544Seschrock mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 5561544Seschrock else 5571544Seschrock mode = DS_MODE_PRIMARY; 5581544Seschrock 5591544Seschrock error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os); 5601544Seschrock if (error == EROFS) { 5611544Seschrock mode = DS_MODE_PRIMARY | DS_MODE_READONLY; 5621544Seschrock error = dmu_objset_open(osname, DMU_OST_ZFS, mode, 5631544Seschrock &zfsvfs->z_os); 5641544Seschrock } 5651544Seschrock 5661544Seschrock if (error) 5671544Seschrock goto out; 5681544Seschrock 5691544Seschrock if (error = zfs_init_fs(zfsvfs, &zp, cr)) 5701544Seschrock goto out; 5711544Seschrock 5721544Seschrock /* The call to zfs_init_fs leaves the vnode held, release it here. */ 5731544Seschrock VN_RELE(ZTOV(zp)); 5741544Seschrock 5751544Seschrock if (dmu_objset_is_snapshot(zfsvfs->z_os)) { 5763234Sck153898 uint64_t xattr; 5773234Sck153898 5781544Seschrock ASSERT(mode & DS_MODE_READONLY); 5791544Seschrock atime_changed_cb(zfsvfs, B_FALSE); 5801544Seschrock readonly_changed_cb(zfsvfs, B_TRUE); 5813234Sck153898 if (error = dsl_prop_get_integer(osname, "xattr", &xattr, NULL)) 5823234Sck153898 goto out; 5833234Sck153898 xattr_changed_cb(zfsvfs, xattr); 5841544Seschrock zfsvfs->z_issnap = B_TRUE; 5851544Seschrock } else { 5861544Seschrock error = zfs_register_callbacks(vfsp); 5871544Seschrock if (error) 5881544Seschrock goto out; 5891544Seschrock 5904577Sahrens if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY)) 5914577Sahrens zfs_unlinked_drain(zfsvfs); 5921544Seschrock 5931544Seschrock /* 5941544Seschrock * Parse and replay the intent log. 5954577Sahrens * 5964577Sahrens * Because of ziltest, this must be done after 5974577Sahrens * zfs_unlinked_drain(). (Further note: ziltest doesn't 5984577Sahrens * use readonly mounts, where zfs_unlinked_drain() isn't 5994577Sahrens * called.) This is because ziltest causes spa_sync() 6004577Sahrens * to think it's committed, but actually it is not, so 6014577Sahrens * the intent log contains many txg's worth of changes. 6024577Sahrens * 6034577Sahrens * In particular, if object N is in the unlinked set in 6044577Sahrens * the last txg to actually sync, then it could be 6054577Sahrens * actually freed in a later txg and then reallocated in 6064577Sahrens * a yet later txg. This would write a "create object 6074577Sahrens * N" record to the intent log. Normally, this would be 6084577Sahrens * fine because the spa_sync() would have written out 6094577Sahrens * the fact that object N is free, before we could write 6104577Sahrens * the "create object N" intent log record. 6114577Sahrens * 6124577Sahrens * But when we are in ziltest mode, we advance the "open 6134577Sahrens * txg" without actually spa_sync()-ing the changes to 6144577Sahrens * disk. So we would see that object N is still 6154577Sahrens * allocated and in the unlinked set, and there is an 6164577Sahrens * intent log record saying to allocate it. 6171544Seschrock */ 6181544Seschrock zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign, 6193461Sahrens zfs_replay_vector); 6201544Seschrock 6211544Seschrock if (!zil_disable) 6221544Seschrock zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data); 6231544Seschrock } 6241544Seschrock 6251544Seschrock if (!zfsvfs->z_issnap) 6261544Seschrock zfsctl_create(zfsvfs); 6271544Seschrock out: 6281544Seschrock if (error) { 6291544Seschrock if (zfsvfs->z_os) 6301544Seschrock dmu_objset_close(zfsvfs->z_os); 631*4831Sgw25295 mutex_destroy(&zfsvfs->z_znodes_lock); 632*4831Sgw25295 list_destroy(&zfsvfs->z_all_znodes); 633*4831Sgw25295 rw_destroy(&zfsvfs->z_unmount_lock); 634*4831Sgw25295 rw_destroy(&zfsvfs->z_unmount_inactive_lock); 6351544Seschrock kmem_free(zfsvfs, sizeof (zfsvfs_t)); 6361544Seschrock } else { 6371544Seschrock atomic_add_32(&zfs_active_fs_count, 1); 6381544Seschrock } 6391544Seschrock 6401544Seschrock return (error); 6411544Seschrock } 6421544Seschrock 6431544Seschrock void 6441544Seschrock zfs_unregister_callbacks(zfsvfs_t *zfsvfs) 6451544Seschrock { 6461544Seschrock objset_t *os = zfsvfs->z_os; 6471544Seschrock struct dsl_dataset *ds; 6481544Seschrock 6491544Seschrock /* 6501544Seschrock * Unregister properties. 6511544Seschrock */ 6521544Seschrock if (!dmu_objset_is_snapshot(os)) { 6531544Seschrock ds = dmu_objset_ds(os); 6541544Seschrock VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb, 6551544Seschrock zfsvfs) == 0); 6561544Seschrock 6573234Sck153898 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb, 6583234Sck153898 zfsvfs) == 0); 6593234Sck153898 6601544Seschrock VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, 6611544Seschrock zfsvfs) == 0); 6621544Seschrock 6631544Seschrock VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb, 6641544Seschrock zfsvfs) == 0); 6651544Seschrock 6661544Seschrock VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb, 6671544Seschrock zfsvfs) == 0); 6681544Seschrock 6691544Seschrock VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb, 6701544Seschrock zfsvfs) == 0); 6711544Seschrock 6721544Seschrock VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb, 6731544Seschrock zfsvfs) == 0); 6741544Seschrock 6751544Seschrock VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, 6761544Seschrock zfsvfs) == 0); 6771544Seschrock 6781544Seschrock VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, 6791544Seschrock zfsvfs) == 0); 6801544Seschrock 6811544Seschrock VERIFY(dsl_prop_unregister(ds, "aclinherit", 6821544Seschrock acl_inherit_changed_cb, zfsvfs) == 0); 6831544Seschrock } 6841544Seschrock } 6851544Seschrock 6863912Slling /* 6873912Slling * Convert a decimal digit string to a uint64_t integer. 6883912Slling */ 6893912Slling static int 6903912Slling str_to_uint64(char *str, uint64_t *objnum) 6913912Slling { 6923912Slling uint64_t num = 0; 6933912Slling 6943912Slling while (*str) { 6953912Slling if (*str < '0' || *str > '9') 6963912Slling return (EINVAL); 6973912Slling 6983912Slling num = num*10 + *str++ - '0'; 6993912Slling } 7003912Slling 7013912Slling *objnum = num; 7023912Slling return (0); 7033912Slling } 7043912Slling 7053912Slling /* 7063912Slling * The boot path passed from the boot loader is in the form of 7073912Slling * "rootpool-name/root-filesystem-object-number'. Convert this 7083912Slling * string to a dataset name: "rootpool-name/root-filesystem-name". 7093912Slling */ 7103912Slling static int 7113912Slling parse_bootpath(char *bpath, char *outpath) 7123912Slling { 7133912Slling char *slashp; 7143912Slling uint64_t objnum; 7153912Slling int error; 7163912Slling 7173912Slling if (*bpath == 0 || *bpath == '/') 7183912Slling return (EINVAL); 7193912Slling 7203912Slling slashp = strchr(bpath, '/'); 7213912Slling 7223912Slling /* if no '/', just return the pool name */ 7233912Slling if (slashp == NULL) { 7243912Slling (void) strcpy(outpath, bpath); 7253912Slling return (0); 7263912Slling } 7273912Slling 7283912Slling if (error = str_to_uint64(slashp+1, &objnum)) 7293912Slling return (error); 7303912Slling 7313912Slling *slashp = '\0'; 7323912Slling error = dsl_dsobj_to_dsname(bpath, objnum, outpath); 7333912Slling *slashp = '/'; 7343912Slling 7353912Slling return (error); 7363912Slling } 7373912Slling 7381544Seschrock static int 7391544Seschrock zfs_mountroot(vfs_t *vfsp, enum whymountroot why) 7401544Seschrock { 7411544Seschrock int error = 0; 7421544Seschrock int ret = 0; 7431544Seschrock static int zfsrootdone = 0; 7441544Seschrock zfsvfs_t *zfsvfs = NULL; 7451544Seschrock znode_t *zp = NULL; 7461544Seschrock vnode_t *vp = NULL; 7473912Slling char *zfs_bootpath; 7481544Seschrock 7491544Seschrock ASSERT(vfsp); 7501544Seschrock 7511544Seschrock /* 7523912Slling * The filesystem that we mount as root is defined in the 7533912Slling * "zfs-bootfs" property. 7541544Seschrock */ 7551544Seschrock if (why == ROOT_INIT) { 7561544Seschrock if (zfsrootdone++) 7571544Seschrock return (EBUSY); 7581544Seschrock 7593912Slling if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(), 7603912Slling DDI_PROP_DONTPASS, "zfs-bootfs", &zfs_bootpath) != 7613912Slling DDI_SUCCESS) 7623912Slling return (EIO); 7633912Slling 7643912Slling error = parse_bootpath(zfs_bootpath, rootfs.bo_name); 7653912Slling ddi_prop_free(zfs_bootpath); 7663912Slling 7673912Slling if (error) 7683912Slling return (error); 7691544Seschrock 7701544Seschrock if (error = vfs_lock(vfsp)) 7711544Seschrock return (error); 7721544Seschrock 7733912Slling if (error = zfs_domount(vfsp, rootfs.bo_name, CRED())) 7741544Seschrock goto out; 7751544Seschrock 7761544Seschrock zfsvfs = (zfsvfs_t *)vfsp->vfs_data; 7771544Seschrock ASSERT(zfsvfs); 7781544Seschrock if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) 7791544Seschrock goto out; 7801544Seschrock 7811544Seschrock vp = ZTOV(zp); 7821544Seschrock mutex_enter(&vp->v_lock); 7831544Seschrock vp->v_flag |= VROOT; 7841544Seschrock mutex_exit(&vp->v_lock); 7851544Seschrock rootvp = vp; 7861544Seschrock 7871544Seschrock /* 7881544Seschrock * The zfs_zget call above returns with a hold on vp, we release 7891544Seschrock * it here. 7901544Seschrock */ 7911544Seschrock VN_RELE(vp); 7921544Seschrock 7931544Seschrock /* 7941544Seschrock * Mount root as readonly initially, it will be remouted 7951544Seschrock * read/write by /lib/svc/method/fs-usr. 7961544Seschrock */ 7971544Seschrock readonly_changed_cb(vfsp->vfs_data, B_TRUE); 7981544Seschrock vfs_add((struct vnode *)0, vfsp, 7991544Seschrock (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0); 8001544Seschrock out: 8011544Seschrock vfs_unlock(vfsp); 8021544Seschrock ret = (error) ? error : 0; 8031544Seschrock return (ret); 8041544Seschrock } else if (why == ROOT_REMOUNT) { 8051544Seschrock readonly_changed_cb(vfsp->vfs_data, B_FALSE); 8061544Seschrock vfsp->vfs_flag |= VFS_REMOUNT; 8074596Slling 8084596Slling /* refresh mount options */ 8094596Slling zfs_unregister_callbacks(vfsp->vfs_data); 8104596Slling return (zfs_register_callbacks(vfsp)); 8114596Slling 8121544Seschrock } else if (why == ROOT_UNMOUNT) { 8131544Seschrock zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data); 8141544Seschrock (void) zfs_sync(vfsp, 0, 0); 8151544Seschrock return (0); 8161544Seschrock } 8171544Seschrock 8181544Seschrock /* 8191544Seschrock * if "why" is equal to anything else other than ROOT_INIT, 8201544Seschrock * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it. 8211544Seschrock */ 8221544Seschrock return (ENOTSUP); 8231544Seschrock } 8241544Seschrock 825789Sahrens /*ARGSUSED*/ 826789Sahrens static int 827789Sahrens zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr) 828789Sahrens { 829789Sahrens char *osname; 830789Sahrens pathname_t spn; 831789Sahrens int error = 0; 832789Sahrens uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ? 8333912Slling UIO_SYSSPACE : UIO_USERSPACE; 834789Sahrens int canwrite; 835789Sahrens 836789Sahrens if (mvp->v_type != VDIR) 837789Sahrens return (ENOTDIR); 838789Sahrens 839789Sahrens mutex_enter(&mvp->v_lock); 840789Sahrens if ((uap->flags & MS_REMOUNT) == 0 && 841789Sahrens (uap->flags & MS_OVERLAY) == 0 && 842789Sahrens (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 843789Sahrens mutex_exit(&mvp->v_lock); 844789Sahrens return (EBUSY); 845789Sahrens } 846789Sahrens mutex_exit(&mvp->v_lock); 847789Sahrens 848789Sahrens /* 849789Sahrens * ZFS does not support passing unparsed data in via MS_DATA. 850789Sahrens * Users should use the MS_OPTIONSTR interface; this means 851789Sahrens * that all option parsing is already done and the options struct 852789Sahrens * can be interrogated. 853789Sahrens */ 854789Sahrens if ((uap->flags & MS_DATA) && uap->datalen > 0) 855789Sahrens return (EINVAL); 856789Sahrens 857789Sahrens /* 858789Sahrens * Get the objset name (the "special" mount argument). 859789Sahrens */ 860789Sahrens if (error = pn_get(uap->spec, fromspace, &spn)) 861789Sahrens return (error); 862789Sahrens 863789Sahrens osname = spn.pn_path; 864789Sahrens 8654543Smarks /* 8664543Smarks * Check for mount privilege? 8674543Smarks * 8684543Smarks * If we don't have privilege then see if 8694543Smarks * we have local permission to allow it 8704543Smarks */ 8714543Smarks error = secpolicy_fs_mount(cr, mvp, vfsp); 8724543Smarks if (error) { 8734543Smarks error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr); 8744543Smarks if (error == 0) { 8754543Smarks vattr_t vattr; 8764543Smarks 8774543Smarks /* 8784543Smarks * Make sure user is the owner of the mount point 8794543Smarks * or has sufficient privileges. 8804543Smarks */ 8814543Smarks 8824543Smarks vattr.va_mask = AT_UID; 8834543Smarks 8844614Smarks if (error = VOP_GETATTR(mvp, &vattr, 0, cr)) { 8854543Smarks goto out; 8864543Smarks } 8874543Smarks 8884543Smarks if (error = secpolicy_vnode_owner(cr, vattr.va_uid)) { 8894543Smarks goto out; 8904543Smarks } 8914543Smarks 8924543Smarks if (error = VOP_ACCESS(mvp, VWRITE, 0, cr)) { 8934543Smarks goto out; 8944543Smarks } 8954543Smarks 8964543Smarks secpolicy_fs_mount_clearopts(cr, vfsp); 8974543Smarks } else { 8984543Smarks goto out; 8994543Smarks } 9004543Smarks } 901789Sahrens 902789Sahrens /* 903789Sahrens * Refuse to mount a filesystem if we are in a local zone and the 904789Sahrens * dataset is not visible. 905789Sahrens */ 906789Sahrens if (!INGLOBALZONE(curproc) && 907789Sahrens (!zone_dataset_visible(osname, &canwrite) || !canwrite)) { 908789Sahrens error = EPERM; 909789Sahrens goto out; 910789Sahrens } 911789Sahrens 9124596Slling /* 9134596Slling * When doing a remount, we simply refresh our temporary properties 9144596Slling * according to those options set in the current VFS options. 9154596Slling */ 9164596Slling if (uap->flags & MS_REMOUNT) { 9174596Slling /* refresh mount options */ 9184596Slling zfs_unregister_callbacks(vfsp->vfs_data); 9194596Slling error = zfs_register_callbacks(vfsp); 9204596Slling goto out; 9214596Slling } 9224596Slling 9231544Seschrock error = zfs_domount(vfsp, osname, cr); 924789Sahrens 925789Sahrens out: 926789Sahrens pn_free(&spn); 927789Sahrens return (error); 928789Sahrens } 929789Sahrens 930789Sahrens static int 931789Sahrens zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp) 932789Sahrens { 933789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 934789Sahrens dev32_t d32; 9352885Sahrens uint64_t refdbytes, availbytes, usedobjs, availobjs; 936789Sahrens 937789Sahrens ZFS_ENTER(zfsvfs); 938789Sahrens 9392885Sahrens dmu_objset_space(zfsvfs->z_os, 9402885Sahrens &refdbytes, &availbytes, &usedobjs, &availobjs); 941789Sahrens 942789Sahrens /* 943789Sahrens * The underlying storage pool actually uses multiple block sizes. 944789Sahrens * We report the fragsize as the smallest block size we support, 945789Sahrens * and we report our blocksize as the filesystem's maximum blocksize. 946789Sahrens */ 947789Sahrens statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT; 948789Sahrens statp->f_bsize = zfsvfs->z_max_blksz; 949789Sahrens 950789Sahrens /* 951789Sahrens * The following report "total" blocks of various kinds in the 952789Sahrens * file system, but reported in terms of f_frsize - the 953789Sahrens * "fragment" size. 954789Sahrens */ 955789Sahrens 9562885Sahrens statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT; 9572885Sahrens statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT; 958789Sahrens statp->f_bavail = statp->f_bfree; /* no root reservation */ 959789Sahrens 960789Sahrens /* 961789Sahrens * statvfs() should really be called statufs(), because it assumes 962789Sahrens * static metadata. ZFS doesn't preallocate files, so the best 963789Sahrens * we can do is report the max that could possibly fit in f_files, 964789Sahrens * and that minus the number actually used in f_ffree. 965789Sahrens * For f_ffree, report the smaller of the number of object available 966789Sahrens * and the number of blocks (each object will take at least a block). 967789Sahrens */ 9682885Sahrens statp->f_ffree = MIN(availobjs, statp->f_bfree); 969789Sahrens statp->f_favail = statp->f_ffree; /* no "root reservation" */ 9702885Sahrens statp->f_files = statp->f_ffree + usedobjs; 971789Sahrens 972789Sahrens (void) cmpldev(&d32, vfsp->vfs_dev); 973789Sahrens statp->f_fsid = d32; 974789Sahrens 975789Sahrens /* 976789Sahrens * We're a zfs filesystem. 977789Sahrens */ 978789Sahrens (void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name); 979789Sahrens 9801123Smarks statp->f_flag = vf_to_stf(vfsp->vfs_flag); 981789Sahrens 982789Sahrens statp->f_namemax = ZFS_MAXNAMELEN; 983789Sahrens 984789Sahrens /* 985789Sahrens * We have all of 32 characters to stuff a string here. 986789Sahrens * Is there anything useful we could/should provide? 987789Sahrens */ 988789Sahrens bzero(statp->f_fstr, sizeof (statp->f_fstr)); 989789Sahrens 990789Sahrens ZFS_EXIT(zfsvfs); 991789Sahrens return (0); 992789Sahrens } 993789Sahrens 994789Sahrens static int 995789Sahrens zfs_root(vfs_t *vfsp, vnode_t **vpp) 996789Sahrens { 997789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 998789Sahrens znode_t *rootzp; 999789Sahrens int error; 1000789Sahrens 1001789Sahrens ZFS_ENTER(zfsvfs); 1002789Sahrens 1003789Sahrens error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); 1004789Sahrens if (error == 0) 1005789Sahrens *vpp = ZTOV(rootzp); 1006789Sahrens 1007789Sahrens ZFS_EXIT(zfsvfs); 1008789Sahrens return (error); 1009789Sahrens } 1010789Sahrens 1011789Sahrens /*ARGSUSED*/ 1012789Sahrens static int 1013789Sahrens zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr) 1014789Sahrens { 1015789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 10164787Sahrens objset_t *os = zfsvfs->z_os; 10174787Sahrens znode_t *zp, *nextzp; 1018789Sahrens int ret; 1019789Sahrens 10204543Smarks ret = secpolicy_fs_unmount(cr, vfsp); 10214543Smarks if (ret) { 10224543Smarks ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource), 10234543Smarks ZFS_DELEG_PERM_MOUNT, cr); 10244543Smarks if (ret) 10254543Smarks return (ret); 10264543Smarks } 10271484Sek110237 10284736Sek110237 /* 10294736Sek110237 * We purge the parent filesystem's vfsp as the parent filesystem 10304736Sek110237 * and all of its snapshots have their vnode's v_vfsp set to the 10314736Sek110237 * parent's filesystem's vfsp. Note, 'z_parent' is self 10324736Sek110237 * referential for non-snapshots. 10334736Sek110237 */ 10344736Sek110237 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0); 10351484Sek110237 1036789Sahrens /* 1037789Sahrens * Unmount any snapshots mounted under .zfs before unmounting the 1038789Sahrens * dataset itself. 1039789Sahrens */ 1040789Sahrens if (zfsvfs->z_ctldir != NULL && 10414543Smarks (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) { 1042789Sahrens return (ret); 10434543Smarks } 1044789Sahrens 10454787Sahrens if (!(fflag & MS_FORCE)) { 10464480Sgw25295 /* 10474787Sahrens * Check the number of active vnodes in the file system. 10484787Sahrens * Our count is maintained in the vfs structure, but the 10494787Sahrens * number is off by 1 to indicate a hold on the vfs 10504787Sahrens * structure itself. 10514787Sahrens * 10524787Sahrens * The '.zfs' directory maintains a reference of its 10534787Sahrens * own, and any active references underneath are 10544787Sahrens * reflected in the vnode count. 1055789Sahrens */ 10564787Sahrens if (zfsvfs->z_ctldir == NULL) { 10574787Sahrens if (vfsp->vfs_count > 1) 10584787Sahrens return (EBUSY); 10594787Sahrens } else { 10604787Sahrens if (vfsp->vfs_count > 2 || 10614787Sahrens zfsvfs->z_ctldir->v_count > 1) { 10624787Sahrens return (EBUSY); 10634787Sahrens } 1064789Sahrens } 1065789Sahrens } 1066789Sahrens 1067789Sahrens vfsp->vfs_flag |= VFS_UNMOUNTED; 10684787Sahrens 10694787Sahrens rw_enter(&zfsvfs->z_unmount_lock, RW_WRITER); 10704787Sahrens rw_enter(&zfsvfs->z_unmount_inactive_lock, RW_WRITER); 10714787Sahrens 10724787Sahrens /* 10734787Sahrens * At this point there are no vops active, and any new vops will 10744787Sahrens * fail with EIO since we have z_unmount_lock for writer (only 10754787Sahrens * relavent for forced unmount). 10764787Sahrens * 10774787Sahrens * Release all holds on dbufs. 10784787Sahrens * Note, the dmu can still callback via znode_pageout_func() 10794787Sahrens * which can zfs_znode_free() the znode. So we lock 10804787Sahrens * z_all_znodes; search the list for a held dbuf; drop the lock 10814787Sahrens * (we know zp can't disappear if we hold a dbuf lock) then 10824787Sahrens * regrab the lock and restart. 10834787Sahrens */ 10844787Sahrens mutex_enter(&zfsvfs->z_znodes_lock); 10854787Sahrens for (zp = list_head(&zfsvfs->z_all_znodes); zp; zp = nextzp) { 10864787Sahrens nextzp = list_next(&zfsvfs->z_all_znodes, zp); 10874787Sahrens if (zp->z_dbuf_held) { 10884787Sahrens /* dbufs should only be held when force unmounting */ 10894787Sahrens zp->z_dbuf_held = 0; 10904787Sahrens mutex_exit(&zfsvfs->z_znodes_lock); 10914787Sahrens dmu_buf_rele(zp->z_dbuf, NULL); 10924787Sahrens /* Start again */ 10934787Sahrens mutex_enter(&zfsvfs->z_znodes_lock); 10944787Sahrens nextzp = list_head(&zfsvfs->z_all_znodes); 10954787Sahrens } 10964787Sahrens } 10974787Sahrens mutex_exit(&zfsvfs->z_znodes_lock); 10984787Sahrens 10994787Sahrens /* 11004787Sahrens * Set the unmounted flag and let new vops unblock. 11014787Sahrens * zfs_inactive will have the unmounted behavior, and all other 11024787Sahrens * vops will fail with EIO. 11034787Sahrens */ 11044787Sahrens zfsvfs->z_unmounted = B_TRUE; 11054787Sahrens rw_exit(&zfsvfs->z_unmount_lock); 11064787Sahrens rw_exit(&zfsvfs->z_unmount_inactive_lock); 11074787Sahrens 11084787Sahrens /* 11094787Sahrens * Unregister properties. 11104787Sahrens */ 11114787Sahrens if (!dmu_objset_is_snapshot(os)) 11124787Sahrens zfs_unregister_callbacks(zfsvfs); 11134787Sahrens 11144787Sahrens /* 11154787Sahrens * Close the zil. NB: Can't close the zil while zfs_inactive 11164787Sahrens * threads are blocked as zil_close can call zfs_inactive. 11174787Sahrens */ 11184787Sahrens if (zfsvfs->z_log) { 11194787Sahrens zil_close(zfsvfs->z_log); 11204787Sahrens zfsvfs->z_log = NULL; 11214787Sahrens } 11224787Sahrens 11234787Sahrens /* 11244787Sahrens * Evict all dbufs so that cached znodes will be freed 11254787Sahrens */ 11264787Sahrens if (dmu_objset_evict_dbufs(os, B_TRUE)) { 11274787Sahrens txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0); 11284787Sahrens (void) dmu_objset_evict_dbufs(os, B_FALSE); 11294787Sahrens } 11304787Sahrens 11314787Sahrens /* 11324787Sahrens * Finally close the objset 11334787Sahrens */ 11344787Sahrens dmu_objset_close(os); 11354787Sahrens 11364787Sahrens /* 11374787Sahrens * We can now safely destroy the '.zfs' directory node. 11384787Sahrens */ 11394787Sahrens if (zfsvfs->z_ctldir != NULL) 11404787Sahrens zfsctl_destroy(zfsvfs); 1141789Sahrens 1142789Sahrens return (0); 1143789Sahrens } 1144789Sahrens 1145789Sahrens static int 1146789Sahrens zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp) 1147789Sahrens { 1148789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1149789Sahrens znode_t *zp; 1150789Sahrens uint64_t object = 0; 1151789Sahrens uint64_t fid_gen = 0; 1152789Sahrens uint64_t gen_mask; 1153789Sahrens uint64_t zp_gen; 1154789Sahrens int i, err; 1155789Sahrens 1156789Sahrens *vpp = NULL; 1157789Sahrens 1158789Sahrens ZFS_ENTER(zfsvfs); 1159789Sahrens 1160789Sahrens if (fidp->fid_len == LONG_FID_LEN) { 1161789Sahrens zfid_long_t *zlfid = (zfid_long_t *)fidp; 1162789Sahrens uint64_t objsetid = 0; 1163789Sahrens uint64_t setgen = 0; 1164789Sahrens 1165789Sahrens for (i = 0; i < sizeof (zlfid->zf_setid); i++) 1166789Sahrens objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i); 1167789Sahrens 1168789Sahrens for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 1169789Sahrens setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i); 1170789Sahrens 1171789Sahrens ZFS_EXIT(zfsvfs); 1172789Sahrens 1173789Sahrens err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs); 1174789Sahrens if (err) 1175789Sahrens return (EINVAL); 1176789Sahrens ZFS_ENTER(zfsvfs); 1177789Sahrens } 1178789Sahrens 1179789Sahrens if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) { 1180789Sahrens zfid_short_t *zfid = (zfid_short_t *)fidp; 1181789Sahrens 1182789Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 1183789Sahrens object |= ((uint64_t)zfid->zf_object[i]) << (8 * i); 1184789Sahrens 1185789Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 1186789Sahrens fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i); 1187789Sahrens } else { 1188789Sahrens ZFS_EXIT(zfsvfs); 1189789Sahrens return (EINVAL); 1190789Sahrens } 1191789Sahrens 1192789Sahrens /* A zero fid_gen means we are in the .zfs control directories */ 1193789Sahrens if (fid_gen == 0 && 1194789Sahrens (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) { 1195789Sahrens *vpp = zfsvfs->z_ctldir; 1196789Sahrens ASSERT(*vpp != NULL); 1197789Sahrens if (object == ZFSCTL_INO_SNAPDIR) { 1198789Sahrens VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL, 1199789Sahrens 0, NULL, NULL) == 0); 1200789Sahrens } else { 1201789Sahrens VN_HOLD(*vpp); 1202789Sahrens } 1203789Sahrens ZFS_EXIT(zfsvfs); 1204789Sahrens return (0); 1205789Sahrens } 1206789Sahrens 1207789Sahrens gen_mask = -1ULL >> (64 - 8 * i); 1208789Sahrens 1209789Sahrens dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask); 1210789Sahrens if (err = zfs_zget(zfsvfs, object, &zp)) { 1211789Sahrens ZFS_EXIT(zfsvfs); 1212789Sahrens return (err); 1213789Sahrens } 1214789Sahrens zp_gen = zp->z_phys->zp_gen & gen_mask; 1215789Sahrens if (zp_gen == 0) 1216789Sahrens zp_gen = 1; 12173461Sahrens if (zp->z_unlinked || zp_gen != fid_gen) { 1218789Sahrens dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen); 1219789Sahrens VN_RELE(ZTOV(zp)); 1220789Sahrens ZFS_EXIT(zfsvfs); 1221789Sahrens return (EINVAL); 1222789Sahrens } 1223789Sahrens 1224789Sahrens *vpp = ZTOV(zp); 1225789Sahrens ZFS_EXIT(zfsvfs); 1226789Sahrens return (0); 1227789Sahrens } 1228789Sahrens 1229789Sahrens static void 1230789Sahrens zfs_freevfs(vfs_t *vfsp) 1231789Sahrens { 1232789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 1233*4831Sgw25295 int i; 1234*4831Sgw25295 1235*4831Sgw25295 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) 1236*4831Sgw25295 mutex_destroy(&zfsvfs->z_hold_mtx[i]); 1237789Sahrens 12384787Sahrens mutex_destroy(&zfsvfs->z_znodes_lock); 1239*4831Sgw25295 list_destroy(&zfsvfs->z_all_znodes); 12404787Sahrens rw_destroy(&zfsvfs->z_unmount_lock); 12414787Sahrens rw_destroy(&zfsvfs->z_unmount_inactive_lock); 1242789Sahrens kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1243789Sahrens 1244789Sahrens atomic_add_32(&zfs_active_fs_count, -1); 1245789Sahrens } 1246789Sahrens 1247789Sahrens /* 1248789Sahrens * VFS_INIT() initialization. Note that there is no VFS_FINI(), 1249789Sahrens * so we can't safely do any non-idempotent initialization here. 1250789Sahrens * Leave that to zfs_init() and zfs_fini(), which are called 1251789Sahrens * from the module's _init() and _fini() entry points. 1252789Sahrens */ 1253789Sahrens /*ARGSUSED*/ 1254789Sahrens static int 1255789Sahrens zfs_vfsinit(int fstype, char *name) 1256789Sahrens { 1257789Sahrens int error; 1258789Sahrens 1259789Sahrens zfsfstype = fstype; 1260789Sahrens 1261789Sahrens /* 1262789Sahrens * Setup vfsops and vnodeops tables. 1263789Sahrens */ 1264789Sahrens error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops); 1265789Sahrens if (error != 0) { 1266789Sahrens cmn_err(CE_WARN, "zfs: bad vfs ops template"); 1267789Sahrens } 1268789Sahrens 1269789Sahrens error = zfs_create_op_tables(); 1270789Sahrens if (error) { 1271789Sahrens zfs_remove_op_tables(); 1272789Sahrens cmn_err(CE_WARN, "zfs: bad vnode ops template"); 1273789Sahrens (void) vfs_freevfsops_by_type(zfsfstype); 1274789Sahrens return (error); 1275789Sahrens } 1276789Sahrens 1277789Sahrens mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL); 1278789Sahrens 1279789Sahrens /* 1280849Sbonwick * Unique major number for all zfs mounts. 1281849Sbonwick * If we run out of 32-bit minors, we'll getudev() another major. 1282789Sahrens */ 1283849Sbonwick zfs_major = ddi_name_to_major(ZFS_DRIVER); 1284849Sbonwick zfs_minor = ZFS_MIN_MINOR; 1285789Sahrens 1286789Sahrens return (0); 1287789Sahrens } 1288789Sahrens 1289789Sahrens void 1290789Sahrens zfs_init(void) 1291789Sahrens { 1292789Sahrens /* 1293789Sahrens * Initialize .zfs directory structures 1294789Sahrens */ 1295789Sahrens zfsctl_init(); 1296789Sahrens 1297789Sahrens /* 1298789Sahrens * Initialize znode cache, vnode ops, etc... 1299789Sahrens */ 1300789Sahrens zfs_znode_init(); 1301789Sahrens } 1302789Sahrens 1303789Sahrens void 1304789Sahrens zfs_fini(void) 1305789Sahrens { 1306789Sahrens zfsctl_fini(); 1307789Sahrens zfs_znode_fini(); 1308789Sahrens } 1309789Sahrens 1310789Sahrens int 1311789Sahrens zfs_busy(void) 1312789Sahrens { 1313789Sahrens return (zfs_active_fs_count != 0); 1314789Sahrens } 1315789Sahrens 13164577Sahrens int 13174577Sahrens zfs_get_stats(objset_t *os, nvlist_t *nv) 13184577Sahrens { 13194577Sahrens int error; 13204577Sahrens uint64_t val; 13214577Sahrens 13224577Sahrens error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, &val); 13234577Sahrens if (error == 0) 13244577Sahrens dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VERSION, val); 13254577Sahrens 13264577Sahrens return (error); 13274577Sahrens } 13284577Sahrens 13294577Sahrens int 13304577Sahrens zfs_set_version(const char *name, uint64_t newvers) 13314577Sahrens { 13324577Sahrens int error; 13334577Sahrens objset_t *os; 13344577Sahrens dmu_tx_t *tx; 13354577Sahrens uint64_t curvers; 13364577Sahrens 13374577Sahrens /* 13384577Sahrens * XXX for now, require that the filesystem be unmounted. Would 13394577Sahrens * be nice to find the zfsvfs_t and just update that if 13404577Sahrens * possible. 13414577Sahrens */ 13424577Sahrens 13434577Sahrens if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION) 13444577Sahrens return (EINVAL); 13454577Sahrens 13464577Sahrens error = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_PRIMARY, &os); 13474577Sahrens if (error) 13484577Sahrens return (error); 13494577Sahrens 13504577Sahrens error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 13514577Sahrens 8, 1, &curvers); 13524577Sahrens if (error) 13534577Sahrens goto out; 13544577Sahrens if (newvers < curvers) { 13554577Sahrens error = EINVAL; 13564577Sahrens goto out; 13574577Sahrens } 13584577Sahrens 13594577Sahrens tx = dmu_tx_create(os); 13604577Sahrens dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 0, ZPL_VERSION_STR); 13614577Sahrens error = dmu_tx_assign(tx, TXG_WAIT); 13624577Sahrens if (error) { 13634577Sahrens dmu_tx_abort(tx); 13644577Sahrens goto out; 13654577Sahrens } 13664577Sahrens error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, 13674577Sahrens &newvers, tx); 13684577Sahrens 13694577Sahrens spa_history_internal_log(LOG_DS_UPGRADE, 13704577Sahrens dmu_objset_spa(os), tx, CRED(), 13714577Sahrens "oldver=%llu newver=%llu dataset = %llu", curvers, newvers, 13724577Sahrens dmu_objset_id(os)); 13734577Sahrens dmu_tx_commit(tx); 13744577Sahrens 13754577Sahrens out: 13764577Sahrens dmu_objset_close(os); 13774577Sahrens return (error); 13784577Sahrens } 13794577Sahrens 1380789Sahrens static vfsdef_t vfw = { 1381789Sahrens VFSDEF_VERSION, 1382789Sahrens MNTTYPE_ZFS, 1383789Sahrens zfs_vfsinit, 13841488Srsb VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS, 1385789Sahrens &zfs_mntopts 1386789Sahrens }; 1387789Sahrens 1388789Sahrens struct modlfs zfs_modlfs = { 13894577Sahrens &mod_fsops, "ZFS filesystem version " SPA_VERSION_STRING, &vfw 1390789Sahrens }; 1391