/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
 */

/*
 * ZFS control directory (a.k.a. ".zfs")
 *
 * This directory provides a common location for all ZFS meta-objects.
 * Currently, this is only the 'snapshot' directory, but this may expand in the
 * future.  The elements are built using the GFS primitives, as the hierarchy
 * does not actually exist on disk.
 *
 * For 'snapshot', we don't want to have all snapshots always mounted, because
 * this would take up a huge amount of space in /etc/mnttab.  We have three
 * types of objects:
 *
 *	ctldir ------> snapshotdir -------> snapshot
 *                                             |
 *                                             |
 *                                             V
 *                                         mounted fs
 *
 * The 'snapshot' node contains just enough information to lookup '..' and act
 * as a mountpoint for the snapshot.  Whenever we lookup a specific snapshot, we
 * perform an automount of the underlying filesystem and return the
 * corresponding vnode.
 *
 * All mounts are handled automatically by the kernel, but unmounts are
 * (currently) handled from user land.  The main reason is that there is no
 * reliable way to auto-unmount the filesystem when it's "no longer in use".
 * When the user unmounts a filesystem, we call zfsctl_unmount(), which
 * unmounts any snapshots within the snapshot directory.
 *
 * The '.zfs', '.zfs/snapshot', and all directories created under
 * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') are all GFS nodes and
 * share the same vfs_t as the head filesystem (what '.zfs' lives under).
 *
 * File systems mounted on top of the GFS nodes '.zfs/snapshot/<snapname>'
 * (ie: snapshots) are ZFS nodes and have their own unique vfs_t.
62eda14cbcSMatt Macy * However, vnodes within these mounted on file systems have their v_vfsp 63eda14cbcSMatt Macy * fields set to the head filesystem to make NFS happy (see 64eda14cbcSMatt Macy * zfsctl_snapdir_lookup()). We VFS_HOLD the head filesystem's vfs_t 65eda14cbcSMatt Macy * so that it cannot be freed until all snapshots have been unmounted. 66eda14cbcSMatt Macy */ 67eda14cbcSMatt Macy 68eda14cbcSMatt Macy #include <sys/types.h> 69eda14cbcSMatt Macy #include <sys/param.h> 70eda14cbcSMatt Macy #include <sys/libkern.h> 71eda14cbcSMatt Macy #include <sys/dirent.h> 72eda14cbcSMatt Macy #include <sys/zfs_context.h> 73eda14cbcSMatt Macy #include <sys/zfs_ctldir.h> 74eda14cbcSMatt Macy #include <sys/zfs_ioctl.h> 75eda14cbcSMatt Macy #include <sys/zfs_vfsops.h> 76eda14cbcSMatt Macy #include <sys/namei.h> 77eda14cbcSMatt Macy #include <sys/stat.h> 78eda14cbcSMatt Macy #include <sys/dmu.h> 79eda14cbcSMatt Macy #include <sys/dsl_dataset.h> 80eda14cbcSMatt Macy #include <sys/dsl_destroy.h> 81eda14cbcSMatt Macy #include <sys/dsl_deleg.h> 82eda14cbcSMatt Macy #include <sys/mount.h> 83eda14cbcSMatt Macy #include <sys/zap.h> 84eda14cbcSMatt Macy #include <sys/sysproto.h> 85eda14cbcSMatt Macy 86eda14cbcSMatt Macy #include "zfs_namecheck.h" 87eda14cbcSMatt Macy 88eda14cbcSMatt Macy #include <sys/kernel.h> 89eda14cbcSMatt Macy #include <sys/ccompat.h> 90eda14cbcSMatt Macy 91eda14cbcSMatt Macy /* Common access mode for all virtual directories under the ctldir */ 92eda14cbcSMatt Macy const uint16_t zfsctl_ctldir_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | 93eda14cbcSMatt Macy S_IROTH | S_IXOTH; 94eda14cbcSMatt Macy 95eda14cbcSMatt Macy /* 96eda14cbcSMatt Macy * "Synthetic" filesystem implementation. 97eda14cbcSMatt Macy */ 98eda14cbcSMatt Macy 99eda14cbcSMatt Macy /* 100eda14cbcSMatt Macy * Assert that A implies B. 
101eda14cbcSMatt Macy */ 102eda14cbcSMatt Macy #define KASSERT_IMPLY(A, B, msg) KASSERT(!(A) || (B), (msg)); 103eda14cbcSMatt Macy 104eda14cbcSMatt Macy static MALLOC_DEFINE(M_SFSNODES, "sfs_nodes", "synthetic-fs nodes"); 105eda14cbcSMatt Macy 106eda14cbcSMatt Macy typedef struct sfs_node { 107eda14cbcSMatt Macy char sn_name[ZFS_MAX_DATASET_NAME_LEN]; 108eda14cbcSMatt Macy uint64_t sn_parent_id; 109eda14cbcSMatt Macy uint64_t sn_id; 110eda14cbcSMatt Macy } sfs_node_t; 111eda14cbcSMatt Macy 112eda14cbcSMatt Macy /* 113eda14cbcSMatt Macy * Check the parent's ID as well as the node's to account for a chance 114eda14cbcSMatt Macy * that IDs originating from different domains (snapshot IDs, artificial 115eda14cbcSMatt Macy * IDs, znode IDs) may clash. 116eda14cbcSMatt Macy */ 117eda14cbcSMatt Macy static int 118eda14cbcSMatt Macy sfs_compare_ids(struct vnode *vp, void *arg) 119eda14cbcSMatt Macy { 120eda14cbcSMatt Macy sfs_node_t *n1 = vp->v_data; 121eda14cbcSMatt Macy sfs_node_t *n2 = arg; 122eda14cbcSMatt Macy bool equal; 123eda14cbcSMatt Macy 124eda14cbcSMatt Macy equal = n1->sn_id == n2->sn_id && 125eda14cbcSMatt Macy n1->sn_parent_id == n2->sn_parent_id; 126eda14cbcSMatt Macy 127eda14cbcSMatt Macy /* Zero means equality. 
*/ 128eda14cbcSMatt Macy return (!equal); 129eda14cbcSMatt Macy } 130eda14cbcSMatt Macy 131eda14cbcSMatt Macy static int 132eda14cbcSMatt Macy sfs_vnode_get(const struct mount *mp, int flags, uint64_t parent_id, 133eda14cbcSMatt Macy uint64_t id, struct vnode **vpp) 134eda14cbcSMatt Macy { 135eda14cbcSMatt Macy sfs_node_t search; 136eda14cbcSMatt Macy int err; 137eda14cbcSMatt Macy 138eda14cbcSMatt Macy search.sn_id = id; 139eda14cbcSMatt Macy search.sn_parent_id = parent_id; 140eda14cbcSMatt Macy err = vfs_hash_get(mp, (uint32_t)id, flags, curthread, vpp, 141eda14cbcSMatt Macy sfs_compare_ids, &search); 142eda14cbcSMatt Macy return (err); 143eda14cbcSMatt Macy } 144eda14cbcSMatt Macy 145eda14cbcSMatt Macy static int 146eda14cbcSMatt Macy sfs_vnode_insert(struct vnode *vp, int flags, uint64_t parent_id, 147eda14cbcSMatt Macy uint64_t id, struct vnode **vpp) 148eda14cbcSMatt Macy { 149eda14cbcSMatt Macy int err; 150eda14cbcSMatt Macy 151eda14cbcSMatt Macy KASSERT(vp->v_data != NULL, ("sfs_vnode_insert with NULL v_data")); 152eda14cbcSMatt Macy err = vfs_hash_insert(vp, (uint32_t)id, flags, curthread, vpp, 153eda14cbcSMatt Macy sfs_compare_ids, vp->v_data); 154eda14cbcSMatt Macy return (err); 155eda14cbcSMatt Macy } 156eda14cbcSMatt Macy 157eda14cbcSMatt Macy static void 158eda14cbcSMatt Macy sfs_vnode_remove(struct vnode *vp) 159eda14cbcSMatt Macy { 160eda14cbcSMatt Macy vfs_hash_remove(vp); 161eda14cbcSMatt Macy } 162eda14cbcSMatt Macy 163eda14cbcSMatt Macy typedef void sfs_vnode_setup_fn(vnode_t *vp, void *arg); 164eda14cbcSMatt Macy 165eda14cbcSMatt Macy static int 166eda14cbcSMatt Macy sfs_vgetx(struct mount *mp, int flags, uint64_t parent_id, uint64_t id, 167eda14cbcSMatt Macy const char *tag, struct vop_vector *vops, 168eda14cbcSMatt Macy sfs_vnode_setup_fn setup, void *arg, 169eda14cbcSMatt Macy struct vnode **vpp) 170eda14cbcSMatt Macy { 171eda14cbcSMatt Macy struct vnode *vp; 172eda14cbcSMatt Macy int error; 173eda14cbcSMatt Macy 174eda14cbcSMatt Macy error 
= sfs_vnode_get(mp, flags, parent_id, id, vpp); 175eda14cbcSMatt Macy if (error != 0 || *vpp != NULL) { 176eda14cbcSMatt Macy KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL, 177eda14cbcSMatt Macy "sfs vnode with no data"); 178eda14cbcSMatt Macy return (error); 179eda14cbcSMatt Macy } 180eda14cbcSMatt Macy 181eda14cbcSMatt Macy /* Allocate a new vnode/inode. */ 182eda14cbcSMatt Macy error = getnewvnode(tag, mp, vops, &vp); 183eda14cbcSMatt Macy if (error != 0) { 184eda14cbcSMatt Macy *vpp = NULL; 185eda14cbcSMatt Macy return (error); 186eda14cbcSMatt Macy } 187eda14cbcSMatt Macy 188eda14cbcSMatt Macy /* 189eda14cbcSMatt Macy * Exclusively lock the vnode vnode while it's being constructed. 190eda14cbcSMatt Macy */ 191eda14cbcSMatt Macy lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); 192eda14cbcSMatt Macy error = insmntque(vp, mp); 193eda14cbcSMatt Macy if (error != 0) { 194eda14cbcSMatt Macy *vpp = NULL; 195eda14cbcSMatt Macy return (error); 196eda14cbcSMatt Macy } 197eda14cbcSMatt Macy 198eda14cbcSMatt Macy setup(vp, arg); 199eda14cbcSMatt Macy 200eda14cbcSMatt Macy error = sfs_vnode_insert(vp, flags, parent_id, id, vpp); 201eda14cbcSMatt Macy if (error != 0 || *vpp != NULL) { 202eda14cbcSMatt Macy KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL, 203eda14cbcSMatt Macy "sfs vnode with no data"); 204eda14cbcSMatt Macy return (error); 205eda14cbcSMatt Macy } 206eda14cbcSMatt Macy 2077ff31438SMateusz Guzik #if __FreeBSD_version >= 1400077 2087ff31438SMateusz Guzik vn_set_state(vp, VSTATE_CONSTRUCTED); 2097ff31438SMateusz Guzik #endif 2107ff31438SMateusz Guzik 211eda14cbcSMatt Macy *vpp = vp; 212eda14cbcSMatt Macy return (0); 213eda14cbcSMatt Macy } 214eda14cbcSMatt Macy 215eda14cbcSMatt Macy static void 216eda14cbcSMatt Macy sfs_print_node(sfs_node_t *node) 217eda14cbcSMatt Macy { 218eda14cbcSMatt Macy printf("\tname = %s\n", node->sn_name); 219eda14cbcSMatt Macy printf("\tparent_id = %ju\n", (uintmax_t)node->sn_parent_id); 220eda14cbcSMatt Macy printf("\tid = %ju\n", 
(uintmax_t)node->sn_id); 221eda14cbcSMatt Macy } 222eda14cbcSMatt Macy 223eda14cbcSMatt Macy static sfs_node_t * 224eda14cbcSMatt Macy sfs_alloc_node(size_t size, const char *name, uint64_t parent_id, uint64_t id) 225eda14cbcSMatt Macy { 226eda14cbcSMatt Macy struct sfs_node *node; 227eda14cbcSMatt Macy 228eda14cbcSMatt Macy KASSERT(strlen(name) < sizeof (node->sn_name), 229eda14cbcSMatt Macy ("sfs node name is too long")); 230eda14cbcSMatt Macy KASSERT(size >= sizeof (*node), ("sfs node size is too small")); 231eda14cbcSMatt Macy node = malloc(size, M_SFSNODES, M_WAITOK | M_ZERO); 232eda14cbcSMatt Macy strlcpy(node->sn_name, name, sizeof (node->sn_name)); 233eda14cbcSMatt Macy node->sn_parent_id = parent_id; 234eda14cbcSMatt Macy node->sn_id = id; 235eda14cbcSMatt Macy 236eda14cbcSMatt Macy return (node); 237eda14cbcSMatt Macy } 238eda14cbcSMatt Macy 239eda14cbcSMatt Macy static void 240eda14cbcSMatt Macy sfs_destroy_node(sfs_node_t *node) 241eda14cbcSMatt Macy { 242eda14cbcSMatt Macy free(node, M_SFSNODES); 243eda14cbcSMatt Macy } 244eda14cbcSMatt Macy 245eda14cbcSMatt Macy static void * 246eda14cbcSMatt Macy sfs_reclaim_vnode(vnode_t *vp) 247eda14cbcSMatt Macy { 248eda14cbcSMatt Macy void *data; 249eda14cbcSMatt Macy 250eda14cbcSMatt Macy sfs_vnode_remove(vp); 251eda14cbcSMatt Macy data = vp->v_data; 252eda14cbcSMatt Macy vp->v_data = NULL; 253eda14cbcSMatt Macy return (data); 254eda14cbcSMatt Macy } 255eda14cbcSMatt Macy 256eda14cbcSMatt Macy static int 257eda14cbcSMatt Macy sfs_readdir_common(uint64_t parent_id, uint64_t id, struct vop_readdir_args *ap, 258184c1b94SMartin Matuska zfs_uio_t *uio, off_t *offp) 259eda14cbcSMatt Macy { 260eda14cbcSMatt Macy struct dirent entry; 261eda14cbcSMatt Macy int error; 262eda14cbcSMatt Macy 263eda14cbcSMatt Macy /* Reset ncookies for subsequent use of vfs_read_dirent. 
*/ 264eda14cbcSMatt Macy if (ap->a_ncookies != NULL) 265eda14cbcSMatt Macy *ap->a_ncookies = 0; 266eda14cbcSMatt Macy 267184c1b94SMartin Matuska if (zfs_uio_resid(uio) < sizeof (entry)) 268eda14cbcSMatt Macy return (SET_ERROR(EINVAL)); 269eda14cbcSMatt Macy 270184c1b94SMartin Matuska if (zfs_uio_offset(uio) < 0) 271eda14cbcSMatt Macy return (SET_ERROR(EINVAL)); 272184c1b94SMartin Matuska if (zfs_uio_offset(uio) == 0) { 273eda14cbcSMatt Macy entry.d_fileno = id; 274eda14cbcSMatt Macy entry.d_type = DT_DIR; 275eda14cbcSMatt Macy entry.d_name[0] = '.'; 276eda14cbcSMatt Macy entry.d_name[1] = '\0'; 277eda14cbcSMatt Macy entry.d_namlen = 1; 278eda14cbcSMatt Macy entry.d_reclen = sizeof (entry); 279184c1b94SMartin Matuska error = vfs_read_dirent(ap, &entry, zfs_uio_offset(uio)); 280eda14cbcSMatt Macy if (error != 0) 281eda14cbcSMatt Macy return (SET_ERROR(error)); 282eda14cbcSMatt Macy } 283eda14cbcSMatt Macy 284184c1b94SMartin Matuska if (zfs_uio_offset(uio) < sizeof (entry)) 285eda14cbcSMatt Macy return (SET_ERROR(EINVAL)); 286184c1b94SMartin Matuska if (zfs_uio_offset(uio) == sizeof (entry)) { 287eda14cbcSMatt Macy entry.d_fileno = parent_id; 288eda14cbcSMatt Macy entry.d_type = DT_DIR; 289eda14cbcSMatt Macy entry.d_name[0] = '.'; 290eda14cbcSMatt Macy entry.d_name[1] = '.'; 291eda14cbcSMatt Macy entry.d_name[2] = '\0'; 292eda14cbcSMatt Macy entry.d_namlen = 2; 293eda14cbcSMatt Macy entry.d_reclen = sizeof (entry); 294184c1b94SMartin Matuska error = vfs_read_dirent(ap, &entry, zfs_uio_offset(uio)); 295eda14cbcSMatt Macy if (error != 0) 296eda14cbcSMatt Macy return (SET_ERROR(error)); 297eda14cbcSMatt Macy } 298eda14cbcSMatt Macy 299eda14cbcSMatt Macy if (offp != NULL) 300eda14cbcSMatt Macy *offp = 2 * sizeof (entry); 301eda14cbcSMatt Macy return (0); 302eda14cbcSMatt Macy } 303eda14cbcSMatt Macy 304eda14cbcSMatt Macy 305eda14cbcSMatt Macy /* 306eda14cbcSMatt Macy * .zfs inode namespace 307eda14cbcSMatt Macy * 308eda14cbcSMatt Macy * We need to generate unique inode 
numbers for all files and directories 309eda14cbcSMatt Macy * within the .zfs pseudo-filesystem. We use the following scheme: 310eda14cbcSMatt Macy * 311eda14cbcSMatt Macy * ENTRY ZFSCTL_INODE 312eda14cbcSMatt Macy * .zfs 1 313eda14cbcSMatt Macy * .zfs/snapshot 2 314eda14cbcSMatt Macy * .zfs/snapshot/<snap> objectid(snap) 315eda14cbcSMatt Macy */ 316eda14cbcSMatt Macy #define ZFSCTL_INO_SNAP(id) (id) 317eda14cbcSMatt Macy 318eda14cbcSMatt Macy static struct vop_vector zfsctl_ops_root; 319eda14cbcSMatt Macy static struct vop_vector zfsctl_ops_snapdir; 320eda14cbcSMatt Macy static struct vop_vector zfsctl_ops_snapshot; 321eda14cbcSMatt Macy 322eda14cbcSMatt Macy void 323eda14cbcSMatt Macy zfsctl_init(void) 324eda14cbcSMatt Macy { 325eda14cbcSMatt Macy } 326eda14cbcSMatt Macy 327eda14cbcSMatt Macy void 328eda14cbcSMatt Macy zfsctl_fini(void) 329eda14cbcSMatt Macy { 330eda14cbcSMatt Macy } 331eda14cbcSMatt Macy 332eda14cbcSMatt Macy boolean_t 333eda14cbcSMatt Macy zfsctl_is_node(vnode_t *vp) 334eda14cbcSMatt Macy { 335eda14cbcSMatt Macy return (vn_matchops(vp, zfsctl_ops_root) || 336eda14cbcSMatt Macy vn_matchops(vp, zfsctl_ops_snapdir) || 337e8796acaSMateusz Guzik vn_matchops(vp, zfsctl_ops_snapshot)); 338eda14cbcSMatt Macy 339eda14cbcSMatt Macy } 340eda14cbcSMatt Macy 341eda14cbcSMatt Macy typedef struct zfsctl_root { 342eda14cbcSMatt Macy sfs_node_t node; 343eda14cbcSMatt Macy sfs_node_t *snapdir; 344eda14cbcSMatt Macy timestruc_t cmtime; 345eda14cbcSMatt Macy } zfsctl_root_t; 346eda14cbcSMatt Macy 347eda14cbcSMatt Macy 348eda14cbcSMatt Macy /* 349eda14cbcSMatt Macy * Create the '.zfs' directory. 
350eda14cbcSMatt Macy */ 351eda14cbcSMatt Macy void 352eda14cbcSMatt Macy zfsctl_create(zfsvfs_t *zfsvfs) 353eda14cbcSMatt Macy { 354eda14cbcSMatt Macy zfsctl_root_t *dot_zfs; 355eda14cbcSMatt Macy sfs_node_t *snapdir; 356eda14cbcSMatt Macy vnode_t *rvp; 357eda14cbcSMatt Macy uint64_t crtime[2]; 358eda14cbcSMatt Macy 35916038816SMartin Matuska ASSERT3P(zfsvfs->z_ctldir, ==, NULL); 360eda14cbcSMatt Macy 361eda14cbcSMatt Macy snapdir = sfs_alloc_node(sizeof (*snapdir), "snapshot", ZFSCTL_INO_ROOT, 362eda14cbcSMatt Macy ZFSCTL_INO_SNAPDIR); 363eda14cbcSMatt Macy dot_zfs = (zfsctl_root_t *)sfs_alloc_node(sizeof (*dot_zfs), ".zfs", 0, 364eda14cbcSMatt Macy ZFSCTL_INO_ROOT); 365eda14cbcSMatt Macy dot_zfs->snapdir = snapdir; 366eda14cbcSMatt Macy 36716038816SMartin Matuska VERIFY0(VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &rvp)); 36816038816SMartin Matuska VERIFY0(sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), 369eda14cbcSMatt Macy &crtime, sizeof (crtime))); 370eda14cbcSMatt Macy ZFS_TIME_DECODE(&dot_zfs->cmtime, crtime); 371eda14cbcSMatt Macy vput(rvp); 372eda14cbcSMatt Macy 373eda14cbcSMatt Macy zfsvfs->z_ctldir = dot_zfs; 374eda14cbcSMatt Macy } 375eda14cbcSMatt Macy 376eda14cbcSMatt Macy /* 377eda14cbcSMatt Macy * Destroy the '.zfs' directory. Only called when the filesystem is unmounted. 378eda14cbcSMatt Macy * The nodes must not have any associated vnodes by now as they should be 379eda14cbcSMatt Macy * vflush-ed. 
380eda14cbcSMatt Macy */ 381eda14cbcSMatt Macy void 382eda14cbcSMatt Macy zfsctl_destroy(zfsvfs_t *zfsvfs) 383eda14cbcSMatt Macy { 384eda14cbcSMatt Macy sfs_destroy_node(zfsvfs->z_ctldir->snapdir); 385eda14cbcSMatt Macy sfs_destroy_node((sfs_node_t *)zfsvfs->z_ctldir); 386eda14cbcSMatt Macy zfsvfs->z_ctldir = NULL; 387eda14cbcSMatt Macy } 388eda14cbcSMatt Macy 389eda14cbcSMatt Macy static int 390eda14cbcSMatt Macy zfsctl_fs_root_vnode(struct mount *mp, void *arg __unused, int flags, 391eda14cbcSMatt Macy struct vnode **vpp) 392eda14cbcSMatt Macy { 393eda14cbcSMatt Macy return (VFS_ROOT(mp, flags, vpp)); 394eda14cbcSMatt Macy } 395eda14cbcSMatt Macy 396eda14cbcSMatt Macy static void 397eda14cbcSMatt Macy zfsctl_common_vnode_setup(vnode_t *vp, void *arg) 398eda14cbcSMatt Macy { 399eda14cbcSMatt Macy ASSERT_VOP_ELOCKED(vp, __func__); 400eda14cbcSMatt Macy 401eda14cbcSMatt Macy /* We support shared locking. */ 402eda14cbcSMatt Macy VN_LOCK_ASHARE(vp); 403eda14cbcSMatt Macy vp->v_type = VDIR; 404eda14cbcSMatt Macy vp->v_data = arg; 405eda14cbcSMatt Macy } 406eda14cbcSMatt Macy 407eda14cbcSMatt Macy static int 408eda14cbcSMatt Macy zfsctl_root_vnode(struct mount *mp, void *arg __unused, int flags, 409eda14cbcSMatt Macy struct vnode **vpp) 410eda14cbcSMatt Macy { 411eda14cbcSMatt Macy void *node; 412eda14cbcSMatt Macy int err; 413eda14cbcSMatt Macy 414eda14cbcSMatt Macy node = ((zfsvfs_t *)mp->mnt_data)->z_ctldir; 415eda14cbcSMatt Macy err = sfs_vgetx(mp, flags, 0, ZFSCTL_INO_ROOT, "zfs", &zfsctl_ops_root, 416eda14cbcSMatt Macy zfsctl_common_vnode_setup, node, vpp); 417eda14cbcSMatt Macy return (err); 418eda14cbcSMatt Macy } 419eda14cbcSMatt Macy 420eda14cbcSMatt Macy static int 421eda14cbcSMatt Macy zfsctl_snapdir_vnode(struct mount *mp, void *arg __unused, int flags, 422eda14cbcSMatt Macy struct vnode **vpp) 423eda14cbcSMatt Macy { 424eda14cbcSMatt Macy void *node; 425eda14cbcSMatt Macy int err; 426eda14cbcSMatt Macy 427eda14cbcSMatt Macy node = ((zfsvfs_t 
*)mp->mnt_data)->z_ctldir->snapdir; 428eda14cbcSMatt Macy err = sfs_vgetx(mp, flags, ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, "zfs", 429eda14cbcSMatt Macy &zfsctl_ops_snapdir, zfsctl_common_vnode_setup, node, vpp); 430eda14cbcSMatt Macy return (err); 431eda14cbcSMatt Macy } 432eda14cbcSMatt Macy 433eda14cbcSMatt Macy /* 434eda14cbcSMatt Macy * Given a root znode, retrieve the associated .zfs directory. 435eda14cbcSMatt Macy * Add a hold to the vnode and return it. 436eda14cbcSMatt Macy */ 437eda14cbcSMatt Macy int 438eda14cbcSMatt Macy zfsctl_root(zfsvfs_t *zfsvfs, int flags, vnode_t **vpp) 439eda14cbcSMatt Macy { 440eda14cbcSMatt Macy int error; 441eda14cbcSMatt Macy 442eda14cbcSMatt Macy error = zfsctl_root_vnode(zfsvfs->z_vfs, NULL, flags, vpp); 443eda14cbcSMatt Macy return (error); 444eda14cbcSMatt Macy } 445eda14cbcSMatt Macy 446eda14cbcSMatt Macy /* 447eda14cbcSMatt Macy * Common open routine. Disallow any write access. 448eda14cbcSMatt Macy */ 449eda14cbcSMatt Macy static int 450eda14cbcSMatt Macy zfsctl_common_open(struct vop_open_args *ap) 451eda14cbcSMatt Macy { 452eda14cbcSMatt Macy int flags = ap->a_mode; 453eda14cbcSMatt Macy 454eda14cbcSMatt Macy if (flags & FWRITE) 455eda14cbcSMatt Macy return (SET_ERROR(EACCES)); 456eda14cbcSMatt Macy 457eda14cbcSMatt Macy return (0); 458eda14cbcSMatt Macy } 459eda14cbcSMatt Macy 460eda14cbcSMatt Macy /* 461eda14cbcSMatt Macy * Common close routine. Nothing to do here. 462eda14cbcSMatt Macy */ 463eda14cbcSMatt Macy static int 464eda14cbcSMatt Macy zfsctl_common_close(struct vop_close_args *ap) 465eda14cbcSMatt Macy { 466c03c5b1cSMartin Matuska (void) ap; 467eda14cbcSMatt Macy return (0); 468eda14cbcSMatt Macy } 469eda14cbcSMatt Macy 470eda14cbcSMatt Macy /* 471eda14cbcSMatt Macy * Common access routine. Disallow writes. 
472eda14cbcSMatt Macy */ 473eda14cbcSMatt Macy static int 474eda14cbcSMatt Macy zfsctl_common_access(struct vop_access_args *ap) 475eda14cbcSMatt Macy { 476eda14cbcSMatt Macy accmode_t accmode = ap->a_accmode; 477eda14cbcSMatt Macy 478eda14cbcSMatt Macy if (accmode & VWRITE) 479eda14cbcSMatt Macy return (SET_ERROR(EACCES)); 480eda14cbcSMatt Macy return (0); 481eda14cbcSMatt Macy } 482eda14cbcSMatt Macy 483eda14cbcSMatt Macy /* 484eda14cbcSMatt Macy * Common getattr function. Fill in basic information. 485eda14cbcSMatt Macy */ 486eda14cbcSMatt Macy static void 487eda14cbcSMatt Macy zfsctl_common_getattr(vnode_t *vp, vattr_t *vap) 488eda14cbcSMatt Macy { 489eda14cbcSMatt Macy timestruc_t now; 490eda14cbcSMatt Macy sfs_node_t *node; 491eda14cbcSMatt Macy 492eda14cbcSMatt Macy node = vp->v_data; 493eda14cbcSMatt Macy 494eda14cbcSMatt Macy vap->va_uid = 0; 495eda14cbcSMatt Macy vap->va_gid = 0; 496eda14cbcSMatt Macy vap->va_rdev = 0; 497eda14cbcSMatt Macy /* 498eda14cbcSMatt Macy * We are a purely virtual object, so we have no 499eda14cbcSMatt Macy * blocksize or allocated blocks. 500eda14cbcSMatt Macy */ 501eda14cbcSMatt Macy vap->va_blksize = 0; 502eda14cbcSMatt Macy vap->va_nblocks = 0; 503e92ffd9bSMartin Matuska vap->va_gen = 0; 504eda14cbcSMatt Macy vn_fsid(vp, vap); 505eda14cbcSMatt Macy vap->va_mode = zfsctl_ctldir_mode; 506eda14cbcSMatt Macy vap->va_type = VDIR; 507eda14cbcSMatt Macy /* 508eda14cbcSMatt Macy * We live in the now (for atime). 509eda14cbcSMatt Macy */ 510eda14cbcSMatt Macy gethrestime(&now); 511eda14cbcSMatt Macy vap->va_atime = now; 512eda14cbcSMatt Macy /* FreeBSD: Reset chflags(2) flags. */ 513eda14cbcSMatt Macy vap->va_flags = 0; 514eda14cbcSMatt Macy 515eda14cbcSMatt Macy vap->va_nodeid = node->sn_id; 516eda14cbcSMatt Macy 517eda14cbcSMatt Macy /* At least '.' and '..'. 
*/ 518eda14cbcSMatt Macy vap->va_nlink = 2; 519eda14cbcSMatt Macy } 520eda14cbcSMatt Macy 521eda14cbcSMatt Macy #ifndef _OPENSOLARIS_SYS_VNODE_H_ 522eda14cbcSMatt Macy struct vop_fid_args { 523eda14cbcSMatt Macy struct vnode *a_vp; 524eda14cbcSMatt Macy struct fid *a_fid; 525eda14cbcSMatt Macy }; 526eda14cbcSMatt Macy #endif 527eda14cbcSMatt Macy 528eda14cbcSMatt Macy static int 529eda14cbcSMatt Macy zfsctl_common_fid(struct vop_fid_args *ap) 530eda14cbcSMatt Macy { 531eda14cbcSMatt Macy vnode_t *vp = ap->a_vp; 532eda14cbcSMatt Macy fid_t *fidp = (void *)ap->a_fid; 533eda14cbcSMatt Macy sfs_node_t *node = vp->v_data; 534eda14cbcSMatt Macy uint64_t object = node->sn_id; 535eda14cbcSMatt Macy zfid_short_t *zfid; 536eda14cbcSMatt Macy int i; 537eda14cbcSMatt Macy 538eda14cbcSMatt Macy zfid = (zfid_short_t *)fidp; 539eda14cbcSMatt Macy zfid->zf_len = SHORT_FID_LEN; 540eda14cbcSMatt Macy 541eda14cbcSMatt Macy for (i = 0; i < sizeof (zfid->zf_object); i++) 542eda14cbcSMatt Macy zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 543eda14cbcSMatt Macy 544eda14cbcSMatt Macy /* .zfs nodes always have a generation number of 0 */ 545eda14cbcSMatt Macy for (i = 0; i < sizeof (zfid->zf_gen); i++) 546eda14cbcSMatt Macy zfid->zf_gen[i] = 0; 547eda14cbcSMatt Macy 548eda14cbcSMatt Macy return (0); 549eda14cbcSMatt Macy } 550eda14cbcSMatt Macy 551eda14cbcSMatt Macy #ifndef _SYS_SYSPROTO_H_ 552eda14cbcSMatt Macy struct vop_reclaim_args { 553eda14cbcSMatt Macy struct vnode *a_vp; 554eda14cbcSMatt Macy struct thread *a_td; 555eda14cbcSMatt Macy }; 556eda14cbcSMatt Macy #endif 557eda14cbcSMatt Macy 558eda14cbcSMatt Macy static int 559eda14cbcSMatt Macy zfsctl_common_reclaim(struct vop_reclaim_args *ap) 560eda14cbcSMatt Macy { 561eda14cbcSMatt Macy vnode_t *vp = ap->a_vp; 562eda14cbcSMatt Macy 563eda14cbcSMatt Macy (void) sfs_reclaim_vnode(vp); 564eda14cbcSMatt Macy return (0); 565eda14cbcSMatt Macy } 566eda14cbcSMatt Macy 567eda14cbcSMatt Macy #ifndef _SYS_SYSPROTO_H_ 568eda14cbcSMatt 
Macy struct vop_print_args { 569eda14cbcSMatt Macy struct vnode *a_vp; 570eda14cbcSMatt Macy }; 571eda14cbcSMatt Macy #endif 572eda14cbcSMatt Macy 573eda14cbcSMatt Macy static int 574eda14cbcSMatt Macy zfsctl_common_print(struct vop_print_args *ap) 575eda14cbcSMatt Macy { 576eda14cbcSMatt Macy sfs_print_node(ap->a_vp->v_data); 577eda14cbcSMatt Macy return (0); 578eda14cbcSMatt Macy } 579eda14cbcSMatt Macy 580eda14cbcSMatt Macy #ifndef _SYS_SYSPROTO_H_ 581eda14cbcSMatt Macy struct vop_getattr_args { 582eda14cbcSMatt Macy struct vnode *a_vp; 583eda14cbcSMatt Macy struct vattr *a_vap; 584eda14cbcSMatt Macy struct ucred *a_cred; 585eda14cbcSMatt Macy }; 586eda14cbcSMatt Macy #endif 587eda14cbcSMatt Macy 588eda14cbcSMatt Macy /* 589eda14cbcSMatt Macy * Get root directory attributes. 590eda14cbcSMatt Macy */ 591eda14cbcSMatt Macy static int 592eda14cbcSMatt Macy zfsctl_root_getattr(struct vop_getattr_args *ap) 593eda14cbcSMatt Macy { 594eda14cbcSMatt Macy struct vnode *vp = ap->a_vp; 595eda14cbcSMatt Macy struct vattr *vap = ap->a_vap; 596eda14cbcSMatt Macy zfsctl_root_t *node = vp->v_data; 597eda14cbcSMatt Macy 598eda14cbcSMatt Macy zfsctl_common_getattr(vp, vap); 599eda14cbcSMatt Macy vap->va_ctime = node->cmtime; 600eda14cbcSMatt Macy vap->va_mtime = vap->va_ctime; 601eda14cbcSMatt Macy vap->va_birthtime = vap->va_ctime; 602eda14cbcSMatt Macy vap->va_nlink += 1; /* snapdir */ 603eda14cbcSMatt Macy vap->va_size = vap->va_nlink; 604eda14cbcSMatt Macy return (0); 605eda14cbcSMatt Macy } 606eda14cbcSMatt Macy 607eda14cbcSMatt Macy /* 608eda14cbcSMatt Macy * When we lookup "." we still can be asked to lock it 609eda14cbcSMatt Macy * differently, can't we? 
610eda14cbcSMatt Macy */ 611eda14cbcSMatt Macy static int 612eda14cbcSMatt Macy zfsctl_relock_dot(vnode_t *dvp, int ltype) 613eda14cbcSMatt Macy { 614eda14cbcSMatt Macy vref(dvp); 615eda14cbcSMatt Macy if (ltype != VOP_ISLOCKED(dvp)) { 616eda14cbcSMatt Macy if (ltype == LK_EXCLUSIVE) 617eda14cbcSMatt Macy vn_lock(dvp, LK_UPGRADE | LK_RETRY); 618eda14cbcSMatt Macy else /* if (ltype == LK_SHARED) */ 619eda14cbcSMatt Macy vn_lock(dvp, LK_DOWNGRADE | LK_RETRY); 620eda14cbcSMatt Macy 621eda14cbcSMatt Macy /* Relock for the "." case may left us with reclaimed vnode. */ 622eda14cbcSMatt Macy if (VN_IS_DOOMED(dvp)) { 623eda14cbcSMatt Macy vrele(dvp); 624eda14cbcSMatt Macy return (SET_ERROR(ENOENT)); 625eda14cbcSMatt Macy } 626eda14cbcSMatt Macy } 627eda14cbcSMatt Macy return (0); 628eda14cbcSMatt Macy } 629eda14cbcSMatt Macy 630eda14cbcSMatt Macy /* 631eda14cbcSMatt Macy * Special case the handling of "..". 632eda14cbcSMatt Macy */ 633eda14cbcSMatt Macy static int 634eda14cbcSMatt Macy zfsctl_root_lookup(struct vop_lookup_args *ap) 635eda14cbcSMatt Macy { 636eda14cbcSMatt Macy struct componentname *cnp = ap->a_cnp; 637eda14cbcSMatt Macy vnode_t *dvp = ap->a_dvp; 638eda14cbcSMatt Macy vnode_t **vpp = ap->a_vpp; 639eda14cbcSMatt Macy int flags = ap->a_cnp->cn_flags; 640eda14cbcSMatt Macy int lkflags = ap->a_cnp->cn_lkflags; 641eda14cbcSMatt Macy int nameiop = ap->a_cnp->cn_nameiop; 642eda14cbcSMatt Macy int err; 643eda14cbcSMatt Macy 64416038816SMartin Matuska ASSERT3S(dvp->v_type, ==, VDIR); 645eda14cbcSMatt Macy 646eda14cbcSMatt Macy if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP) 647eda14cbcSMatt Macy return (SET_ERROR(ENOTSUP)); 648eda14cbcSMatt Macy 649eda14cbcSMatt Macy if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') { 650eda14cbcSMatt Macy err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK); 651eda14cbcSMatt Macy if (err == 0) 652eda14cbcSMatt Macy *vpp = dvp; 653eda14cbcSMatt Macy } else if ((flags & ISDOTDOT) != 0) { 654eda14cbcSMatt Macy err = 
vn_vget_ino_gen(dvp, zfsctl_fs_root_vnode, NULL, 655eda14cbcSMatt Macy lkflags, vpp); 656eda14cbcSMatt Macy } else if (strncmp(cnp->cn_nameptr, "snapshot", cnp->cn_namelen) == 0) { 657eda14cbcSMatt Macy err = zfsctl_snapdir_vnode(dvp->v_mount, NULL, lkflags, vpp); 658eda14cbcSMatt Macy } else { 659eda14cbcSMatt Macy err = SET_ERROR(ENOENT); 660eda14cbcSMatt Macy } 661eda14cbcSMatt Macy if (err != 0) 662eda14cbcSMatt Macy *vpp = NULL; 663eda14cbcSMatt Macy return (err); 664eda14cbcSMatt Macy } 665eda14cbcSMatt Macy 666eda14cbcSMatt Macy static int 667eda14cbcSMatt Macy zfsctl_root_readdir(struct vop_readdir_args *ap) 668eda14cbcSMatt Macy { 669eda14cbcSMatt Macy struct dirent entry; 670eda14cbcSMatt Macy vnode_t *vp = ap->a_vp; 671eda14cbcSMatt Macy zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 672eda14cbcSMatt Macy zfsctl_root_t *node = vp->v_data; 673184c1b94SMartin Matuska zfs_uio_t uio; 674eda14cbcSMatt Macy int *eofp = ap->a_eofflag; 675eda14cbcSMatt Macy off_t dots_offset; 676eda14cbcSMatt Macy int error; 677eda14cbcSMatt Macy 678184c1b94SMartin Matuska zfs_uio_init(&uio, ap->a_uio); 679184c1b94SMartin Matuska 68016038816SMartin Matuska ASSERT3S(vp->v_type, ==, VDIR); 681eda14cbcSMatt Macy 6829c4e2708SMateusz Guzik /* 6839c4e2708SMateusz Guzik * FIXME: this routine only ever emits 3 entries and does not tolerate 6849c4e2708SMateusz Guzik * being called with a buffer too small to handle all of them. 6859c4e2708SMateusz Guzik * 6869c4e2708SMateusz Guzik * The check below facilitates the idiom of repeating calls until the 6879c4e2708SMateusz Guzik * count to return is 0. 
6889c4e2708SMateusz Guzik */ 6899c4e2708SMateusz Guzik if (zfs_uio_offset(&uio) == 3 * sizeof (entry)) { 6909c4e2708SMateusz Guzik return (0); 6919c4e2708SMateusz Guzik } 6929c4e2708SMateusz Guzik 693184c1b94SMartin Matuska error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, &uio, 694eda14cbcSMatt Macy &dots_offset); 695eda14cbcSMatt Macy if (error != 0) { 696eda14cbcSMatt Macy if (error == ENAMETOOLONG) /* ran out of destination space */ 697eda14cbcSMatt Macy error = 0; 698eda14cbcSMatt Macy return (error); 699eda14cbcSMatt Macy } 700184c1b94SMartin Matuska if (zfs_uio_offset(&uio) != dots_offset) 701eda14cbcSMatt Macy return (SET_ERROR(EINVAL)); 702eda14cbcSMatt Macy 703c03c5b1cSMartin Matuska _Static_assert(sizeof (node->snapdir->sn_name) <= sizeof (entry.d_name), 704c03c5b1cSMartin Matuska "node->snapdir->sn_name too big for entry.d_name"); 705eda14cbcSMatt Macy entry.d_fileno = node->snapdir->sn_id; 706eda14cbcSMatt Macy entry.d_type = DT_DIR; 707eda14cbcSMatt Macy strcpy(entry.d_name, node->snapdir->sn_name); 708eda14cbcSMatt Macy entry.d_namlen = strlen(entry.d_name); 709eda14cbcSMatt Macy entry.d_reclen = sizeof (entry); 710184c1b94SMartin Matuska error = vfs_read_dirent(ap, &entry, zfs_uio_offset(&uio)); 711eda14cbcSMatt Macy if (error != 0) { 712eda14cbcSMatt Macy if (error == ENAMETOOLONG) 713eda14cbcSMatt Macy error = 0; 714eda14cbcSMatt Macy return (SET_ERROR(error)); 715eda14cbcSMatt Macy } 716eda14cbcSMatt Macy if (eofp != NULL) 717eda14cbcSMatt Macy *eofp = 1; 718eda14cbcSMatt Macy return (0); 719eda14cbcSMatt Macy } 720eda14cbcSMatt Macy 721eda14cbcSMatt Macy static int 722eda14cbcSMatt Macy zfsctl_root_vptocnp(struct vop_vptocnp_args *ap) 723eda14cbcSMatt Macy { 724eda14cbcSMatt Macy static const char dotzfs_name[4] = ".zfs"; 725eda14cbcSMatt Macy vnode_t *dvp; 726eda14cbcSMatt Macy int error; 727eda14cbcSMatt Macy 728eda14cbcSMatt Macy if (*ap->a_buflen < sizeof (dotzfs_name)) 729eda14cbcSMatt Macy return (SET_ERROR(ENOMEM)); 
730eda14cbcSMatt Macy 731eda14cbcSMatt Macy error = vn_vget_ino_gen(ap->a_vp, zfsctl_fs_root_vnode, NULL, 732eda14cbcSMatt Macy LK_SHARED, &dvp); 733eda14cbcSMatt Macy if (error != 0) 734eda14cbcSMatt Macy return (SET_ERROR(error)); 735eda14cbcSMatt Macy 736*ce4dcb97SMartin Matuska VOP_UNLOCK(dvp); 737eda14cbcSMatt Macy *ap->a_vpp = dvp; 738eda14cbcSMatt Macy *ap->a_buflen -= sizeof (dotzfs_name); 739da5137abSMartin Matuska memcpy(ap->a_buf + *ap->a_buflen, dotzfs_name, sizeof (dotzfs_name)); 740eda14cbcSMatt Macy return (0); 741eda14cbcSMatt Macy } 742eda14cbcSMatt Macy 743eda14cbcSMatt Macy static int 744eda14cbcSMatt Macy zfsctl_common_pathconf(struct vop_pathconf_args *ap) 745eda14cbcSMatt Macy { 746eda14cbcSMatt Macy /* 747eda14cbcSMatt Macy * We care about ACL variables so that user land utilities like ls 748eda14cbcSMatt Macy * can display them correctly. Since the ctldir's st_dev is set to be 749eda14cbcSMatt Macy * the same as the parent dataset, we must support all variables that 750eda14cbcSMatt Macy * it supports. 
751eda14cbcSMatt Macy */ 752eda14cbcSMatt Macy switch (ap->a_name) { 753eda14cbcSMatt Macy case _PC_LINK_MAX: 754eda14cbcSMatt Macy *ap->a_retval = MIN(LONG_MAX, ZFS_LINK_MAX); 755eda14cbcSMatt Macy return (0); 756eda14cbcSMatt Macy 757eda14cbcSMatt Macy case _PC_FILESIZEBITS: 758eda14cbcSMatt Macy *ap->a_retval = 64; 759eda14cbcSMatt Macy return (0); 760eda14cbcSMatt Macy 761eda14cbcSMatt Macy case _PC_MIN_HOLE_SIZE: 762eda14cbcSMatt Macy *ap->a_retval = (int)SPA_MINBLOCKSIZE; 763eda14cbcSMatt Macy return (0); 764eda14cbcSMatt Macy 765eda14cbcSMatt Macy case _PC_ACL_EXTENDED: 766eda14cbcSMatt Macy *ap->a_retval = 0; 767eda14cbcSMatt Macy return (0); 768eda14cbcSMatt Macy 769eda14cbcSMatt Macy case _PC_ACL_NFS4: 770eda14cbcSMatt Macy *ap->a_retval = 1; 771eda14cbcSMatt Macy return (0); 772eda14cbcSMatt Macy 773eda14cbcSMatt Macy case _PC_ACL_PATH_MAX: 774eda14cbcSMatt Macy *ap->a_retval = ACL_MAX_ENTRIES; 775eda14cbcSMatt Macy return (0); 776eda14cbcSMatt Macy 777eda14cbcSMatt Macy case _PC_NAME_MAX: 778eda14cbcSMatt Macy *ap->a_retval = NAME_MAX; 779eda14cbcSMatt Macy return (0); 780eda14cbcSMatt Macy 781eda14cbcSMatt Macy default: 782eda14cbcSMatt Macy return (vop_stdpathconf(ap)); 783eda14cbcSMatt Macy } 784eda14cbcSMatt Macy } 785eda14cbcSMatt Macy 786eda14cbcSMatt Macy /* 787eda14cbcSMatt Macy * Returns a trivial ACL 788eda14cbcSMatt Macy */ 789eda14cbcSMatt Macy static int 790eda14cbcSMatt Macy zfsctl_common_getacl(struct vop_getacl_args *ap) 791eda14cbcSMatt Macy { 792eda14cbcSMatt Macy int i; 793eda14cbcSMatt Macy 794eda14cbcSMatt Macy if (ap->a_type != ACL_TYPE_NFS4) 795eda14cbcSMatt Macy return (EINVAL); 796eda14cbcSMatt Macy 797eda14cbcSMatt Macy acl_nfs4_sync_acl_from_mode(ap->a_aclp, zfsctl_ctldir_mode, 0); 798eda14cbcSMatt Macy /* 799eda14cbcSMatt Macy * acl_nfs4_sync_acl_from_mode assumes that the owner can always modify 800eda14cbcSMatt Macy * attributes. That is not the case for the ctldir, so we must clear 801eda14cbcSMatt Macy * those bits. 
We also must clear ACL_READ_NAMED_ATTRS, because xattrs 802eda14cbcSMatt Macy * aren't supported by the ctldir. 803eda14cbcSMatt Macy */ 804eda14cbcSMatt Macy for (i = 0; i < ap->a_aclp->acl_cnt; i++) { 805eda14cbcSMatt Macy struct acl_entry *entry; 806eda14cbcSMatt Macy entry = &(ap->a_aclp->acl_entry[i]); 807eda14cbcSMatt Macy entry->ae_perm &= ~(ACL_WRITE_ACL | ACL_WRITE_OWNER | 808eda14cbcSMatt Macy ACL_WRITE_ATTRIBUTES | ACL_WRITE_NAMED_ATTRS | 809eda14cbcSMatt Macy ACL_READ_NAMED_ATTRS); 810eda14cbcSMatt Macy } 811eda14cbcSMatt Macy 812eda14cbcSMatt Macy return (0); 813eda14cbcSMatt Macy } 814eda14cbcSMatt Macy 815eda14cbcSMatt Macy static struct vop_vector zfsctl_ops_root = { 816eda14cbcSMatt Macy .vop_default = &default_vnodeops, 817b8208228SMateusz Guzik .vop_fplookup_vexec = VOP_EAGAIN, 818e2d997d1SMateusz Guzik .vop_fplookup_symlink = VOP_EAGAIN, 819eda14cbcSMatt Macy .vop_open = zfsctl_common_open, 820eda14cbcSMatt Macy .vop_close = zfsctl_common_close, 821eda14cbcSMatt Macy .vop_ioctl = VOP_EINVAL, 822eda14cbcSMatt Macy .vop_getattr = zfsctl_root_getattr, 823eda14cbcSMatt Macy .vop_access = zfsctl_common_access, 824eda14cbcSMatt Macy .vop_readdir = zfsctl_root_readdir, 825eda14cbcSMatt Macy .vop_lookup = zfsctl_root_lookup, 826eda14cbcSMatt Macy .vop_inactive = VOP_NULL, 827eda14cbcSMatt Macy .vop_reclaim = zfsctl_common_reclaim, 828eda14cbcSMatt Macy .vop_fid = zfsctl_common_fid, 829eda14cbcSMatt Macy .vop_print = zfsctl_common_print, 830eda14cbcSMatt Macy .vop_vptocnp = zfsctl_root_vptocnp, 831eda14cbcSMatt Macy .vop_pathconf = zfsctl_common_pathconf, 832eda14cbcSMatt Macy .vop_getacl = zfsctl_common_getacl, 833681ce946SMartin Matuska #if __FreeBSD_version >= 1400043 8343ffcfa59SMateusz Guzik .vop_add_writecount = vop_stdadd_writecount_nomsync, 835681ce946SMartin Matuska #endif 836eda14cbcSMatt Macy }; 837eda14cbcSMatt Macy VFS_VOP_VECTOR_REGISTER(zfsctl_ops_root); 838eda14cbcSMatt Macy 839eda14cbcSMatt Macy static int 840eda14cbcSMatt Macy 
zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname) 841eda14cbcSMatt Macy { 842eda14cbcSMatt Macy objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os; 843eda14cbcSMatt Macy 844eda14cbcSMatt Macy dmu_objset_name(os, zname); 845eda14cbcSMatt Macy if (strlen(zname) + 1 + strlen(name) >= len) 846eda14cbcSMatt Macy return (SET_ERROR(ENAMETOOLONG)); 847eda14cbcSMatt Macy (void) strcat(zname, "@"); 848eda14cbcSMatt Macy (void) strcat(zname, name); 849eda14cbcSMatt Macy return (0); 850eda14cbcSMatt Macy } 851eda14cbcSMatt Macy 852eda14cbcSMatt Macy static int 853eda14cbcSMatt Macy zfsctl_snapshot_lookup(vnode_t *vp, const char *name, uint64_t *id) 854eda14cbcSMatt Macy { 855eda14cbcSMatt Macy objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os; 856eda14cbcSMatt Macy int err; 857eda14cbcSMatt Macy 858eda14cbcSMatt Macy err = dsl_dataset_snap_lookup(dmu_objset_ds(os), name, id); 859eda14cbcSMatt Macy return (err); 860eda14cbcSMatt Macy } 861eda14cbcSMatt Macy 862eda14cbcSMatt Macy /* 863eda14cbcSMatt Macy * Given a vnode get a root vnode of a filesystem mounted on top of 864eda14cbcSMatt Macy * the vnode, if any. The root vnode is referenced and locked. 865eda14cbcSMatt Macy * If no filesystem is mounted then the orinal vnode remains referenced 866eda14cbcSMatt Macy * and locked. If any error happens the orinal vnode is unlocked and 867eda14cbcSMatt Macy * released. 
868eda14cbcSMatt Macy */ 869eda14cbcSMatt Macy static int 870eda14cbcSMatt Macy zfsctl_mounted_here(vnode_t **vpp, int flags) 871eda14cbcSMatt Macy { 872eda14cbcSMatt Macy struct mount *mp; 873eda14cbcSMatt Macy int err; 874eda14cbcSMatt Macy 875eda14cbcSMatt Macy ASSERT_VOP_LOCKED(*vpp, __func__); 876eda14cbcSMatt Macy ASSERT3S((*vpp)->v_type, ==, VDIR); 877eda14cbcSMatt Macy 878eda14cbcSMatt Macy if ((mp = (*vpp)->v_mountedhere) != NULL) { 879eda14cbcSMatt Macy err = vfs_busy(mp, 0); 880eda14cbcSMatt Macy KASSERT(err == 0, ("vfs_busy(mp, 0) failed with %d", err)); 881eda14cbcSMatt Macy KASSERT(vrefcnt(*vpp) > 1, ("unreferenced mountpoint")); 882eda14cbcSMatt Macy vput(*vpp); 883eda14cbcSMatt Macy err = VFS_ROOT(mp, flags, vpp); 884eda14cbcSMatt Macy vfs_unbusy(mp); 885eda14cbcSMatt Macy return (err); 886eda14cbcSMatt Macy } 887eda14cbcSMatt Macy return (EJUSTRETURN); 888eda14cbcSMatt Macy } 889eda14cbcSMatt Macy 890eda14cbcSMatt Macy typedef struct { 891eda14cbcSMatt Macy const char *snap_name; 892eda14cbcSMatt Macy uint64_t snap_id; 893eda14cbcSMatt Macy } snapshot_setup_arg_t; 894eda14cbcSMatt Macy 895eda14cbcSMatt Macy static void 896eda14cbcSMatt Macy zfsctl_snapshot_vnode_setup(vnode_t *vp, void *arg) 897eda14cbcSMatt Macy { 898eda14cbcSMatt Macy snapshot_setup_arg_t *ssa = arg; 899eda14cbcSMatt Macy sfs_node_t *node; 900eda14cbcSMatt Macy 901eda14cbcSMatt Macy ASSERT_VOP_ELOCKED(vp, __func__); 902eda14cbcSMatt Macy 903eda14cbcSMatt Macy node = sfs_alloc_node(sizeof (sfs_node_t), 904eda14cbcSMatt Macy ssa->snap_name, ZFSCTL_INO_SNAPDIR, ssa->snap_id); 905eda14cbcSMatt Macy zfsctl_common_vnode_setup(vp, node); 906eda14cbcSMatt Macy 907eda14cbcSMatt Macy /* We have to support recursive locking. */ 908eda14cbcSMatt Macy VN_LOCK_AREC(vp); 909eda14cbcSMatt Macy } 910eda14cbcSMatt Macy 911eda14cbcSMatt Macy /* 912eda14cbcSMatt Macy * Lookup entry point for the 'snapshot' directory. 
Try to open the 913eda14cbcSMatt Macy * snapshot if it exist, creating the pseudo filesystem vnode as necessary. 914eda14cbcSMatt Macy * Perform a mount of the associated dataset on top of the vnode. 915eda14cbcSMatt Macy * There are four possibilities: 916eda14cbcSMatt Macy * - the snapshot node and vnode do not exist 917eda14cbcSMatt Macy * - the snapshot vnode is covered by the mounted snapshot 918eda14cbcSMatt Macy * - the snapshot vnode is not covered yet, the mount operation is in progress 919eda14cbcSMatt Macy * - the snapshot vnode is not covered, because the snapshot has been unmounted 920eda14cbcSMatt Macy * The last two states are transient and should be relatively short-lived. 921eda14cbcSMatt Macy */ 922eda14cbcSMatt Macy static int 923eda14cbcSMatt Macy zfsctl_snapdir_lookup(struct vop_lookup_args *ap) 924eda14cbcSMatt Macy { 925eda14cbcSMatt Macy vnode_t *dvp = ap->a_dvp; 926eda14cbcSMatt Macy vnode_t **vpp = ap->a_vpp; 927eda14cbcSMatt Macy struct componentname *cnp = ap->a_cnp; 928eda14cbcSMatt Macy char name[NAME_MAX + 1]; 929eda14cbcSMatt Macy char fullname[ZFS_MAX_DATASET_NAME_LEN]; 930eda14cbcSMatt Macy char *mountpoint; 931eda14cbcSMatt Macy size_t mountpoint_len; 932eda14cbcSMatt Macy zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data; 933eda14cbcSMatt Macy uint64_t snap_id; 934eda14cbcSMatt Macy int nameiop = cnp->cn_nameiop; 935eda14cbcSMatt Macy int lkflags = cnp->cn_lkflags; 936eda14cbcSMatt Macy int flags = cnp->cn_flags; 937eda14cbcSMatt Macy int err; 938eda14cbcSMatt Macy 93916038816SMartin Matuska ASSERT3S(dvp->v_type, ==, VDIR); 940eda14cbcSMatt Macy 941eda14cbcSMatt Macy if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP) 942eda14cbcSMatt Macy return (SET_ERROR(ENOTSUP)); 943eda14cbcSMatt Macy 944eda14cbcSMatt Macy if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') { 945eda14cbcSMatt Macy err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK); 946eda14cbcSMatt Macy if (err == 0) 947eda14cbcSMatt Macy *vpp = dvp; 948eda14cbcSMatt Macy return 
(err); 949eda14cbcSMatt Macy } 950eda14cbcSMatt Macy if (flags & ISDOTDOT) { 951eda14cbcSMatt Macy err = vn_vget_ino_gen(dvp, zfsctl_root_vnode, NULL, lkflags, 952eda14cbcSMatt Macy vpp); 953eda14cbcSMatt Macy return (err); 954eda14cbcSMatt Macy } 955eda14cbcSMatt Macy 956eda14cbcSMatt Macy if (cnp->cn_namelen >= sizeof (name)) 957eda14cbcSMatt Macy return (SET_ERROR(ENAMETOOLONG)); 958eda14cbcSMatt Macy 959eda14cbcSMatt Macy strlcpy(name, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1); 960eda14cbcSMatt Macy err = zfsctl_snapshot_lookup(dvp, name, &snap_id); 961eda14cbcSMatt Macy if (err != 0) 962eda14cbcSMatt Macy return (SET_ERROR(ENOENT)); 963eda14cbcSMatt Macy 964eda14cbcSMatt Macy for (;;) { 965eda14cbcSMatt Macy snapshot_setup_arg_t ssa; 966eda14cbcSMatt Macy 967eda14cbcSMatt Macy ssa.snap_name = name; 968eda14cbcSMatt Macy ssa.snap_id = snap_id; 969eda14cbcSMatt Macy err = sfs_vgetx(dvp->v_mount, LK_SHARED, ZFSCTL_INO_SNAPDIR, 970eda14cbcSMatt Macy snap_id, "zfs", &zfsctl_ops_snapshot, 971eda14cbcSMatt Macy zfsctl_snapshot_vnode_setup, &ssa, vpp); 972eda14cbcSMatt Macy if (err != 0) 973eda14cbcSMatt Macy return (err); 974eda14cbcSMatt Macy 975eda14cbcSMatt Macy /* Check if a new vnode has just been created. */ 976eda14cbcSMatt Macy if (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE) 977eda14cbcSMatt Macy break; 978eda14cbcSMatt Macy 979eda14cbcSMatt Macy /* 980eda14cbcSMatt Macy * Check if a snapshot is already mounted on top of the vnode. 981eda14cbcSMatt Macy */ 982eda14cbcSMatt Macy err = zfsctl_mounted_here(vpp, lkflags); 983eda14cbcSMatt Macy if (err != EJUSTRETURN) 984eda14cbcSMatt Macy return (err); 985eda14cbcSMatt Macy 986eda14cbcSMatt Macy /* 987eda14cbcSMatt Macy * If the vnode is not covered, then either the mount operation 988eda14cbcSMatt Macy * is in progress or the snapshot has already been unmounted 989eda14cbcSMatt Macy * but the vnode hasn't been inactivated and reclaimed yet. 
990eda14cbcSMatt Macy * We can try to re-use the vnode in the latter case. 991eda14cbcSMatt Macy */ 992eda14cbcSMatt Macy VI_LOCK(*vpp); 993eda14cbcSMatt Macy if (((*vpp)->v_iflag & VI_MOUNT) == 0) { 9944e1262acSMateusz Guzik VI_UNLOCK(*vpp); 995eda14cbcSMatt Macy /* 996eda14cbcSMatt Macy * Upgrade to exclusive lock in order to: 997eda14cbcSMatt Macy * - avoid race conditions 998eda14cbcSMatt Macy * - satisfy the contract of mount_snapshot() 999eda14cbcSMatt Macy */ 10004e1262acSMateusz Guzik err = VOP_LOCK(*vpp, LK_TRYUPGRADE); 1001eda14cbcSMatt Macy if (err == 0) 1002eda14cbcSMatt Macy break; 1003eda14cbcSMatt Macy } else { 1004eda14cbcSMatt Macy VI_UNLOCK(*vpp); 1005eda14cbcSMatt Macy } 1006eda14cbcSMatt Macy 1007eda14cbcSMatt Macy /* 1008eda14cbcSMatt Macy * In this state we can loop on uncontested locks and starve 1009eda14cbcSMatt Macy * the thread doing the lengthy, non-trivial mount operation. 1010eda14cbcSMatt Macy * So, yield to prevent that from happening. 1011eda14cbcSMatt Macy */ 1012eda14cbcSMatt Macy vput(*vpp); 1013eda14cbcSMatt Macy kern_yield(PRI_USER); 1014eda14cbcSMatt Macy } 1015eda14cbcSMatt Macy 1016eda14cbcSMatt Macy VERIFY0(zfsctl_snapshot_zname(dvp, name, sizeof (fullname), fullname)); 1017eda14cbcSMatt Macy 1018eda14cbcSMatt Macy mountpoint_len = strlen(dvp->v_vfsp->mnt_stat.f_mntonname) + 1019eda14cbcSMatt Macy strlen("/" ZFS_CTLDIR_NAME "/snapshot/") + strlen(name) + 1; 1020eda14cbcSMatt Macy mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP); 1021eda14cbcSMatt Macy (void) snprintf(mountpoint, mountpoint_len, 1022eda14cbcSMatt Macy "%s/" ZFS_CTLDIR_NAME "/snapshot/%s", 1023eda14cbcSMatt Macy dvp->v_vfsp->mnt_stat.f_mntonname, name); 1024eda14cbcSMatt Macy 10252276e539SMartin Matuska err = mount_snapshot(curthread, vpp, "zfs", mountpoint, fullname, 0, 10262276e539SMartin Matuska dvp->v_vfsp); 1027eda14cbcSMatt Macy kmem_free(mountpoint, mountpoint_len); 1028eda14cbcSMatt Macy if (err == 0) { 1029eda14cbcSMatt Macy /* 1030eda14cbcSMatt 
Macy * Fix up the root vnode mounted on .zfs/snapshot/<snapname>. 1031eda14cbcSMatt Macy * 1032eda14cbcSMatt Macy * This is where we lie about our v_vfsp in order to 1033eda14cbcSMatt Macy * make .zfs/snapshot/<snapname> accessible over NFS 1034eda14cbcSMatt Macy * without requiring manual mounts of <snapname>. 1035eda14cbcSMatt Macy */ 103616038816SMartin Matuska ASSERT3P(VTOZ(*vpp)->z_zfsvfs, !=, zfsvfs); 1037eda14cbcSMatt Macy VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs; 1038eda14cbcSMatt Macy 1039eda14cbcSMatt Macy /* Clear the root flag (set via VFS_ROOT) as well. */ 1040eda14cbcSMatt Macy (*vpp)->v_vflag &= ~VV_ROOT; 1041eda14cbcSMatt Macy } 1042eda14cbcSMatt Macy 1043eda14cbcSMatt Macy if (err != 0) 1044eda14cbcSMatt Macy *vpp = NULL; 1045eda14cbcSMatt Macy return (err); 1046eda14cbcSMatt Macy } 1047eda14cbcSMatt Macy 1048eda14cbcSMatt Macy static int 1049eda14cbcSMatt Macy zfsctl_snapdir_readdir(struct vop_readdir_args *ap) 1050eda14cbcSMatt Macy { 1051eda14cbcSMatt Macy char snapname[ZFS_MAX_DATASET_NAME_LEN]; 1052eda14cbcSMatt Macy struct dirent entry; 1053eda14cbcSMatt Macy vnode_t *vp = ap->a_vp; 1054eda14cbcSMatt Macy zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 1055184c1b94SMartin Matuska zfs_uio_t uio; 1056eda14cbcSMatt Macy int *eofp = ap->a_eofflag; 1057eda14cbcSMatt Macy off_t dots_offset; 1058eda14cbcSMatt Macy int error; 1059eda14cbcSMatt Macy 1060184c1b94SMartin Matuska zfs_uio_init(&uio, ap->a_uio); 1061184c1b94SMartin Matuska 106216038816SMartin Matuska ASSERT3S(vp->v_type, ==, VDIR); 1063eda14cbcSMatt Macy 1064184c1b94SMartin Matuska error = sfs_readdir_common(ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, ap, 1065184c1b94SMartin Matuska &uio, &dots_offset); 1066eda14cbcSMatt Macy if (error != 0) { 1067eda14cbcSMatt Macy if (error == ENAMETOOLONG) /* ran out of destination space */ 1068eda14cbcSMatt Macy error = 0; 1069eda14cbcSMatt Macy return (error); 1070eda14cbcSMatt Macy } 1071eda14cbcSMatt Macy 1072c7046f76SMartin Matuska if ((error = zfs_enter(zfsvfs, 
FTAG)) != 0) 1073c7046f76SMartin Matuska return (error); 1074eda14cbcSMatt Macy for (;;) { 1075eda14cbcSMatt Macy uint64_t cookie; 1076eda14cbcSMatt Macy uint64_t id; 1077eda14cbcSMatt Macy 1078184c1b94SMartin Matuska cookie = zfs_uio_offset(&uio) - dots_offset; 1079eda14cbcSMatt Macy 1080eda14cbcSMatt Macy dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG); 1081eda14cbcSMatt Macy error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof (snapname), 1082eda14cbcSMatt Macy snapname, &id, &cookie, NULL); 1083eda14cbcSMatt Macy dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG); 1084eda14cbcSMatt Macy if (error != 0) { 1085eda14cbcSMatt Macy if (error == ENOENT) { 1086eda14cbcSMatt Macy if (eofp != NULL) 1087eda14cbcSMatt Macy *eofp = 1; 1088eda14cbcSMatt Macy error = 0; 1089eda14cbcSMatt Macy } 1090c7046f76SMartin Matuska zfs_exit(zfsvfs, FTAG); 1091eda14cbcSMatt Macy return (error); 1092eda14cbcSMatt Macy } 1093eda14cbcSMatt Macy 1094eda14cbcSMatt Macy entry.d_fileno = id; 1095eda14cbcSMatt Macy entry.d_type = DT_DIR; 1096eda14cbcSMatt Macy strcpy(entry.d_name, snapname); 1097eda14cbcSMatt Macy entry.d_namlen = strlen(entry.d_name); 1098eda14cbcSMatt Macy entry.d_reclen = sizeof (entry); 1099184c1b94SMartin Matuska error = vfs_read_dirent(ap, &entry, zfs_uio_offset(&uio)); 1100eda14cbcSMatt Macy if (error != 0) { 1101eda14cbcSMatt Macy if (error == ENAMETOOLONG) 1102eda14cbcSMatt Macy error = 0; 1103c7046f76SMartin Matuska zfs_exit(zfsvfs, FTAG); 1104eda14cbcSMatt Macy return (SET_ERROR(error)); 1105eda14cbcSMatt Macy } 1106184c1b94SMartin Matuska zfs_uio_setoffset(&uio, cookie + dots_offset); 1107eda14cbcSMatt Macy } 11081f88aa09SMartin Matuska __builtin_unreachable(); 1109eda14cbcSMatt Macy } 1110eda14cbcSMatt Macy 1111eda14cbcSMatt Macy static int 1112eda14cbcSMatt Macy zfsctl_snapdir_getattr(struct vop_getattr_args *ap) 1113eda14cbcSMatt Macy { 1114eda14cbcSMatt Macy vnode_t *vp = ap->a_vp; 1115eda14cbcSMatt Macy vattr_t *vap = ap->a_vap; 
1116eda14cbcSMatt Macy zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 1117eda14cbcSMatt Macy dsl_dataset_t *ds; 1118eda14cbcSMatt Macy uint64_t snap_count; 1119eda14cbcSMatt Macy int err; 1120eda14cbcSMatt Macy 1121c7046f76SMartin Matuska if ((err = zfs_enter(zfsvfs, FTAG)) != 0) 1122c7046f76SMartin Matuska return (err); 1123eda14cbcSMatt Macy ds = dmu_objset_ds(zfsvfs->z_os); 1124eda14cbcSMatt Macy zfsctl_common_getattr(vp, vap); 1125eda14cbcSMatt Macy vap->va_ctime = dmu_objset_snap_cmtime(zfsvfs->z_os); 1126eda14cbcSMatt Macy vap->va_mtime = vap->va_ctime; 1127eda14cbcSMatt Macy vap->va_birthtime = vap->va_ctime; 1128eda14cbcSMatt Macy if (dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0) { 1129eda14cbcSMatt Macy err = zap_count(dmu_objset_pool(ds->ds_objset)->dp_meta_objset, 1130eda14cbcSMatt Macy dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count); 1131eda14cbcSMatt Macy if (err != 0) { 1132c7046f76SMartin Matuska zfs_exit(zfsvfs, FTAG); 1133eda14cbcSMatt Macy return (err); 1134eda14cbcSMatt Macy } 1135eda14cbcSMatt Macy vap->va_nlink += snap_count; 1136eda14cbcSMatt Macy } 1137eda14cbcSMatt Macy vap->va_size = vap->va_nlink; 1138eda14cbcSMatt Macy 1139c7046f76SMartin Matuska zfs_exit(zfsvfs, FTAG); 1140eda14cbcSMatt Macy return (0); 1141eda14cbcSMatt Macy } 1142eda14cbcSMatt Macy 1143eda14cbcSMatt Macy static struct vop_vector zfsctl_ops_snapdir = { 1144eda14cbcSMatt Macy .vop_default = &default_vnodeops, 1145b8208228SMateusz Guzik .vop_fplookup_vexec = VOP_EAGAIN, 1146e2d997d1SMateusz Guzik .vop_fplookup_symlink = VOP_EAGAIN, 1147eda14cbcSMatt Macy .vop_open = zfsctl_common_open, 1148eda14cbcSMatt Macy .vop_close = zfsctl_common_close, 1149eda14cbcSMatt Macy .vop_getattr = zfsctl_snapdir_getattr, 1150eda14cbcSMatt Macy .vop_access = zfsctl_common_access, 1151eda14cbcSMatt Macy .vop_readdir = zfsctl_snapdir_readdir, 1152eda14cbcSMatt Macy .vop_lookup = zfsctl_snapdir_lookup, 1153eda14cbcSMatt Macy .vop_reclaim = zfsctl_common_reclaim, 1154eda14cbcSMatt Macy 
.vop_fid = zfsctl_common_fid, 1155eda14cbcSMatt Macy .vop_print = zfsctl_common_print, 1156eda14cbcSMatt Macy .vop_pathconf = zfsctl_common_pathconf, 1157eda14cbcSMatt Macy .vop_getacl = zfsctl_common_getacl, 1158681ce946SMartin Matuska #if __FreeBSD_version >= 1400043 11593ffcfa59SMateusz Guzik .vop_add_writecount = vop_stdadd_writecount_nomsync, 1160681ce946SMartin Matuska #endif 1161eda14cbcSMatt Macy }; 1162eda14cbcSMatt Macy VFS_VOP_VECTOR_REGISTER(zfsctl_ops_snapdir); 1163eda14cbcSMatt Macy 1164eda14cbcSMatt Macy 1165eda14cbcSMatt Macy static int 1166eda14cbcSMatt Macy zfsctl_snapshot_inactive(struct vop_inactive_args *ap) 1167eda14cbcSMatt Macy { 1168eda14cbcSMatt Macy vnode_t *vp = ap->a_vp; 1169eda14cbcSMatt Macy 11702a58b312SMartin Matuska vrecycle(vp); 1171eda14cbcSMatt Macy return (0); 1172eda14cbcSMatt Macy } 1173eda14cbcSMatt Macy 1174eda14cbcSMatt Macy static int 1175eda14cbcSMatt Macy zfsctl_snapshot_reclaim(struct vop_reclaim_args *ap) 1176eda14cbcSMatt Macy { 1177eda14cbcSMatt Macy vnode_t *vp = ap->a_vp; 1178eda14cbcSMatt Macy void *data = vp->v_data; 1179eda14cbcSMatt Macy 1180eda14cbcSMatt Macy sfs_reclaim_vnode(vp); 1181eda14cbcSMatt Macy sfs_destroy_node(data); 1182eda14cbcSMatt Macy return (0); 1183eda14cbcSMatt Macy } 1184eda14cbcSMatt Macy 1185eda14cbcSMatt Macy static int 1186eda14cbcSMatt Macy zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap) 1187eda14cbcSMatt Macy { 1188eda14cbcSMatt Macy struct mount *mp; 1189eda14cbcSMatt Macy vnode_t *dvp; 1190eda14cbcSMatt Macy vnode_t *vp; 1191eda14cbcSMatt Macy sfs_node_t *node; 1192eda14cbcSMatt Macy size_t len; 1193eda14cbcSMatt Macy int locked; 1194eda14cbcSMatt Macy int error; 1195eda14cbcSMatt Macy 1196eda14cbcSMatt Macy vp = ap->a_vp; 1197eda14cbcSMatt Macy node = vp->v_data; 1198eda14cbcSMatt Macy len = strlen(node->sn_name); 1199eda14cbcSMatt Macy if (*ap->a_buflen < len) 1200eda14cbcSMatt Macy return (SET_ERROR(ENOMEM)); 1201eda14cbcSMatt Macy 1202eda14cbcSMatt Macy /* 
1203eda14cbcSMatt Macy * Prevent unmounting of the snapshot while the vnode lock 1204eda14cbcSMatt Macy * is not held. That is not strictly required, but allows 1205eda14cbcSMatt Macy * us to assert that an uncovered snapshot vnode is never 1206eda14cbcSMatt Macy * "leaked". 1207eda14cbcSMatt Macy */ 1208eda14cbcSMatt Macy mp = vp->v_mountedhere; 1209eda14cbcSMatt Macy if (mp == NULL) 1210eda14cbcSMatt Macy return (SET_ERROR(ENOENT)); 1211eda14cbcSMatt Macy error = vfs_busy(mp, 0); 1212eda14cbcSMatt Macy KASSERT(error == 0, ("vfs_busy(mp, 0) failed with %d", error)); 1213eda14cbcSMatt Macy 1214eda14cbcSMatt Macy /* 1215eda14cbcSMatt Macy * We can vput the vnode as we can now depend on the reference owned 1216eda14cbcSMatt Macy * by the busied mp. But we also need to hold the vnode, because 1217eda14cbcSMatt Macy * the reference may go after vfs_unbusy() which has to be called 1218eda14cbcSMatt Macy * before we can lock the vnode again. 1219eda14cbcSMatt Macy */ 1220eda14cbcSMatt Macy locked = VOP_ISLOCKED(vp); 1221eda14cbcSMatt Macy enum vgetstate vs = vget_prep(vp); 1222eda14cbcSMatt Macy vput(vp); 1223eda14cbcSMatt Macy 1224eda14cbcSMatt Macy /* Look up .zfs/snapshot, our parent. */ 1225eda14cbcSMatt Macy error = zfsctl_snapdir_vnode(vp->v_mount, NULL, LK_SHARED, &dvp); 1226eda14cbcSMatt Macy if (error == 0) { 1227*ce4dcb97SMartin Matuska VOP_UNLOCK(dvp); 1228eda14cbcSMatt Macy *ap->a_vpp = dvp; 1229eda14cbcSMatt Macy *ap->a_buflen -= len; 1230da5137abSMartin Matuska memcpy(ap->a_buf + *ap->a_buflen, node->sn_name, len); 1231eda14cbcSMatt Macy } 1232eda14cbcSMatt Macy vfs_unbusy(mp); 1233eda14cbcSMatt Macy vget_finish(vp, locked | LK_RETRY, vs); 1234eda14cbcSMatt Macy return (error); 1235eda14cbcSMatt Macy } 1236eda14cbcSMatt Macy 1237eda14cbcSMatt Macy /* 1238eda14cbcSMatt Macy * These VP's should never see the light of day. They should always 1239eda14cbcSMatt Macy * be covered. 
1240eda14cbcSMatt Macy */ 1241eda14cbcSMatt Macy static struct vop_vector zfsctl_ops_snapshot = { 1242ed9215c8SMartin Matuska .vop_default = NULL, /* ensure very restricted access */ 1243b8208228SMateusz Guzik .vop_fplookup_vexec = VOP_EAGAIN, 1244e2d997d1SMateusz Guzik .vop_fplookup_symlink = VOP_EAGAIN, 12452a58b312SMartin Matuska .vop_open = zfsctl_common_open, 12462a58b312SMartin Matuska .vop_close = zfsctl_common_close, 1247eda14cbcSMatt Macy .vop_inactive = zfsctl_snapshot_inactive, 1248eda14cbcSMatt Macy .vop_need_inactive = vop_stdneed_inactive, 1249eda14cbcSMatt Macy .vop_reclaim = zfsctl_snapshot_reclaim, 1250eda14cbcSMatt Macy .vop_vptocnp = zfsctl_snapshot_vptocnp, 1251eda14cbcSMatt Macy .vop_lock1 = vop_stdlock, 1252eda14cbcSMatt Macy .vop_unlock = vop_stdunlock, 1253eda14cbcSMatt Macy .vop_islocked = vop_stdislocked, 1254eda14cbcSMatt Macy .vop_advlockpurge = vop_stdadvlockpurge, /* called by vgone */ 1255eda14cbcSMatt Macy .vop_print = zfsctl_common_print, 1256681ce946SMartin Matuska #if __FreeBSD_version >= 1400043 12573ffcfa59SMateusz Guzik .vop_add_writecount = vop_stdadd_writecount_nomsync, 1258681ce946SMartin Matuska #endif 1259eda14cbcSMatt Macy }; 1260eda14cbcSMatt Macy VFS_VOP_VECTOR_REGISTER(zfsctl_ops_snapshot); 1261eda14cbcSMatt Macy 1262eda14cbcSMatt Macy int 1263eda14cbcSMatt Macy zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp) 1264eda14cbcSMatt Macy { 1265eda14cbcSMatt Macy zfsvfs_t *zfsvfs __unused = vfsp->vfs_data; 1266eda14cbcSMatt Macy vnode_t *vp; 1267eda14cbcSMatt Macy int error; 1268eda14cbcSMatt Macy 126916038816SMartin Matuska ASSERT3P(zfsvfs->z_ctldir, !=, NULL); 1270eda14cbcSMatt Macy *zfsvfsp = NULL; 1271eda14cbcSMatt Macy error = sfs_vnode_get(vfsp, LK_EXCLUSIVE, 1272eda14cbcSMatt Macy ZFSCTL_INO_SNAPDIR, objsetid, &vp); 1273eda14cbcSMatt Macy if (error == 0 && vp != NULL) { 1274eda14cbcSMatt Macy /* 1275eda14cbcSMatt Macy * XXX Probably need to at least reference, if not busy, the mp. 
1276eda14cbcSMatt Macy */ 1277eda14cbcSMatt Macy if (vp->v_mountedhere != NULL) 1278eda14cbcSMatt Macy *zfsvfsp = vp->v_mountedhere->mnt_data; 1279eda14cbcSMatt Macy vput(vp); 1280eda14cbcSMatt Macy } 1281eda14cbcSMatt Macy if (*zfsvfsp == NULL) 1282eda14cbcSMatt Macy return (SET_ERROR(EINVAL)); 1283eda14cbcSMatt Macy return (0); 1284eda14cbcSMatt Macy } 1285eda14cbcSMatt Macy 1286eda14cbcSMatt Macy /* 1287eda14cbcSMatt Macy * Unmount any snapshots for the given filesystem. This is called from 1288eda14cbcSMatt Macy * zfs_umount() - if we have a ctldir, then go through and unmount all the 1289eda14cbcSMatt Macy * snapshots. 1290eda14cbcSMatt Macy */ 1291eda14cbcSMatt Macy int 1292eda14cbcSMatt Macy zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr) 1293eda14cbcSMatt Macy { 1294eda14cbcSMatt Macy char snapname[ZFS_MAX_DATASET_NAME_LEN]; 1295eda14cbcSMatt Macy zfsvfs_t *zfsvfs = vfsp->vfs_data; 1296eda14cbcSMatt Macy struct mount *mp; 1297eda14cbcSMatt Macy vnode_t *vp; 1298eda14cbcSMatt Macy uint64_t cookie; 1299eda14cbcSMatt Macy int error; 1300eda14cbcSMatt Macy 130116038816SMartin Matuska ASSERT3P(zfsvfs->z_ctldir, !=, NULL); 1302eda14cbcSMatt Macy 1303eda14cbcSMatt Macy cookie = 0; 1304eda14cbcSMatt Macy for (;;) { 1305eda14cbcSMatt Macy uint64_t id; 1306eda14cbcSMatt Macy 1307eda14cbcSMatt Macy dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG); 1308eda14cbcSMatt Macy error = dmu_snapshot_list_next(zfsvfs->z_os, sizeof (snapname), 1309eda14cbcSMatt Macy snapname, &id, &cookie, NULL); 1310eda14cbcSMatt Macy dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG); 1311eda14cbcSMatt Macy if (error != 0) { 1312eda14cbcSMatt Macy if (error == ENOENT) 1313eda14cbcSMatt Macy error = 0; 1314eda14cbcSMatt Macy break; 1315eda14cbcSMatt Macy } 1316eda14cbcSMatt Macy 1317eda14cbcSMatt Macy for (;;) { 1318eda14cbcSMatt Macy error = sfs_vnode_get(vfsp, LK_EXCLUSIVE, 1319eda14cbcSMatt Macy ZFSCTL_INO_SNAPDIR, id, &vp); 1320eda14cbcSMatt Macy if (error != 0 
|| vp == NULL) 1321eda14cbcSMatt Macy break; 1322eda14cbcSMatt Macy 1323eda14cbcSMatt Macy mp = vp->v_mountedhere; 1324eda14cbcSMatt Macy 1325eda14cbcSMatt Macy /* 1326eda14cbcSMatt Macy * v_mountedhere being NULL means that the 1327eda14cbcSMatt Macy * (uncovered) vnode is in a transient state 1328eda14cbcSMatt Macy * (mounting or unmounting), so loop until it 1329eda14cbcSMatt Macy * settles down. 1330eda14cbcSMatt Macy */ 1331eda14cbcSMatt Macy if (mp != NULL) 1332eda14cbcSMatt Macy break; 1333eda14cbcSMatt Macy vput(vp); 1334eda14cbcSMatt Macy } 1335eda14cbcSMatt Macy if (error != 0) 1336eda14cbcSMatt Macy break; 1337eda14cbcSMatt Macy if (vp == NULL) 1338eda14cbcSMatt Macy continue; /* no mountpoint, nothing to do */ 1339eda14cbcSMatt Macy 1340eda14cbcSMatt Macy /* 1341eda14cbcSMatt Macy * The mount-point vnode is kept locked to avoid spurious EBUSY 1342eda14cbcSMatt Macy * from a concurrent umount. 1343eda14cbcSMatt Macy * The vnode lock must have recursive locking enabled. 1344eda14cbcSMatt Macy */ 1345eda14cbcSMatt Macy vfs_ref(mp); 1346eda14cbcSMatt Macy error = dounmount(mp, fflags, curthread); 1347eda14cbcSMatt Macy KASSERT_IMPLY(error == 0, vrefcnt(vp) == 1, 1348eda14cbcSMatt Macy ("extra references after unmount")); 1349eda14cbcSMatt Macy vput(vp); 1350eda14cbcSMatt Macy if (error != 0) 1351eda14cbcSMatt Macy break; 1352eda14cbcSMatt Macy } 1353eda14cbcSMatt Macy KASSERT_IMPLY((fflags & MS_FORCE) != 0, error == 0, 1354eda14cbcSMatt Macy ("force unmounting failed")); 1355eda14cbcSMatt Macy return (error); 1356eda14cbcSMatt Macy } 1357eda14cbcSMatt Macy 1358eda14cbcSMatt Macy int 1359180f8225SMatt Macy zfsctl_snapshot_unmount(const char *snapname, int flags __unused) 1360eda14cbcSMatt Macy { 1361eda14cbcSMatt Macy vfs_t *vfsp = NULL; 1362eda14cbcSMatt Macy zfsvfs_t *zfsvfs = NULL; 1363eda14cbcSMatt Macy 1364eda14cbcSMatt Macy if (strchr(snapname, '@') == NULL) 1365eda14cbcSMatt Macy return (0); 1366eda14cbcSMatt Macy 1367eda14cbcSMatt Macy int err = 
getzfsvfs(snapname, &zfsvfs); 1368eda14cbcSMatt Macy if (err != 0) { 1369eda14cbcSMatt Macy ASSERT3P(zfsvfs, ==, NULL); 1370eda14cbcSMatt Macy return (0); 1371eda14cbcSMatt Macy } 1372eda14cbcSMatt Macy vfsp = zfsvfs->z_vfs; 1373eda14cbcSMatt Macy 1374eda14cbcSMatt Macy ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os))); 1375eda14cbcSMatt Macy 1376eda14cbcSMatt Macy vfs_ref(vfsp); 1377eda14cbcSMatt Macy vfs_unbusy(vfsp); 1378eda14cbcSMatt Macy return (dounmount(vfsp, MS_FORCE, curthread)); 1379eda14cbcSMatt Macy } 1380