1eda14cbcSMatt Macy /* 2eda14cbcSMatt Macy * CDDL HEADER START 3eda14cbcSMatt Macy * 4eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7eda14cbcSMatt Macy * 8eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9271171e0SMartin Matuska * or https://opensource.org/licenses/CDDL-1.0. 10eda14cbcSMatt Macy * See the License for the specific language governing permissions 11eda14cbcSMatt Macy * and limitations under the License. 12eda14cbcSMatt Macy * 13eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18eda14cbcSMatt Macy * 19eda14cbcSMatt Macy * CDDL HEADER END 20eda14cbcSMatt Macy */ 21eda14cbcSMatt Macy /* 22eda14cbcSMatt Macy * 23eda14cbcSMatt Macy * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24eda14cbcSMatt Macy * Copyright (C) 2011 Lawrence Livermore National Security, LLC. 25eda14cbcSMatt Macy * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER). 26eda14cbcSMatt Macy * LLNL-CODE-403049. 27eda14cbcSMatt Macy * Rewritten for Linux by: 28eda14cbcSMatt Macy * Rohan Puri <rohan.puri15@gmail.com> 29eda14cbcSMatt Macy * Brian Behlendorf <behlendorf1@llnl.gov> 30eda14cbcSMatt Macy * Copyright (c) 2013 by Delphix. All rights reserved. 31eda14cbcSMatt Macy * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved. 32eda14cbcSMatt Macy * Copyright (c) 2018 George Melikov. All Rights Reserved. 33eda14cbcSMatt Macy * Copyright (c) 2019 Datto, Inc. All rights reserved. 34ac0bf12eSMatt Macy * Copyright (c) 2020 The MathWorks, Inc. All rights reserved. 35eda14cbcSMatt Macy */ 36eda14cbcSMatt Macy 37eda14cbcSMatt Macy /* 38eda14cbcSMatt Macy * ZFS control directory (a.k.a. ".zfs") 39eda14cbcSMatt Macy * 40eda14cbcSMatt Macy * This directory provides a common location for all ZFS meta-objects. 41eda14cbcSMatt Macy * Currently, this is only the 'snapshot' and 'shares' directory, but this may 42eda14cbcSMatt Macy * expand in the future. The elements are built dynamically, as the hierarchy 43eda14cbcSMatt Macy * does not actually exist on disk. 44eda14cbcSMatt Macy * 45eda14cbcSMatt Macy * For 'snapshot', we don't want to have all snapshots always mounted, because 46eda14cbcSMatt Macy * this would take up a huge amount of space in /etc/mnttab. We have three 47eda14cbcSMatt Macy * types of objects: 48eda14cbcSMatt Macy * 49eda14cbcSMatt Macy * ctldir ------> snapshotdir -------> snapshot 50eda14cbcSMatt Macy * | 51eda14cbcSMatt Macy * | 52eda14cbcSMatt Macy * V 53eda14cbcSMatt Macy * mounted fs 54eda14cbcSMatt Macy * 55eda14cbcSMatt Macy * The 'snapshot' node contains just enough information to lookup '..' and act 56eda14cbcSMatt Macy * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we 57eda14cbcSMatt Macy * perform an automount of the underlying filesystem and return the 58eda14cbcSMatt Macy * corresponding inode. 59eda14cbcSMatt Macy * 60eda14cbcSMatt Macy * All mounts are handled automatically by an user mode helper which invokes 61eda14cbcSMatt Macy * the mount procedure. Unmounts are handled by allowing the mount 62eda14cbcSMatt Macy * point to expire so the kernel may automatically unmount it. 63eda14cbcSMatt Macy * 64eda14cbcSMatt Macy * The '.zfs', '.zfs/snapshot', and all directories created under 65eda14cbcSMatt Macy * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') all share the same 66eda14cbcSMatt Macy * zfsvfs_t as the head filesystem (what '.zfs' lives under). 67eda14cbcSMatt Macy * 68eda14cbcSMatt Macy * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths 69eda14cbcSMatt Macy * (ie: snapshots) are complete ZFS filesystems and have their own unique 70eda14cbcSMatt Macy * zfsvfs_t. However, the fsid reported by these mounts will be the same 71eda14cbcSMatt Macy * as that used by the parent zfsvfs_t to make NFS happy. 72eda14cbcSMatt Macy */ 73eda14cbcSMatt Macy 74eda14cbcSMatt Macy #include <sys/types.h> 75eda14cbcSMatt Macy #include <sys/param.h> 76eda14cbcSMatt Macy #include <sys/time.h> 77eda14cbcSMatt Macy #include <sys/sysmacros.h> 78eda14cbcSMatt Macy #include <sys/pathname.h> 79eda14cbcSMatt Macy #include <sys/vfs.h> 80eda14cbcSMatt Macy #include <sys/zfs_ctldir.h> 81eda14cbcSMatt Macy #include <sys/zfs_ioctl.h> 82eda14cbcSMatt Macy #include <sys/zfs_vfsops.h> 83eda14cbcSMatt Macy #include <sys/zfs_vnops.h> 84eda14cbcSMatt Macy #include <sys/stat.h> 85eda14cbcSMatt Macy #include <sys/dmu.h> 86eda14cbcSMatt Macy #include <sys/dmu_objset.h> 87eda14cbcSMatt Macy #include <sys/dsl_destroy.h> 88eda14cbcSMatt Macy #include <sys/dsl_deleg.h> 89eda14cbcSMatt Macy #include <sys/zpl.h> 90eda14cbcSMatt Macy #include <sys/mntent.h> 91eda14cbcSMatt Macy #include "zfs_namecheck.h" 92eda14cbcSMatt Macy 93eda14cbcSMatt Macy /* 94eda14cbcSMatt Macy * Two AVL trees are maintained which contain all currently automounted 95eda14cbcSMatt Macy * snapshots. Every automounted snapshots maps to a single zfs_snapentry_t 96eda14cbcSMatt Macy * entry which MUST: 97eda14cbcSMatt Macy * 98eda14cbcSMatt Macy * - be attached to both trees, and 99eda14cbcSMatt Macy * - be unique, no duplicate entries are allowed. 100eda14cbcSMatt Macy * 101eda14cbcSMatt Macy * The zfs_snapshots_by_name tree is indexed by the full dataset name 102eda14cbcSMatt Macy * while the zfs_snapshots_by_objsetid tree is indexed by the unique 103eda14cbcSMatt Macy * objsetid. This allows for fast lookups either by name or objsetid. 104eda14cbcSMatt Macy */ 105eda14cbcSMatt Macy static avl_tree_t zfs_snapshots_by_name; 106eda14cbcSMatt Macy static avl_tree_t zfs_snapshots_by_objsetid; 107eda14cbcSMatt Macy static krwlock_t zfs_snapshot_lock; 108eda14cbcSMatt Macy 109eda14cbcSMatt Macy /* 110eda14cbcSMatt Macy * Control Directory Tunables (.zfs) 111eda14cbcSMatt Macy */ 112eda14cbcSMatt Macy int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT; 113e92ffd9bSMartin Matuska static int zfs_admin_snapshot = 0; 1147a7741afSMartin Matuska static int zfs_snapshot_no_setuid = 0; 115eda14cbcSMatt Macy 116eda14cbcSMatt Macy typedef struct { 117eda14cbcSMatt Macy char *se_name; /* full snapshot name */ 118eda14cbcSMatt Macy char *se_path; /* full mount path */ 119eda14cbcSMatt Macy spa_t *se_spa; /* pool spa */ 120eda14cbcSMatt Macy uint64_t se_objsetid; /* snapshot objset id */ 121eda14cbcSMatt Macy struct dentry *se_root_dentry; /* snapshot root dentry */ 12281b22a98SMartin Matuska krwlock_t se_taskqid_lock; /* scheduled unmount taskqid lock */ 123eda14cbcSMatt Macy taskqid_t se_taskqid; /* scheduled unmount taskqid */ 124eda14cbcSMatt Macy avl_node_t se_node_name; /* zfs_snapshots_by_name link */ 125eda14cbcSMatt Macy avl_node_t se_node_objsetid; /* zfs_snapshots_by_objsetid link */ 126eda14cbcSMatt Macy zfs_refcount_t se_refcount; /* reference count */ 127eda14cbcSMatt Macy } zfs_snapentry_t; 128eda14cbcSMatt Macy 129eda14cbcSMatt Macy static void zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay); 130eda14cbcSMatt Macy 131eda14cbcSMatt Macy /* 132eda14cbcSMatt Macy * Allocate a new zfs_snapentry_t being careful to make a copy of the 133eda14cbcSMatt Macy * the snapshot name and provided mount point. No reference is taken. 134eda14cbcSMatt Macy */ 135eda14cbcSMatt Macy static zfs_snapentry_t * 136180f8225SMatt Macy zfsctl_snapshot_alloc(const char *full_name, const char *full_path, spa_t *spa, 137eda14cbcSMatt Macy uint64_t objsetid, struct dentry *root_dentry) 138eda14cbcSMatt Macy { 139eda14cbcSMatt Macy zfs_snapentry_t *se; 140eda14cbcSMatt Macy 141eda14cbcSMatt Macy se = kmem_zalloc(sizeof (zfs_snapentry_t), KM_SLEEP); 142eda14cbcSMatt Macy 143eda14cbcSMatt Macy se->se_name = kmem_strdup(full_name); 144eda14cbcSMatt Macy se->se_path = kmem_strdup(full_path); 145eda14cbcSMatt Macy se->se_spa = spa; 146eda14cbcSMatt Macy se->se_objsetid = objsetid; 147eda14cbcSMatt Macy se->se_root_dentry = root_dentry; 148eda14cbcSMatt Macy se->se_taskqid = TASKQID_INVALID; 14981b22a98SMartin Matuska rw_init(&se->se_taskqid_lock, NULL, RW_DEFAULT, NULL); 150eda14cbcSMatt Macy 151eda14cbcSMatt Macy zfs_refcount_create(&se->se_refcount); 152eda14cbcSMatt Macy 153eda14cbcSMatt Macy return (se); 154eda14cbcSMatt Macy } 155eda14cbcSMatt Macy 156eda14cbcSMatt Macy /* 157eda14cbcSMatt Macy * Free a zfs_snapentry_t the caller must ensure there are no active 158eda14cbcSMatt Macy * references. 159eda14cbcSMatt Macy */ 160eda14cbcSMatt Macy static void 161eda14cbcSMatt Macy zfsctl_snapshot_free(zfs_snapentry_t *se) 162eda14cbcSMatt Macy { 163eda14cbcSMatt Macy zfs_refcount_destroy(&se->se_refcount); 164eda14cbcSMatt Macy kmem_strfree(se->se_name); 165eda14cbcSMatt Macy kmem_strfree(se->se_path); 166716fd348SMartin Matuska rw_destroy(&se->se_taskqid_lock); 167eda14cbcSMatt Macy 168eda14cbcSMatt Macy kmem_free(se, sizeof (zfs_snapentry_t)); 169eda14cbcSMatt Macy } 170eda14cbcSMatt Macy 171eda14cbcSMatt Macy /* 172eda14cbcSMatt Macy * Hold a reference on the zfs_snapentry_t. 173eda14cbcSMatt Macy */ 174eda14cbcSMatt Macy static void 175eda14cbcSMatt Macy zfsctl_snapshot_hold(zfs_snapentry_t *se) 176eda14cbcSMatt Macy { 177eda14cbcSMatt Macy zfs_refcount_add(&se->se_refcount, NULL); 178eda14cbcSMatt Macy } 179eda14cbcSMatt Macy 180eda14cbcSMatt Macy /* 181eda14cbcSMatt Macy * Release a reference on the zfs_snapentry_t. When the number of 182eda14cbcSMatt Macy * references drops to zero the structure will be freed. 183eda14cbcSMatt Macy */ 184eda14cbcSMatt Macy static void 185eda14cbcSMatt Macy zfsctl_snapshot_rele(zfs_snapentry_t *se) 186eda14cbcSMatt Macy { 187eda14cbcSMatt Macy if (zfs_refcount_remove(&se->se_refcount, NULL) == 0) 188eda14cbcSMatt Macy zfsctl_snapshot_free(se); 189eda14cbcSMatt Macy } 190eda14cbcSMatt Macy 191eda14cbcSMatt Macy /* 192eda14cbcSMatt Macy * Add a zfs_snapentry_t to both the zfs_snapshots_by_name and 193eda14cbcSMatt Macy * zfs_snapshots_by_objsetid trees. While the zfs_snapentry_t is part 194eda14cbcSMatt Macy * of the trees a reference is held. 195eda14cbcSMatt Macy */ 196eda14cbcSMatt Macy static void 197eda14cbcSMatt Macy zfsctl_snapshot_add(zfs_snapentry_t *se) 198eda14cbcSMatt Macy { 199eda14cbcSMatt Macy ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); 200eda14cbcSMatt Macy zfsctl_snapshot_hold(se); 201eda14cbcSMatt Macy avl_add(&zfs_snapshots_by_name, se); 202eda14cbcSMatt Macy avl_add(&zfs_snapshots_by_objsetid, se); 203eda14cbcSMatt Macy } 204eda14cbcSMatt Macy 205eda14cbcSMatt Macy /* 206eda14cbcSMatt Macy * Remove a zfs_snapentry_t from both the zfs_snapshots_by_name and 207eda14cbcSMatt Macy * zfs_snapshots_by_objsetid trees. Upon removal a reference is dropped, 208eda14cbcSMatt Macy * this can result in the structure being freed if that was the last 209eda14cbcSMatt Macy * remaining reference. 210eda14cbcSMatt Macy */ 211eda14cbcSMatt Macy static void 212eda14cbcSMatt Macy zfsctl_snapshot_remove(zfs_snapentry_t *se) 213eda14cbcSMatt Macy { 214eda14cbcSMatt Macy ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); 215eda14cbcSMatt Macy avl_remove(&zfs_snapshots_by_name, se); 216eda14cbcSMatt Macy avl_remove(&zfs_snapshots_by_objsetid, se); 217eda14cbcSMatt Macy zfsctl_snapshot_rele(se); 218eda14cbcSMatt Macy } 219eda14cbcSMatt Macy 220eda14cbcSMatt Macy /* 221eda14cbcSMatt Macy * Snapshot name comparison function for the zfs_snapshots_by_name. 222eda14cbcSMatt Macy */ 223eda14cbcSMatt Macy static int 224eda14cbcSMatt Macy snapentry_compare_by_name(const void *a, const void *b) 225eda14cbcSMatt Macy { 226eda14cbcSMatt Macy const zfs_snapentry_t *se_a = a; 227eda14cbcSMatt Macy const zfs_snapentry_t *se_b = b; 228eda14cbcSMatt Macy int ret; 229eda14cbcSMatt Macy 230eda14cbcSMatt Macy ret = strcmp(se_a->se_name, se_b->se_name); 231eda14cbcSMatt Macy 232eda14cbcSMatt Macy if (ret < 0) 233eda14cbcSMatt Macy return (-1); 234eda14cbcSMatt Macy else if (ret > 0) 235eda14cbcSMatt Macy return (1); 236eda14cbcSMatt Macy else 237eda14cbcSMatt Macy return (0); 238eda14cbcSMatt Macy } 239eda14cbcSMatt Macy 240eda14cbcSMatt Macy /* 241eda14cbcSMatt Macy * Snapshot name comparison function for the zfs_snapshots_by_objsetid. 242eda14cbcSMatt Macy */ 243eda14cbcSMatt Macy static int 244eda14cbcSMatt Macy snapentry_compare_by_objsetid(const void *a, const void *b) 245eda14cbcSMatt Macy { 246eda14cbcSMatt Macy const zfs_snapentry_t *se_a = a; 247eda14cbcSMatt Macy const zfs_snapentry_t *se_b = b; 248eda14cbcSMatt Macy 249eda14cbcSMatt Macy if (se_a->se_spa != se_b->se_spa) 250eda14cbcSMatt Macy return ((ulong_t)se_a->se_spa < (ulong_t)se_b->se_spa ? -1 : 1); 251eda14cbcSMatt Macy 252eda14cbcSMatt Macy if (se_a->se_objsetid < se_b->se_objsetid) 253eda14cbcSMatt Macy return (-1); 254eda14cbcSMatt Macy else if (se_a->se_objsetid > se_b->se_objsetid) 255eda14cbcSMatt Macy return (1); 256eda14cbcSMatt Macy else 257eda14cbcSMatt Macy return (0); 258eda14cbcSMatt Macy } 259eda14cbcSMatt Macy 260eda14cbcSMatt Macy /* 261eda14cbcSMatt Macy * Find a zfs_snapentry_t in zfs_snapshots_by_name. If the snapname 262eda14cbcSMatt Macy * is found a pointer to the zfs_snapentry_t is returned and a reference 263eda14cbcSMatt Macy * taken on the structure. The caller is responsible for dropping the 264eda14cbcSMatt Macy * reference with zfsctl_snapshot_rele(). If the snapname is not found 265eda14cbcSMatt Macy * NULL will be returned. 266eda14cbcSMatt Macy */ 267eda14cbcSMatt Macy static zfs_snapentry_t * 268180f8225SMatt Macy zfsctl_snapshot_find_by_name(const char *snapname) 269eda14cbcSMatt Macy { 270eda14cbcSMatt Macy zfs_snapentry_t *se, search; 271eda14cbcSMatt Macy 272eda14cbcSMatt Macy ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock)); 273eda14cbcSMatt Macy 274180f8225SMatt Macy search.se_name = (char *)snapname; 275eda14cbcSMatt Macy se = avl_find(&zfs_snapshots_by_name, &search, NULL); 276eda14cbcSMatt Macy if (se) 277eda14cbcSMatt Macy zfsctl_snapshot_hold(se); 278eda14cbcSMatt Macy 279eda14cbcSMatt Macy return (se); 280eda14cbcSMatt Macy } 281eda14cbcSMatt Macy 282eda14cbcSMatt Macy /* 283eda14cbcSMatt Macy * Find a zfs_snapentry_t in zfs_snapshots_by_objsetid given the objset id 284eda14cbcSMatt Macy * rather than the snapname. In all other respects it behaves the same 285eda14cbcSMatt Macy * as zfsctl_snapshot_find_by_name(). 286eda14cbcSMatt Macy */ 287eda14cbcSMatt Macy static zfs_snapentry_t * 288eda14cbcSMatt Macy zfsctl_snapshot_find_by_objsetid(spa_t *spa, uint64_t objsetid) 289eda14cbcSMatt Macy { 290eda14cbcSMatt Macy zfs_snapentry_t *se, search; 291eda14cbcSMatt Macy 292eda14cbcSMatt Macy ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock)); 293eda14cbcSMatt Macy 294eda14cbcSMatt Macy search.se_spa = spa; 295eda14cbcSMatt Macy search.se_objsetid = objsetid; 296eda14cbcSMatt Macy se = avl_find(&zfs_snapshots_by_objsetid, &search, NULL); 297eda14cbcSMatt Macy if (se) 298eda14cbcSMatt Macy zfsctl_snapshot_hold(se); 299eda14cbcSMatt Macy 300eda14cbcSMatt Macy return (se); 301eda14cbcSMatt Macy } 302eda14cbcSMatt Macy 303eda14cbcSMatt Macy /* 304eda14cbcSMatt Macy * Rename a zfs_snapentry_t in the zfs_snapshots_by_name. The structure is 305eda14cbcSMatt Macy * removed, renamed, and added back to the new correct location in the tree. 306eda14cbcSMatt Macy */ 307eda14cbcSMatt Macy static int 308180f8225SMatt Macy zfsctl_snapshot_rename(const char *old_snapname, const char *new_snapname) 309eda14cbcSMatt Macy { 310eda14cbcSMatt Macy zfs_snapentry_t *se; 311eda14cbcSMatt Macy 312eda14cbcSMatt Macy ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock)); 313eda14cbcSMatt Macy 314eda14cbcSMatt Macy se = zfsctl_snapshot_find_by_name(old_snapname); 315eda14cbcSMatt Macy if (se == NULL) 316eda14cbcSMatt Macy return (SET_ERROR(ENOENT)); 317eda14cbcSMatt Macy 318eda14cbcSMatt Macy zfsctl_snapshot_remove(se); 319eda14cbcSMatt Macy kmem_strfree(se->se_name); 320eda14cbcSMatt Macy se->se_name = kmem_strdup(new_snapname); 321eda14cbcSMatt Macy zfsctl_snapshot_add(se); 322eda14cbcSMatt Macy zfsctl_snapshot_rele(se); 323eda14cbcSMatt Macy 324eda14cbcSMatt Macy return (0); 325eda14cbcSMatt Macy } 326eda14cbcSMatt Macy 327eda14cbcSMatt Macy /* 328eda14cbcSMatt Macy * Delayed task responsible for unmounting an expired automounted snapshot. 329eda14cbcSMatt Macy */ 330eda14cbcSMatt Macy static void 331eda14cbcSMatt Macy snapentry_expire(void *data) 332eda14cbcSMatt Macy { 333eda14cbcSMatt Macy zfs_snapentry_t *se = (zfs_snapentry_t *)data; 334eda14cbcSMatt Macy spa_t *spa = se->se_spa; 335eda14cbcSMatt Macy uint64_t objsetid = se->se_objsetid; 336eda14cbcSMatt Macy 337eda14cbcSMatt Macy if (zfs_expire_snapshot <= 0) { 338eda14cbcSMatt Macy zfsctl_snapshot_rele(se); 339eda14cbcSMatt Macy return; 340eda14cbcSMatt Macy } 341eda14cbcSMatt Macy 34281b22a98SMartin Matuska rw_enter(&se->se_taskqid_lock, RW_WRITER); 343eda14cbcSMatt Macy se->se_taskqid = TASKQID_INVALID; 34481b22a98SMartin Matuska rw_exit(&se->se_taskqid_lock); 345eda14cbcSMatt Macy (void) zfsctl_snapshot_unmount(se->se_name, MNT_EXPIRE); 346eda14cbcSMatt Macy zfsctl_snapshot_rele(se); 347eda14cbcSMatt Macy 348eda14cbcSMatt Macy /* 349eda14cbcSMatt Macy * Reschedule the unmount if the zfs_snapentry_t wasn't removed. 350eda14cbcSMatt Macy * This can occur when the snapshot is busy. 351eda14cbcSMatt Macy */ 352eda14cbcSMatt Macy rw_enter(&zfs_snapshot_lock, RW_READER); 353eda14cbcSMatt Macy if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) { 354eda14cbcSMatt Macy zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot); 355eda14cbcSMatt Macy zfsctl_snapshot_rele(se); 356eda14cbcSMatt Macy } 357eda14cbcSMatt Macy rw_exit(&zfs_snapshot_lock); 358eda14cbcSMatt Macy } 359eda14cbcSMatt Macy 360eda14cbcSMatt Macy /* 361eda14cbcSMatt Macy * Cancel an automatic unmount of a snapname. This callback is responsible 362eda14cbcSMatt Macy * for dropping the reference on the zfs_snapentry_t which was taken when 363eda14cbcSMatt Macy * during dispatch. 364eda14cbcSMatt Macy */ 365eda14cbcSMatt Macy static void 366eda14cbcSMatt Macy zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se) 367eda14cbcSMatt Macy { 36881b22a98SMartin Matuska int err = 0; 36981b22a98SMartin Matuska rw_enter(&se->se_taskqid_lock, RW_WRITER); 37081b22a98SMartin Matuska err = taskq_cancel_id(system_delay_taskq, se->se_taskqid); 37181b22a98SMartin Matuska /* 37281b22a98SMartin Matuska * if we get ENOENT, the taskq couldn't be found to be 37381b22a98SMartin Matuska * canceled, so we can just mark it as invalid because 37481b22a98SMartin Matuska * it's already gone. If we got EBUSY, then we already 37581b22a98SMartin Matuska * blocked until it was gone _anyway_, so we don't care. 37681b22a98SMartin Matuska */ 377eda14cbcSMatt Macy se->se_taskqid = TASKQID_INVALID; 37881b22a98SMartin Matuska rw_exit(&se->se_taskqid_lock); 37981b22a98SMartin Matuska if (err == 0) { 380eda14cbcSMatt Macy zfsctl_snapshot_rele(se); 381eda14cbcSMatt Macy } 382eda14cbcSMatt Macy } 383eda14cbcSMatt Macy 384eda14cbcSMatt Macy /* 385eda14cbcSMatt Macy * Dispatch the unmount task for delayed handling with a hold protecting it. 386eda14cbcSMatt Macy */ 387eda14cbcSMatt Macy static void 388eda14cbcSMatt Macy zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay) 389eda14cbcSMatt Macy { 390eda14cbcSMatt Macy 391eda14cbcSMatt Macy if (delay <= 0) 392eda14cbcSMatt Macy return; 393eda14cbcSMatt Macy 394eda14cbcSMatt Macy zfsctl_snapshot_hold(se); 39581b22a98SMartin Matuska rw_enter(&se->se_taskqid_lock, RW_WRITER); 396c9539b89SMartin Matuska /* 397c9539b89SMartin Matuska * If this condition happens, we managed to: 398c9539b89SMartin Matuska * - dispatch once 399c9539b89SMartin Matuska * - want to dispatch _again_ before it returned 400c9539b89SMartin Matuska * 401c9539b89SMartin Matuska * So let's just return - if that task fails at unmounting, 402c9539b89SMartin Matuska * we'll eventually dispatch again, and if it succeeds, 403c9539b89SMartin Matuska * no problem. 404c9539b89SMartin Matuska */ 405c9539b89SMartin Matuska if (se->se_taskqid != TASKQID_INVALID) { 406c9539b89SMartin Matuska rw_exit(&se->se_taskqid_lock); 407c9539b89SMartin Matuska zfsctl_snapshot_rele(se); 408c9539b89SMartin Matuska return; 409c9539b89SMartin Matuska } 410eda14cbcSMatt Macy se->se_taskqid = taskq_dispatch_delay(system_delay_taskq, 411eda14cbcSMatt Macy snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ); 41281b22a98SMartin Matuska rw_exit(&se->se_taskqid_lock); 413eda14cbcSMatt Macy } 414eda14cbcSMatt Macy 415eda14cbcSMatt Macy /* 416eda14cbcSMatt Macy * Schedule an automatic unmount of objset id to occur in delay seconds from 417eda14cbcSMatt Macy * now. Any previous delayed unmount will be cancelled in favor of the 418eda14cbcSMatt Macy * updated deadline. A reference is taken by zfsctl_snapshot_find_by_name() 419eda14cbcSMatt Macy * and held until the outstanding task is handled or cancelled. 420eda14cbcSMatt Macy */ 421eda14cbcSMatt Macy int 422eda14cbcSMatt Macy zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid, int delay) 423eda14cbcSMatt Macy { 424eda14cbcSMatt Macy zfs_snapentry_t *se; 425eda14cbcSMatt Macy int error = ENOENT; 426eda14cbcSMatt Macy 427eda14cbcSMatt Macy rw_enter(&zfs_snapshot_lock, RW_READER); 428eda14cbcSMatt Macy if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) { 429eda14cbcSMatt Macy zfsctl_snapshot_unmount_cancel(se); 430eda14cbcSMatt Macy zfsctl_snapshot_unmount_delay_impl(se, delay); 431eda14cbcSMatt Macy zfsctl_snapshot_rele(se); 432eda14cbcSMatt Macy error = 0; 433eda14cbcSMatt Macy } 434eda14cbcSMatt Macy rw_exit(&zfs_snapshot_lock); 435eda14cbcSMatt Macy 436eda14cbcSMatt Macy return (error); 437eda14cbcSMatt Macy } 438eda14cbcSMatt Macy 439eda14cbcSMatt Macy /* 440eda14cbcSMatt Macy * Check if snapname is currently mounted. Returned non-zero when mounted 441eda14cbcSMatt Macy * and zero when unmounted. 442eda14cbcSMatt Macy */ 443eda14cbcSMatt Macy static boolean_t 444180f8225SMatt Macy zfsctl_snapshot_ismounted(const char *snapname) 445eda14cbcSMatt Macy { 446eda14cbcSMatt Macy zfs_snapentry_t *se; 447eda14cbcSMatt Macy boolean_t ismounted = B_FALSE; 448eda14cbcSMatt Macy 449eda14cbcSMatt Macy rw_enter(&zfs_snapshot_lock, RW_READER); 450eda14cbcSMatt Macy if ((se = zfsctl_snapshot_find_by_name(snapname)) != NULL) { 451eda14cbcSMatt Macy zfsctl_snapshot_rele(se); 452eda14cbcSMatt Macy ismounted = B_TRUE; 453eda14cbcSMatt Macy } 454eda14cbcSMatt Macy rw_exit(&zfs_snapshot_lock); 455eda14cbcSMatt Macy 456eda14cbcSMatt Macy return (ismounted); 457eda14cbcSMatt Macy } 458eda14cbcSMatt Macy 459eda14cbcSMatt Macy /* 460eda14cbcSMatt Macy * Check if the given inode is a part of the virtual .zfs directory. 461eda14cbcSMatt Macy */ 462eda14cbcSMatt Macy boolean_t 463eda14cbcSMatt Macy zfsctl_is_node(struct inode *ip) 464eda14cbcSMatt Macy { 465eda14cbcSMatt Macy return (ITOZ(ip)->z_is_ctldir); 466eda14cbcSMatt Macy } 467eda14cbcSMatt Macy 468eda14cbcSMatt Macy /* 469eda14cbcSMatt Macy * Check if the given inode is a .zfs/snapshots/snapname directory. 470eda14cbcSMatt Macy */ 471eda14cbcSMatt Macy boolean_t 472eda14cbcSMatt Macy zfsctl_is_snapdir(struct inode *ip) 473eda14cbcSMatt Macy { 474eda14cbcSMatt Macy return (zfsctl_is_node(ip) && (ip->i_ino <= ZFSCTL_INO_SNAPDIRS)); 475eda14cbcSMatt Macy } 476eda14cbcSMatt Macy 477eda14cbcSMatt Macy /* 478eda14cbcSMatt Macy * Allocate a new inode with the passed id and ops. 479eda14cbcSMatt Macy */ 480eda14cbcSMatt Macy static struct inode * 481eda14cbcSMatt Macy zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, 482315ee00fSMartin Matuska const struct file_operations *fops, const struct inode_operations *ops, 483315ee00fSMartin Matuska uint64_t creation) 484eda14cbcSMatt Macy { 485eda14cbcSMatt Macy struct inode *ip; 486eda14cbcSMatt Macy znode_t *zp; 487315ee00fSMartin Matuska inode_timespec_t now = {.tv_sec = creation}; 488eda14cbcSMatt Macy 489eda14cbcSMatt Macy ip = new_inode(zfsvfs->z_sb); 490eda14cbcSMatt Macy if (ip == NULL) 491eda14cbcSMatt Macy return (NULL); 492eda14cbcSMatt Macy 493315ee00fSMartin Matuska if (!creation) 494eda14cbcSMatt Macy now = current_time(ip); 495eda14cbcSMatt Macy zp = ITOZ(ip); 496eda14cbcSMatt Macy ASSERT3P(zp->z_dirlocks, ==, NULL); 497eda14cbcSMatt Macy ASSERT3P(zp->z_acl_cached, ==, NULL); 498eda14cbcSMatt Macy ASSERT3P(zp->z_xattr_cached, ==, NULL); 499eda14cbcSMatt Macy zp->z_id = id; 500eda14cbcSMatt Macy zp->z_unlinked = B_FALSE; 501eda14cbcSMatt Macy zp->z_atime_dirty = B_FALSE; 502eda14cbcSMatt Macy zp->z_zn_prefetch = B_FALSE; 503eda14cbcSMatt Macy zp->z_is_sa = B_FALSE; 504eda14cbcSMatt Macy zp->z_is_ctldir = B_TRUE; 505eda14cbcSMatt Macy zp->z_sa_hdl = NULL; 506eda14cbcSMatt Macy zp->z_blksz = 0; 507eda14cbcSMatt Macy zp->z_seq = 0; 508eda14cbcSMatt Macy zp->z_mapcnt = 0; 509eda14cbcSMatt Macy zp->z_size = 0; 510eda14cbcSMatt Macy zp->z_pflags = 0; 511eda14cbcSMatt Macy zp->z_mode = 0; 512eda14cbcSMatt Macy zp->z_sync_cnt = 0; 513716fd348SMartin Matuska zp->z_sync_writes_cnt = 0; 514716fd348SMartin Matuska zp->z_async_writes_cnt = 0; 515eda14cbcSMatt Macy ip->i_generation = 0; 516eda14cbcSMatt Macy ip->i_ino = id; 517eda14cbcSMatt Macy ip->i_mode = (S_IFDIR | S_IRWXUGO); 518eda14cbcSMatt Macy ip->i_uid = SUID_TO_KUID(0); 519eda14cbcSMatt Macy ip->i_gid = SGID_TO_KGID(0); 520eda14cbcSMatt Macy ip->i_blkbits = SPA_MINBLOCKSHIFT; 521b356da80SMartin Matuska zpl_inode_set_atime_to_ts(ip, now); 522b356da80SMartin Matuska zpl_inode_set_mtime_to_ts(ip, now); 523abcdc1b9SMartin Matuska zpl_inode_set_ctime_to_ts(ip, now); 524eda14cbcSMatt Macy ip->i_fop = fops; 525eda14cbcSMatt Macy ip->i_op = ops; 526eda14cbcSMatt Macy #if defined(IOP_XATTR) 527eda14cbcSMatt Macy ip->i_opflags &= ~IOP_XATTR; 528eda14cbcSMatt Macy #endif 529eda14cbcSMatt Macy 530eda14cbcSMatt Macy if (insert_inode_locked(ip)) { 531eda14cbcSMatt Macy unlock_new_inode(ip); 532eda14cbcSMatt Macy iput(ip); 533eda14cbcSMatt Macy return (NULL); 534eda14cbcSMatt Macy } 535eda14cbcSMatt Macy 536eda14cbcSMatt Macy mutex_enter(&zfsvfs->z_znodes_lock); 537eda14cbcSMatt Macy list_insert_tail(&zfsvfs->z_all_znodes, zp); 538eda14cbcSMatt Macy membar_producer(); 539eda14cbcSMatt Macy mutex_exit(&zfsvfs->z_znodes_lock); 540eda14cbcSMatt Macy 541eda14cbcSMatt Macy unlock_new_inode(ip); 542eda14cbcSMatt Macy 543eda14cbcSMatt Macy return (ip); 544eda14cbcSMatt Macy } 545eda14cbcSMatt Macy 546eda14cbcSMatt Macy /* 547eda14cbcSMatt Macy * Lookup the inode with given id, it will be allocated if needed. 548eda14cbcSMatt Macy */ 549eda14cbcSMatt Macy static struct inode * 550eda14cbcSMatt Macy zfsctl_inode_lookup(zfsvfs_t *zfsvfs, uint64_t id, 551eda14cbcSMatt Macy const struct file_operations *fops, const struct inode_operations *ops) 552eda14cbcSMatt Macy { 553eda14cbcSMatt Macy struct inode *ip = NULL; 554315ee00fSMartin Matuska uint64_t creation = 0; 555315ee00fSMartin Matuska dsl_dataset_t *snap_ds; 556315ee00fSMartin Matuska dsl_pool_t *pool; 557eda14cbcSMatt Macy 558eda14cbcSMatt Macy while (ip == NULL) { 559eda14cbcSMatt Macy ip = ilookup(zfsvfs->z_sb, (unsigned long)id); 560eda14cbcSMatt Macy if (ip) 561eda14cbcSMatt Macy break; 562eda14cbcSMatt Macy 563315ee00fSMartin Matuska if (id <= ZFSCTL_INO_SNAPDIRS && !creation) { 564315ee00fSMartin Matuska pool = dmu_objset_pool(zfsvfs->z_os); 565315ee00fSMartin Matuska dsl_pool_config_enter(pool, FTAG); 566315ee00fSMartin Matuska if (!dsl_dataset_hold_obj(pool, 567315ee00fSMartin Matuska ZFSCTL_INO_SNAPDIRS - id, FTAG, &snap_ds)) { 568315ee00fSMartin Matuska creation = dsl_get_creation(snap_ds); 569315ee00fSMartin Matuska dsl_dataset_rele(snap_ds, FTAG); 570315ee00fSMartin Matuska } 571315ee00fSMartin Matuska dsl_pool_config_exit(pool, FTAG); 572315ee00fSMartin Matuska } 573315ee00fSMartin Matuska 574eda14cbcSMatt Macy /* May fail due to concurrent zfsctl_inode_alloc() */ 575315ee00fSMartin Matuska ip = zfsctl_inode_alloc(zfsvfs, id, fops, ops, creation); 576eda14cbcSMatt Macy } 577eda14cbcSMatt Macy 578eda14cbcSMatt Macy return (ip); 579eda14cbcSMatt Macy } 580eda14cbcSMatt Macy 581eda14cbcSMatt Macy /* 582eda14cbcSMatt Macy * Create the '.zfs' directory. This directory is cached as part of the VFS 583eda14cbcSMatt Macy * structure. This results in a hold on the zfsvfs_t. The code in zfs_umount() 584eda14cbcSMatt Macy * therefore checks against a vfs_count of 2 instead of 1. This reference 585eda14cbcSMatt Macy * is removed when the ctldir is destroyed in the unmount. All other entities 586eda14cbcSMatt Macy * under the '.zfs' directory are created dynamically as needed. 587eda14cbcSMatt Macy * 588eda14cbcSMatt Macy * Because the dynamically created '.zfs' directory entries assume the use 589eda14cbcSMatt Macy * of 64-bit inode numbers this support must be disabled on 32-bit systems. 590eda14cbcSMatt Macy */ 591eda14cbcSMatt Macy int 592eda14cbcSMatt Macy zfsctl_create(zfsvfs_t *zfsvfs) 593eda14cbcSMatt Macy { 594eda14cbcSMatt Macy ASSERT(zfsvfs->z_ctldir == NULL); 595eda14cbcSMatt Macy 596eda14cbcSMatt Macy zfsvfs->z_ctldir = zfsctl_inode_alloc(zfsvfs, ZFSCTL_INO_ROOT, 597315ee00fSMartin Matuska &zpl_fops_root, &zpl_ops_root, 0); 598eda14cbcSMatt Macy if (zfsvfs->z_ctldir == NULL) 599eda14cbcSMatt Macy return (SET_ERROR(ENOENT)); 600eda14cbcSMatt Macy 601eda14cbcSMatt Macy return (0); 602eda14cbcSMatt Macy } 603eda14cbcSMatt Macy 604eda14cbcSMatt Macy /* 605eda14cbcSMatt Macy * Destroy the '.zfs' directory or remove a snapshot from zfs_snapshots_by_name. 606eda14cbcSMatt Macy * Only called when the filesystem is unmounted. 607eda14cbcSMatt Macy */ 608eda14cbcSMatt Macy void 609eda14cbcSMatt Macy zfsctl_destroy(zfsvfs_t *zfsvfs) 610eda14cbcSMatt Macy { 611eda14cbcSMatt Macy if (zfsvfs->z_issnap) { 612eda14cbcSMatt Macy zfs_snapentry_t *se; 613eda14cbcSMatt Macy spa_t *spa = zfsvfs->z_os->os_spa; 614eda14cbcSMatt Macy uint64_t objsetid = dmu_objset_id(zfsvfs->z_os); 615eda14cbcSMatt Macy 616eda14cbcSMatt Macy rw_enter(&zfs_snapshot_lock, RW_WRITER); 617eda14cbcSMatt Macy se = zfsctl_snapshot_find_by_objsetid(spa, objsetid); 618eda14cbcSMatt Macy if (se != NULL) 619eda14cbcSMatt Macy zfsctl_snapshot_remove(se); 620eda14cbcSMatt Macy rw_exit(&zfs_snapshot_lock); 621eda14cbcSMatt Macy if (se != NULL) { 622eda14cbcSMatt Macy zfsctl_snapshot_unmount_cancel(se); 623eda14cbcSMatt Macy zfsctl_snapshot_rele(se); 624eda14cbcSMatt Macy } 625eda14cbcSMatt Macy } else if (zfsvfs->z_ctldir) { 626eda14cbcSMatt Macy iput(zfsvfs->z_ctldir); 627eda14cbcSMatt Macy zfsvfs->z_ctldir = NULL; 628eda14cbcSMatt Macy } 629eda14cbcSMatt Macy } 630eda14cbcSMatt Macy 631eda14cbcSMatt Macy /* 632eda14cbcSMatt Macy * Given a root znode, retrieve the associated .zfs directory. 633eda14cbcSMatt Macy * Add a hold to the vnode and return it. 634eda14cbcSMatt Macy */ 635eda14cbcSMatt Macy struct inode * 636eda14cbcSMatt Macy zfsctl_root(znode_t *zp) 637eda14cbcSMatt Macy { 638eda14cbcSMatt Macy ASSERT(zfs_has_ctldir(zp)); 639f9693befSMartin Matuska /* Must have an existing ref, so igrab() cannot return NULL */ 640f9693befSMartin Matuska VERIFY3P(igrab(ZTOZSB(zp)->z_ctldir), !=, NULL); 641eda14cbcSMatt Macy return (ZTOZSB(zp)->z_ctldir); 642eda14cbcSMatt Macy } 643eda14cbcSMatt Macy 644eda14cbcSMatt Macy /* 645eda14cbcSMatt Macy * Generate a long fid to indicate a snapdir. We encode whether snapdir is 646eda14cbcSMatt Macy * already mounted in gen field. We do this because nfsd lookup will not 647eda14cbcSMatt Macy * trigger automount. Next time the nfsd does fh_to_dentry, we will notice 648eda14cbcSMatt Macy * this and do automount and return ESTALE to force nfsd revalidate and follow 649eda14cbcSMatt Macy * mount. 650eda14cbcSMatt Macy */ 651eda14cbcSMatt Macy static int 652eda14cbcSMatt Macy zfsctl_snapdir_fid(struct inode *ip, fid_t *fidp) 653eda14cbcSMatt Macy { 654eda14cbcSMatt Macy zfid_short_t *zfid = (zfid_short_t *)fidp; 655eda14cbcSMatt Macy zfid_long_t *zlfid = (zfid_long_t *)fidp; 656eda14cbcSMatt Macy uint32_t gen = 0; 657eda14cbcSMatt Macy uint64_t object; 658eda14cbcSMatt Macy uint64_t objsetid; 659eda14cbcSMatt Macy int i; 660eda14cbcSMatt Macy struct dentry *dentry; 661eda14cbcSMatt Macy 662eda14cbcSMatt Macy if (fidp->fid_len < LONG_FID_LEN) { 663eda14cbcSMatt Macy fidp->fid_len = LONG_FID_LEN; 664eda14cbcSMatt Macy return (SET_ERROR(ENOSPC)); 665eda14cbcSMatt Macy } 666eda14cbcSMatt Macy 667eda14cbcSMatt Macy object = ip->i_ino; 668eda14cbcSMatt Macy objsetid = ZFSCTL_INO_SNAPDIRS - ip->i_ino; 669eda14cbcSMatt Macy zfid->zf_len = LONG_FID_LEN; 670eda14cbcSMatt Macy 671eda14cbcSMatt Macy dentry = d_obtain_alias(igrab(ip)); 672eda14cbcSMatt Macy if (!IS_ERR(dentry)) { 673eda14cbcSMatt Macy gen = !!d_mountpoint(dentry); 674eda14cbcSMatt Macy dput(dentry); 675eda14cbcSMatt Macy } 676eda14cbcSMatt Macy 677eda14cbcSMatt Macy for (i = 0; i < sizeof (zfid->zf_object); i++) 678eda14cbcSMatt Macy zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 679eda14cbcSMatt Macy 680eda14cbcSMatt Macy for (i = 0; i < sizeof (zfid->zf_gen); i++) 681eda14cbcSMatt Macy zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i)); 682eda14cbcSMatt Macy 683eda14cbcSMatt Macy for (i = 0; i < sizeof (zlfid->zf_setid); i++) 684eda14cbcSMatt Macy zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i)); 685eda14cbcSMatt Macy 686eda14cbcSMatt Macy for (i = 0; i < sizeof (zlfid->zf_setgen); i++) 687eda14cbcSMatt Macy zlfid->zf_setgen[i] = 0; 688eda14cbcSMatt Macy 689eda14cbcSMatt Macy return (0); 690eda14cbcSMatt Macy } 691eda14cbcSMatt Macy 692eda14cbcSMatt Macy /* 693eda14cbcSMatt Macy * Generate an appropriate fid for an entry in the .zfs directory. 694eda14cbcSMatt Macy */ 695eda14cbcSMatt Macy int 696eda14cbcSMatt Macy zfsctl_fid(struct inode *ip, fid_t *fidp) 697eda14cbcSMatt Macy { 698eda14cbcSMatt Macy znode_t *zp = ITOZ(ip); 699eda14cbcSMatt Macy zfsvfs_t *zfsvfs = ITOZSB(ip); 700eda14cbcSMatt Macy uint64_t object = zp->z_id; 701eda14cbcSMatt Macy zfid_short_t *zfid; 702eda14cbcSMatt Macy int i; 703c7046f76SMartin Matuska int error; 704eda14cbcSMatt Macy 705c7046f76SMartin Matuska if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 706c7046f76SMartin Matuska return (error); 707eda14cbcSMatt Macy 708eda14cbcSMatt Macy if (zfsctl_is_snapdir(ip)) { 709c7046f76SMartin Matuska zfs_exit(zfsvfs, FTAG); 710eda14cbcSMatt Macy return (zfsctl_snapdir_fid(ip, fidp)); 711eda14cbcSMatt Macy } 712eda14cbcSMatt Macy 713eda14cbcSMatt Macy if (fidp->fid_len < SHORT_FID_LEN) { 714eda14cbcSMatt Macy fidp->fid_len = SHORT_FID_LEN; 715c7046f76SMartin Matuska zfs_exit(zfsvfs, FTAG); 716eda14cbcSMatt Macy return (SET_ERROR(ENOSPC)); 717eda14cbcSMatt Macy } 718eda14cbcSMatt Macy 719eda14cbcSMatt Macy zfid = (zfid_short_t *)fidp; 720eda14cbcSMatt Macy 721eda14cbcSMatt Macy zfid->zf_len = SHORT_FID_LEN; 722eda14cbcSMatt Macy 723eda14cbcSMatt Macy for (i = 0; i < sizeof (zfid->zf_object); i++) 724eda14cbcSMatt Macy zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 725eda14cbcSMatt Macy 726eda14cbcSMatt Macy /* .zfs znodes always have a generation number of 0 */ 727eda14cbcSMatt Macy for (i = 0; i < sizeof (zfid->zf_gen); i++) 728eda14cbcSMatt Macy zfid->zf_gen[i] = 0; 729eda14cbcSMatt Macy 730c7046f76SMartin Matuska zfs_exit(zfsvfs, FTAG); 731eda14cbcSMatt Macy return (0); 732eda14cbcSMatt Macy } 733eda14cbcSMatt Macy 734eda14cbcSMatt Macy /* 735eda14cbcSMatt Macy * Construct a full dataset name in full_name: "pool/dataset@snap_name" 736eda14cbcSMatt Macy */ 737eda14cbcSMatt Macy static int 738eda14cbcSMatt Macy zfsctl_snapshot_name(zfsvfs_t *zfsvfs, const char *snap_name, int len, 739eda14cbcSMatt Macy char *full_name) 740eda14cbcSMatt Macy { 741eda14cbcSMatt Macy objset_t *os = zfsvfs->z_os; 742eda14cbcSMatt Macy 743eda14cbcSMatt Macy if (zfs_component_namecheck(snap_name, NULL, NULL) != 0) 744eda14cbcSMatt Macy return (SET_ERROR(EILSEQ)); 745eda14cbcSMatt Macy 746eda14cbcSMatt Macy dmu_objset_name(os, full_name); 747eda14cbcSMatt Macy if ((strlen(full_name) + 1 + strlen(snap_name)) >= len) 748eda14cbcSMatt Macy return (SET_ERROR(ENAMETOOLONG)); 749eda14cbcSMatt Macy 750eda14cbcSMatt Macy (void) strcat(full_name, "@"); 751eda14cbcSMatt Macy (void) strcat(full_name, snap_name); 752eda14cbcSMatt Macy 753eda14cbcSMatt Macy return (0); 754eda14cbcSMatt Macy } 755eda14cbcSMatt Macy 756eda14cbcSMatt Macy /* 757eda14cbcSMatt Macy * Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/" 758eda14cbcSMatt Macy */ 759eda14cbcSMatt Macy static int 760eda14cbcSMatt Macy zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid, 761eda14cbcSMatt Macy int path_len, char *full_path) 762eda14cbcSMatt Macy { 763eda14cbcSMatt Macy objset_t *os = zfsvfs->z_os; 764eda14cbcSMatt Macy fstrans_cookie_t cookie; 765eda14cbcSMatt Macy char *snapname; 766eda14cbcSMatt Macy boolean_t case_conflict; 767eda14cbcSMatt Macy uint64_t id, pos = 0; 768eda14cbcSMatt Macy int error = 0; 769eda14cbcSMatt Macy 770eda14cbcSMatt Macy cookie = spl_fstrans_mark(); 771eda14cbcSMatt Macy snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 772eda14cbcSMatt Macy 773eda14cbcSMatt Macy while (error == 0) { 774eda14cbcSMatt Macy dsl_pool_config_enter(dmu_objset_pool(os), FTAG); 775eda14cbcSMatt Macy error = dmu_snapshot_list_next(zfsvfs->z_os, 776eda14cbcSMatt Macy ZFS_MAX_DATASET_NAME_LEN, snapname, &id, &pos, 777eda14cbcSMatt Macy &case_conflict); 778eda14cbcSMatt Macy dsl_pool_config_exit(dmu_objset_pool(os), FTAG); 779eda14cbcSMatt Macy if (error) 780eda14cbcSMatt Macy goto out; 781eda14cbcSMatt Macy 782eda14cbcSMatt Macy if (id == objsetid) 783eda14cbcSMatt Macy break; 784eda14cbcSMatt Macy } 785eda14cbcSMatt Macy 786*87bf66d4SMartin Matuska mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock); 787*87bf66d4SMartin Matuska if (zfsvfs->z_vfs->vfs_mntpoint != NULL) { 788eda14cbcSMatt Macy snprintf(full_path, path_len, "%s/.zfs/snapshot/%s", 789eda14cbcSMatt Macy zfsvfs->z_vfs->vfs_mntpoint, snapname); 790*87bf66d4SMartin Matuska } else 791*87bf66d4SMartin Matuska error = SET_ERROR(ENOENT); 792*87bf66d4SMartin Matuska mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock); 793*87bf66d4SMartin Matuska 794eda14cbcSMatt Macy out: 795eda14cbcSMatt Macy kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN); 796eda14cbcSMatt Macy spl_fstrans_unmark(cookie); 797eda14cbcSMatt Macy 798eda14cbcSMatt Macy return (error); 799eda14cbcSMatt Macy } 800eda14cbcSMatt Macy 801eda14cbcSMatt Macy /* 802eda14cbcSMatt Macy * Special case the handling of "..". 803eda14cbcSMatt Macy */ 804eda14cbcSMatt Macy int 805180f8225SMatt Macy zfsctl_root_lookup(struct inode *dip, const char *name, struct inode **ipp, 806eda14cbcSMatt Macy int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) 807eda14cbcSMatt Macy { 808eda14cbcSMatt Macy zfsvfs_t *zfsvfs = ITOZSB(dip); 809eda14cbcSMatt Macy int error = 0; 810eda14cbcSMatt Macy 811c7046f76SMartin Matuska if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 812c7046f76SMartin Matuska return (error); 813eda14cbcSMatt Macy 8147a7741afSMartin Matuska if (zfsvfs->z_show_ctldir == ZFS_SNAPDIR_DISABLED) { 8157a7741afSMartin Matuska *ipp = NULL; 8167a7741afSMartin Matuska } else if (strcmp(name, "..") == 0) { 817eda14cbcSMatt Macy *ipp = dip->i_sb->s_root->d_inode; 818eda14cbcSMatt Macy } else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) { 819eda14cbcSMatt Macy *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIR, 820eda14cbcSMatt Macy &zpl_fops_snapdir, &zpl_ops_snapdir); 821eda14cbcSMatt Macy } else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0) { 822eda14cbcSMatt Macy *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SHARES, 823eda14cbcSMatt Macy &zpl_fops_shares, &zpl_ops_shares); 824eda14cbcSMatt Macy } else { 825eda14cbcSMatt Macy *ipp = NULL; 826eda14cbcSMatt Macy } 827eda14cbcSMatt Macy 828eda14cbcSMatt Macy if (*ipp == NULL) 829eda14cbcSMatt Macy error = SET_ERROR(ENOENT); 830eda14cbcSMatt Macy 831c7046f76SMartin Matuska zfs_exit(zfsvfs, FTAG); 832eda14cbcSMatt Macy 833eda14cbcSMatt Macy return (error); 834eda14cbcSMatt Macy } 835eda14cbcSMatt Macy 836eda14cbcSMatt Macy /* 837eda14cbcSMatt Macy * Lookup entry point for the 'snapshot' directory. Try to open the 838eda14cbcSMatt Macy * snapshot if it exist, creating the pseudo filesystem inode as necessary. 839eda14cbcSMatt Macy */ 840eda14cbcSMatt Macy int 841180f8225SMatt Macy zfsctl_snapdir_lookup(struct inode *dip, const char *name, struct inode **ipp, 842eda14cbcSMatt Macy int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) 843eda14cbcSMatt Macy { 844eda14cbcSMatt Macy zfsvfs_t *zfsvfs = ITOZSB(dip); 845eda14cbcSMatt Macy uint64_t id; 846eda14cbcSMatt Macy int error; 847eda14cbcSMatt Macy 848c7046f76SMartin Matuska if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 849c7046f76SMartin Matuska return (error); 850eda14cbcSMatt Macy 851eda14cbcSMatt Macy error = dmu_snapshot_lookup(zfsvfs->z_os, name, &id); 852eda14cbcSMatt Macy if (error) { 853c7046f76SMartin Matuska zfs_exit(zfsvfs, FTAG); 854eda14cbcSMatt Macy return (error); 855eda14cbcSMatt Macy } 856eda14cbcSMatt Macy 857eda14cbcSMatt Macy *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIRS - id, 858eda14cbcSMatt Macy &simple_dir_operations, &simple_dir_inode_operations); 859eda14cbcSMatt Macy if (*ipp == NULL) 860eda14cbcSMatt Macy error = SET_ERROR(ENOENT); 861eda14cbcSMatt Macy 862c7046f76SMartin Matuska zfs_exit(zfsvfs, FTAG); 863eda14cbcSMatt Macy 864eda14cbcSMatt Macy return (error); 865eda14cbcSMatt Macy } 866eda14cbcSMatt Macy 867eda14cbcSMatt Macy /* 868eda14cbcSMatt Macy * Renaming a directory under '.zfs/snapshot' will automatically trigger 869eda14cbcSMatt Macy * a rename of the snapshot to the new given name. The rename is confined 870eda14cbcSMatt Macy * to the '.zfs/snapshot' directory snapshots cannot be moved elsewhere. 871eda14cbcSMatt Macy */ 872eda14cbcSMatt Macy int 873180f8225SMatt Macy zfsctl_snapdir_rename(struct inode *sdip, const char *snm, 874180f8225SMatt Macy struct inode *tdip, const char *tnm, cred_t *cr, int flags) 875eda14cbcSMatt Macy { 876eda14cbcSMatt Macy zfsvfs_t *zfsvfs = ITOZSB(sdip); 877eda14cbcSMatt Macy char *to, *from, *real, *fsname; 878eda14cbcSMatt Macy int error; 879eda14cbcSMatt Macy 880eda14cbcSMatt Macy if (!zfs_admin_snapshot) 881eda14cbcSMatt Macy return (SET_ERROR(EACCES)); 882eda14cbcSMatt Macy 883c7046f76SMartin Matuska if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 884c7046f76SMartin Matuska return (error); 885eda14cbcSMatt Macy 886eda14cbcSMatt Macy to = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 887eda14cbcSMatt Macy from = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 888eda14cbcSMatt Macy real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 889eda14cbcSMatt Macy fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 890eda14cbcSMatt Macy 891eda14cbcSMatt Macy if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 892eda14cbcSMatt Macy error = dmu_snapshot_realname(zfsvfs->z_os, snm, real, 893eda14cbcSMatt Macy ZFS_MAX_DATASET_NAME_LEN, NULL); 894eda14cbcSMatt Macy if (error == 0) { 895eda14cbcSMatt Macy snm = real; 896eda14cbcSMatt Macy } else if (error != ENOTSUP) { 897eda14cbcSMatt Macy goto out; 898eda14cbcSMatt Macy } 899eda14cbcSMatt Macy } 900eda14cbcSMatt Macy 901eda14cbcSMatt Macy dmu_objset_name(zfsvfs->z_os, fsname); 902eda14cbcSMatt Macy 903eda14cbcSMatt Macy error = zfsctl_snapshot_name(ITOZSB(sdip), snm, 904eda14cbcSMatt Macy ZFS_MAX_DATASET_NAME_LEN, from); 905eda14cbcSMatt Macy if (error == 0) 906eda14cbcSMatt Macy error = zfsctl_snapshot_name(ITOZSB(tdip), tnm, 907eda14cbcSMatt Macy ZFS_MAX_DATASET_NAME_LEN, to); 908eda14cbcSMatt Macy if (error == 0) 909eda14cbcSMatt Macy error = zfs_secpolicy_rename_perms(from, to, cr); 910eda14cbcSMatt Macy if (error != 0) 911eda14cbcSMatt Macy goto out; 912eda14cbcSMatt Macy 913eda14cbcSMatt Macy /* 914eda14cbcSMatt Macy * Cannot move snapshots out of the snapdir. 915eda14cbcSMatt Macy */ 916eda14cbcSMatt Macy if (sdip != tdip) { 917eda14cbcSMatt Macy error = SET_ERROR(EINVAL); 918eda14cbcSMatt Macy goto out; 919eda14cbcSMatt Macy } 920eda14cbcSMatt Macy 921eda14cbcSMatt Macy /* 922eda14cbcSMatt Macy * No-op when names are identical. 923eda14cbcSMatt Macy */ 924eda14cbcSMatt Macy if (strcmp(snm, tnm) == 0) { 925eda14cbcSMatt Macy error = 0; 926eda14cbcSMatt Macy goto out; 927eda14cbcSMatt Macy } 928eda14cbcSMatt Macy 929eda14cbcSMatt Macy rw_enter(&zfs_snapshot_lock, RW_WRITER); 930eda14cbcSMatt Macy 931eda14cbcSMatt Macy error = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE); 932eda14cbcSMatt Macy if (error == 0) 933eda14cbcSMatt Macy (void) zfsctl_snapshot_rename(snm, tnm); 934eda14cbcSMatt Macy 935eda14cbcSMatt Macy rw_exit(&zfs_snapshot_lock); 936eda14cbcSMatt Macy out: 937eda14cbcSMatt Macy kmem_free(from, ZFS_MAX_DATASET_NAME_LEN); 938eda14cbcSMatt Macy kmem_free(to, ZFS_MAX_DATASET_NAME_LEN); 939eda14cbcSMatt Macy kmem_free(real, ZFS_MAX_DATASET_NAME_LEN); 940eda14cbcSMatt Macy kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN); 941eda14cbcSMatt Macy 942c7046f76SMartin Matuska zfs_exit(zfsvfs, FTAG); 943eda14cbcSMatt Macy 944eda14cbcSMatt Macy return (error); 945eda14cbcSMatt Macy } 946eda14cbcSMatt Macy 947eda14cbcSMatt Macy /* 948eda14cbcSMatt Macy * Removing a directory under '.zfs/snapshot' will automatically trigger 949eda14cbcSMatt Macy * the removal of the snapshot with the given name. 950eda14cbcSMatt Macy */ 951eda14cbcSMatt Macy int 952180f8225SMatt Macy zfsctl_snapdir_remove(struct inode *dip, const char *name, cred_t *cr, 953180f8225SMatt Macy int flags) 954eda14cbcSMatt Macy { 955eda14cbcSMatt Macy zfsvfs_t *zfsvfs = ITOZSB(dip); 956eda14cbcSMatt Macy char *snapname, *real; 957eda14cbcSMatt Macy int error; 958eda14cbcSMatt Macy 959eda14cbcSMatt Macy if (!zfs_admin_snapshot) 960eda14cbcSMatt Macy return (SET_ERROR(EACCES)); 961eda14cbcSMatt Macy 962c7046f76SMartin Matuska if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 963c7046f76SMartin Matuska return (error); 964eda14cbcSMatt Macy 965eda14cbcSMatt Macy snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 966eda14cbcSMatt Macy real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 967eda14cbcSMatt Macy 968eda14cbcSMatt Macy if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) { 969eda14cbcSMatt Macy error = dmu_snapshot_realname(zfsvfs->z_os, name, real, 970eda14cbcSMatt Macy ZFS_MAX_DATASET_NAME_LEN, NULL); 971eda14cbcSMatt Macy if (error == 0) { 972eda14cbcSMatt Macy name = real; 973eda14cbcSMatt Macy } else if (error != ENOTSUP) { 974eda14cbcSMatt Macy goto out; 975eda14cbcSMatt Macy } 976eda14cbcSMatt Macy } 977eda14cbcSMatt Macy 978eda14cbcSMatt Macy error = zfsctl_snapshot_name(ITOZSB(dip), name, 979eda14cbcSMatt Macy ZFS_MAX_DATASET_NAME_LEN, snapname); 980eda14cbcSMatt Macy if (error == 0) 981eda14cbcSMatt Macy error = zfs_secpolicy_destroy_perms(snapname, cr); 982eda14cbcSMatt Macy if (error != 0) 983eda14cbcSMatt Macy goto out; 984eda14cbcSMatt Macy 985eda14cbcSMatt Macy error = zfsctl_snapshot_unmount(snapname, MNT_FORCE); 986eda14cbcSMatt Macy if ((error == 0) || (error == ENOENT)) 987eda14cbcSMatt Macy error = dsl_destroy_snapshot(snapname, B_FALSE); 988eda14cbcSMatt Macy out: 989eda14cbcSMatt Macy kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN); 990eda14cbcSMatt Macy kmem_free(real, ZFS_MAX_DATASET_NAME_LEN); 991eda14cbcSMatt Macy 992c7046f76SMartin Matuska zfs_exit(zfsvfs, FTAG); 993eda14cbcSMatt Macy 994eda14cbcSMatt Macy return (error); 995eda14cbcSMatt Macy } 996eda14cbcSMatt Macy 997eda14cbcSMatt Macy /* 998eda14cbcSMatt Macy * Creating a directory under '.zfs/snapshot' will automatically trigger 999eda14cbcSMatt Macy * the creation of a new snapshot with the given name. 1000eda14cbcSMatt Macy */ 1001eda14cbcSMatt Macy int 1002180f8225SMatt Macy zfsctl_snapdir_mkdir(struct inode *dip, const char *dirname, vattr_t *vap, 1003eda14cbcSMatt Macy struct inode **ipp, cred_t *cr, int flags) 1004eda14cbcSMatt Macy { 1005eda14cbcSMatt Macy zfsvfs_t *zfsvfs = ITOZSB(dip); 1006eda14cbcSMatt Macy char *dsname; 1007eda14cbcSMatt Macy int error; 1008eda14cbcSMatt Macy 1009eda14cbcSMatt Macy if (!zfs_admin_snapshot) 1010eda14cbcSMatt Macy return (SET_ERROR(EACCES)); 1011eda14cbcSMatt Macy 1012eda14cbcSMatt Macy dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 1013eda14cbcSMatt Macy 1014eda14cbcSMatt Macy if (zfs_component_namecheck(dirname, NULL, NULL) != 0) { 1015eda14cbcSMatt Macy error = SET_ERROR(EILSEQ); 1016eda14cbcSMatt Macy goto out; 1017eda14cbcSMatt Macy } 1018eda14cbcSMatt Macy 1019eda14cbcSMatt Macy dmu_objset_name(zfsvfs->z_os, dsname); 1020eda14cbcSMatt Macy 1021eda14cbcSMatt Macy error = zfs_secpolicy_snapshot_perms(dsname, cr); 1022eda14cbcSMatt Macy if (error != 0) 1023eda14cbcSMatt Macy goto out; 1024eda14cbcSMatt Macy 1025eda14cbcSMatt Macy if (error == 0) { 1026eda14cbcSMatt Macy error = dmu_objset_snapshot_one(dsname, dirname); 1027eda14cbcSMatt Macy if (error != 0) 1028eda14cbcSMatt Macy goto out; 1029eda14cbcSMatt Macy 1030eda14cbcSMatt Macy error = zfsctl_snapdir_lookup(dip, dirname, ipp, 1031eda14cbcSMatt Macy 0, cr, NULL, NULL); 1032eda14cbcSMatt Macy } 1033eda14cbcSMatt Macy out: 1034eda14cbcSMatt Macy kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN); 1035eda14cbcSMatt Macy 1036eda14cbcSMatt Macy return (error); 1037eda14cbcSMatt Macy } 1038eda14cbcSMatt Macy 1039eda14cbcSMatt Macy /* 1040ac0bf12eSMatt Macy * Flush everything out of the kernel's export table and such. 1041ac0bf12eSMatt Macy * This is needed as once the snapshot is used over NFS, its 1042ac0bf12eSMatt Macy * entries in svc_export and svc_expkey caches hold reference 1043ac0bf12eSMatt Macy * to the snapshot mount point. There is no known way of flushing 1044ac0bf12eSMatt Macy * only the entries related to the snapshot. 1045ac0bf12eSMatt Macy */ 1046ac0bf12eSMatt Macy static void 1047ac0bf12eSMatt Macy exportfs_flush(void) 1048ac0bf12eSMatt Macy { 1049ac0bf12eSMatt Macy char *argv[] = { "/usr/sbin/exportfs", "-f", NULL }; 1050ac0bf12eSMatt Macy char *envp[] = { NULL }; 1051ac0bf12eSMatt Macy 1052ac0bf12eSMatt Macy (void) call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); 1053ac0bf12eSMatt Macy } 1054ac0bf12eSMatt Macy 1055ac0bf12eSMatt Macy /* 1056*87bf66d4SMartin Matuska * Returns the path in char format for given struct path. Uses 1057*87bf66d4SMartin Matuska * d_path exported by kernel to convert struct path to char 1058*87bf66d4SMartin Matuska * format. Returns the correct path for mountpoints and chroot 1059*87bf66d4SMartin Matuska * environments. 1060*87bf66d4SMartin Matuska * 1061*87bf66d4SMartin Matuska * If chroot environment has directories that are mounted with 1062*87bf66d4SMartin Matuska * --bind or --rbind flag, d_path returns the complete path inside 1063*87bf66d4SMartin Matuska * chroot environment but does not return the absolute path, i.e. 1064*87bf66d4SMartin Matuska * the path to chroot environment is missing. 1065*87bf66d4SMartin Matuska */ 1066*87bf66d4SMartin Matuska static int 1067*87bf66d4SMartin Matuska get_root_path(struct path *path, char *buff, int len) 1068*87bf66d4SMartin Matuska { 1069*87bf66d4SMartin Matuska char *path_buffer, *path_ptr; 1070*87bf66d4SMartin Matuska int error = 0; 1071*87bf66d4SMartin Matuska 1072*87bf66d4SMartin Matuska path_get(path); 1073*87bf66d4SMartin Matuska path_buffer = kmem_zalloc(len, KM_SLEEP); 1074*87bf66d4SMartin Matuska path_ptr = d_path(path, path_buffer, len); 1075*87bf66d4SMartin Matuska if (IS_ERR(path_ptr)) 1076*87bf66d4SMartin Matuska error = SET_ERROR(-PTR_ERR(path_ptr)); 1077*87bf66d4SMartin Matuska else 1078*87bf66d4SMartin Matuska strcpy(buff, path_ptr); 1079*87bf66d4SMartin Matuska 1080*87bf66d4SMartin Matuska kmem_free(path_buffer, len); 1081*87bf66d4SMartin Matuska path_put(path); 1082*87bf66d4SMartin Matuska return (error); 1083*87bf66d4SMartin Matuska } 1084*87bf66d4SMartin Matuska 1085*87bf66d4SMartin Matuska /* 1086*87bf66d4SMartin Matuska * Returns if the current process root is chrooted or not. Linux 1087*87bf66d4SMartin Matuska * kernel exposes the task_struct for current process and init. 1088*87bf66d4SMartin Matuska * Since init process root points to actual root filesystem when 1089*87bf66d4SMartin Matuska * Linux runtime is reached, we can compare the current process 1090*87bf66d4SMartin Matuska * root with init process root to determine if root of the current 1091*87bf66d4SMartin Matuska * process is different from init, which can reliably determine if 1092*87bf66d4SMartin Matuska * current process is in chroot context or not. 1093*87bf66d4SMartin Matuska */ 1094*87bf66d4SMartin Matuska static int 1095*87bf66d4SMartin Matuska is_current_chrooted(void) 1096*87bf66d4SMartin Matuska { 1097*87bf66d4SMartin Matuska struct task_struct *curr = current, *global = &init_task; 1098*87bf66d4SMartin Matuska struct path cr_root, gl_root; 1099*87bf66d4SMartin Matuska 1100*87bf66d4SMartin Matuska task_lock(curr); 1101*87bf66d4SMartin Matuska get_fs_root(curr->fs, &cr_root); 1102*87bf66d4SMartin Matuska task_unlock(curr); 1103*87bf66d4SMartin Matuska 1104*87bf66d4SMartin Matuska task_lock(global); 1105*87bf66d4SMartin Matuska get_fs_root(global->fs, &gl_root); 1106*87bf66d4SMartin Matuska task_unlock(global); 1107*87bf66d4SMartin Matuska 1108*87bf66d4SMartin Matuska int chrooted = !path_equal(&cr_root, &gl_root); 1109*87bf66d4SMartin Matuska path_put(&gl_root); 1110*87bf66d4SMartin Matuska path_put(&cr_root); 1111*87bf66d4SMartin Matuska 1112*87bf66d4SMartin Matuska return (chrooted); 1113*87bf66d4SMartin Matuska } 1114*87bf66d4SMartin Matuska 1115*87bf66d4SMartin Matuska /* 1116eda14cbcSMatt Macy * Attempt to unmount a snapshot by making a call to user space. 1117eda14cbcSMatt Macy * There is no assurance that this can or will succeed, is just a 1118eda14cbcSMatt Macy * best effort. In the case where it does fail, perhaps because 1119eda14cbcSMatt Macy * it's in use, the unmount will fail harmlessly. 1120eda14cbcSMatt Macy */ 1121eda14cbcSMatt Macy int 1122180f8225SMatt Macy zfsctl_snapshot_unmount(const char *snapname, int flags) 1123eda14cbcSMatt Macy { 1124eda14cbcSMatt Macy char *argv[] = { "/usr/bin/env", "umount", "-t", "zfs", "-n", NULL, 1125eda14cbcSMatt Macy NULL }; 1126eda14cbcSMatt Macy char *envp[] = { NULL }; 1127eda14cbcSMatt Macy zfs_snapentry_t *se; 1128eda14cbcSMatt Macy int error; 1129eda14cbcSMatt Macy 1130eda14cbcSMatt Macy rw_enter(&zfs_snapshot_lock, RW_READER); 1131eda14cbcSMatt Macy if ((se = zfsctl_snapshot_find_by_name(snapname)) == NULL) { 1132eda14cbcSMatt Macy rw_exit(&zfs_snapshot_lock); 1133eda14cbcSMatt Macy return (SET_ERROR(ENOENT)); 1134eda14cbcSMatt Macy } 1135eda14cbcSMatt Macy rw_exit(&zfs_snapshot_lock); 1136eda14cbcSMatt Macy 1137ac0bf12eSMatt Macy exportfs_flush(); 1138ac0bf12eSMatt Macy 1139eda14cbcSMatt Macy if (flags & MNT_FORCE) 1140eda14cbcSMatt Macy argv[4] = "-fn"; 1141eda14cbcSMatt Macy argv[5] = se->se_path; 1142eda14cbcSMatt Macy dprintf("unmount; path=%s\n", se->se_path); 1143eda14cbcSMatt Macy error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); 1144eda14cbcSMatt Macy zfsctl_snapshot_rele(se); 1145eda14cbcSMatt Macy 1146eda14cbcSMatt Macy 1147eda14cbcSMatt Macy /* 1148eda14cbcSMatt Macy * The umount system utility will return 256 on error. We must 1149eda14cbcSMatt Macy * assume this error is because the file system is busy so it is 1150eda14cbcSMatt Macy * converted to the more sensible EBUSY. 1151eda14cbcSMatt Macy */ 1152eda14cbcSMatt Macy if (error) 1153eda14cbcSMatt Macy error = SET_ERROR(EBUSY); 1154eda14cbcSMatt Macy 1155eda14cbcSMatt Macy return (error); 1156eda14cbcSMatt Macy } 1157eda14cbcSMatt Macy 1158eda14cbcSMatt Macy int 1159eda14cbcSMatt Macy zfsctl_snapshot_mount(struct path *path, int flags) 1160eda14cbcSMatt Macy { 1161eda14cbcSMatt Macy struct dentry *dentry = path->dentry; 1162eda14cbcSMatt Macy struct inode *ip = dentry->d_inode; 1163eda14cbcSMatt Macy zfsvfs_t *zfsvfs; 1164eda14cbcSMatt Macy zfsvfs_t *snap_zfsvfs; 1165eda14cbcSMatt Macy zfs_snapentry_t *se; 11667a7741afSMartin Matuska char *full_name, *full_path, *options; 1167e2df9bb4SMartin Matuska char *argv[] = { "/usr/bin/env", "mount", "-i", "-t", "zfs", "-n", 11687a7741afSMartin Matuska "-o", NULL, NULL, NULL, NULL }; 1169eda14cbcSMatt Macy char *envp[] = { NULL }; 1170eda14cbcSMatt Macy int error; 1171eda14cbcSMatt Macy struct path spath; 1172eda14cbcSMatt Macy 1173eda14cbcSMatt Macy if (ip == NULL) 1174eda14cbcSMatt Macy return (SET_ERROR(EISDIR)); 1175eda14cbcSMatt Macy 1176eda14cbcSMatt Macy zfsvfs = ITOZSB(ip); 1177c7046f76SMartin Matuska if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 1178c7046f76SMartin Matuska return (error); 1179eda14cbcSMatt Macy 1180eda14cbcSMatt Macy full_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 1181eda14cbcSMatt Macy full_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 11827a7741afSMartin Matuska options = kmem_zalloc(7, KM_SLEEP); 1183eda14cbcSMatt Macy 1184eda14cbcSMatt Macy error = zfsctl_snapshot_name(zfsvfs, dname(dentry), 1185eda14cbcSMatt Macy ZFS_MAX_DATASET_NAME_LEN, full_name); 1186eda14cbcSMatt Macy if (error) 1187eda14cbcSMatt Macy goto error; 1188eda14cbcSMatt Macy 1189*87bf66d4SMartin Matuska if (is_current_chrooted() == 0) { 1190*87bf66d4SMartin Matuska /* 1191*87bf66d4SMartin Matuska * Current process is not in chroot context 1192*87bf66d4SMartin Matuska */ 1193*87bf66d4SMartin Matuska 1194*87bf66d4SMartin Matuska char *m = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1195*87bf66d4SMartin Matuska struct path mnt_path; 1196*87bf66d4SMartin Matuska mnt_path.mnt = path->mnt; 1197*87bf66d4SMartin Matuska mnt_path.dentry = path->mnt->mnt_root; 1198*87bf66d4SMartin Matuska 1199*87bf66d4SMartin Matuska /* 1200*87bf66d4SMartin Matuska * Get path to current mountpoint 1201*87bf66d4SMartin Matuska */ 1202*87bf66d4SMartin Matuska error = get_root_path(&mnt_path, m, MAXPATHLEN); 1203*87bf66d4SMartin Matuska if (error != 0) { 1204*87bf66d4SMartin Matuska kmem_free(m, MAXPATHLEN); 1205*87bf66d4SMartin Matuska goto error; 1206*87bf66d4SMartin Matuska } 1207*87bf66d4SMartin Matuska mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock); 1208*87bf66d4SMartin Matuska if (zfsvfs->z_vfs->vfs_mntpoint != NULL) { 1209*87bf66d4SMartin Matuska /* 1210*87bf66d4SMartin Matuska * If current mnountpoint and vfs_mntpoint are not same, 1211*87bf66d4SMartin Matuska * store current mountpoint in vfs_mntpoint. 1212*87bf66d4SMartin Matuska */ 1213*87bf66d4SMartin Matuska if (strcmp(zfsvfs->z_vfs->vfs_mntpoint, m) != 0) { 1214*87bf66d4SMartin Matuska kmem_strfree(zfsvfs->z_vfs->vfs_mntpoint); 1215*87bf66d4SMartin Matuska zfsvfs->z_vfs->vfs_mntpoint = kmem_strdup(m); 1216*87bf66d4SMartin Matuska } 1217*87bf66d4SMartin Matuska } else 1218*87bf66d4SMartin Matuska zfsvfs->z_vfs->vfs_mntpoint = kmem_strdup(m); 1219*87bf66d4SMartin Matuska mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock); 1220*87bf66d4SMartin Matuska kmem_free(m, MAXPATHLEN); 1221*87bf66d4SMartin Matuska } 1222*87bf66d4SMartin Matuska 1223eda14cbcSMatt Macy /* 1224eda14cbcSMatt Macy * Construct a mount point path from sb of the ctldir inode and dirent 1225eda14cbcSMatt Macy * name, instead of from d_path(), so that chroot'd process doesn't fail 1226eda14cbcSMatt Macy * on mount.zfs(8). 1227eda14cbcSMatt Macy */ 1228*87bf66d4SMartin Matuska mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock); 1229eda14cbcSMatt Macy snprintf(full_path, MAXPATHLEN, "%s/.zfs/snapshot/%s", 1230eda14cbcSMatt Macy zfsvfs->z_vfs->vfs_mntpoint ? zfsvfs->z_vfs->vfs_mntpoint : "", 1231eda14cbcSMatt Macy dname(dentry)); 1232*87bf66d4SMartin Matuska mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock); 1233eda14cbcSMatt Macy 12347a7741afSMartin Matuska snprintf(options, 7, "%s", 12357a7741afSMartin Matuska zfs_snapshot_no_setuid ? "nosuid" : "suid"); 12367a7741afSMartin Matuska 1237eda14cbcSMatt Macy /* 1238eda14cbcSMatt Macy * Multiple concurrent automounts of a snapshot are never allowed. 1239eda14cbcSMatt Macy * The snapshot may be manually mounted as many times as desired. 1240eda14cbcSMatt Macy */ 1241eda14cbcSMatt Macy if (zfsctl_snapshot_ismounted(full_name)) { 1242eda14cbcSMatt Macy error = 0; 1243eda14cbcSMatt Macy goto error; 1244eda14cbcSMatt Macy } 1245eda14cbcSMatt Macy 1246eda14cbcSMatt Macy /* 1247eda14cbcSMatt Macy * Attempt to mount the snapshot from user space. Normally this 1248eda14cbcSMatt Macy * would be done using the vfs_kern_mount() function, however that 1249eda14cbcSMatt Macy * function is marked GPL-only and cannot be used. On error we 1250eda14cbcSMatt Macy * careful to log the real error to the console and return EISDIR 1251eda14cbcSMatt Macy * to safely abort the automount. This should be very rare. 1252eda14cbcSMatt Macy * 1253eda14cbcSMatt Macy * If the user mode helper happens to return EBUSY, a concurrent 1254eda14cbcSMatt Macy * mount is already in progress in which case the error is ignored. 1255eda14cbcSMatt Macy * Take note that if the program was executed successfully the return 1256eda14cbcSMatt Macy * value from call_usermodehelper() will be (exitcode << 8 + signal). 1257eda14cbcSMatt Macy */ 1258eda14cbcSMatt Macy dprintf("mount; name=%s path=%s\n", full_name, full_path); 12597a7741afSMartin Matuska argv[7] = options; 12607a7741afSMartin Matuska argv[8] = full_name; 12617a7741afSMartin Matuska argv[9] = full_path; 1262eda14cbcSMatt Macy error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); 1263eda14cbcSMatt Macy if (error) { 1264eda14cbcSMatt Macy if (!(error & MOUNT_BUSY << 8)) { 1265eda14cbcSMatt Macy zfs_dbgmsg("Unable to automount %s error=%d", 1266eda14cbcSMatt Macy full_path, error); 1267eda14cbcSMatt Macy error = SET_ERROR(EISDIR); 1268eda14cbcSMatt Macy } else { 1269eda14cbcSMatt Macy /* 1270eda14cbcSMatt Macy * EBUSY, this could mean a concurrent mount, or the 1271eda14cbcSMatt Macy * snapshot has already been mounted at completely 1272eda14cbcSMatt Macy * different place. We return 0 so VFS will retry. For 1273eda14cbcSMatt Macy * the latter case the VFS will retry several times 1274eda14cbcSMatt Macy * and return ELOOP, which is probably not a very good 1275eda14cbcSMatt Macy * behavior. 1276eda14cbcSMatt Macy */ 1277eda14cbcSMatt Macy error = 0; 1278eda14cbcSMatt Macy } 1279eda14cbcSMatt Macy goto error; 1280eda14cbcSMatt Macy } 1281eda14cbcSMatt Macy 1282eda14cbcSMatt Macy /* 1283eda14cbcSMatt Macy * Follow down in to the mounted snapshot and set MNT_SHRINKABLE 1284eda14cbcSMatt Macy * to identify this as an automounted filesystem. 1285eda14cbcSMatt Macy */ 1286eda14cbcSMatt Macy spath = *path; 1287eda14cbcSMatt Macy path_get(&spath); 1288eda14cbcSMatt Macy if (follow_down_one(&spath)) { 1289eda14cbcSMatt Macy snap_zfsvfs = ITOZSB(spath.dentry->d_inode); 1290eda14cbcSMatt Macy snap_zfsvfs->z_parent = zfsvfs; 1291eda14cbcSMatt Macy dentry = spath.dentry; 1292eda14cbcSMatt Macy spath.mnt->mnt_flags |= MNT_SHRINKABLE; 1293eda14cbcSMatt Macy 1294eda14cbcSMatt Macy rw_enter(&zfs_snapshot_lock, RW_WRITER); 1295eda14cbcSMatt Macy se = zfsctl_snapshot_alloc(full_name, full_path, 1296eda14cbcSMatt Macy snap_zfsvfs->z_os->os_spa, dmu_objset_id(snap_zfsvfs->z_os), 1297eda14cbcSMatt Macy dentry); 1298eda14cbcSMatt Macy zfsctl_snapshot_add(se); 1299eda14cbcSMatt Macy zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot); 1300eda14cbcSMatt Macy rw_exit(&zfs_snapshot_lock); 1301eda14cbcSMatt Macy } 1302eda14cbcSMatt Macy path_put(&spath); 1303eda14cbcSMatt Macy error: 1304eda14cbcSMatt Macy kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN); 1305eda14cbcSMatt Macy kmem_free(full_path, MAXPATHLEN); 1306eda14cbcSMatt Macy 1307c7046f76SMartin Matuska zfs_exit(zfsvfs, FTAG); 1308eda14cbcSMatt Macy 1309eda14cbcSMatt Macy return (error); 1310eda14cbcSMatt Macy } 1311eda14cbcSMatt Macy 1312eda14cbcSMatt Macy /* 1313eda14cbcSMatt Macy * Get the snapdir inode from fid 1314eda14cbcSMatt Macy */ 1315eda14cbcSMatt Macy int 1316eda14cbcSMatt Macy zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid, int gen, 1317eda14cbcSMatt Macy struct inode **ipp) 1318eda14cbcSMatt Macy { 1319eda14cbcSMatt Macy int error; 1320eda14cbcSMatt Macy struct path path; 1321eda14cbcSMatt Macy char *mnt; 1322eda14cbcSMatt Macy struct dentry *dentry; 1323eda14cbcSMatt Macy 1324eda14cbcSMatt Macy mnt = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1325eda14cbcSMatt Macy 1326eda14cbcSMatt Macy error = zfsctl_snapshot_path_objset(sb->s_fs_info, objsetid, 1327eda14cbcSMatt Macy MAXPATHLEN, mnt); 1328eda14cbcSMatt Macy if (error) 1329eda14cbcSMatt Macy goto out; 1330eda14cbcSMatt Macy 1331eda14cbcSMatt Macy /* Trigger automount */ 1332eda14cbcSMatt Macy error = -kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path); 1333eda14cbcSMatt Macy if (error) 1334eda14cbcSMatt Macy goto out; 1335eda14cbcSMatt Macy 1336eda14cbcSMatt Macy path_put(&path); 1337eda14cbcSMatt Macy /* 1338eda14cbcSMatt Macy * Get the snapdir inode. Note, we don't want to use the above 1339eda14cbcSMatt Macy * path because it contains the root of the snapshot rather 1340eda14cbcSMatt Macy * than the snapdir. 1341eda14cbcSMatt Macy */ 1342eda14cbcSMatt Macy *ipp = ilookup(sb, ZFSCTL_INO_SNAPDIRS - objsetid); 1343eda14cbcSMatt Macy if (*ipp == NULL) { 1344eda14cbcSMatt Macy error = SET_ERROR(ENOENT); 1345eda14cbcSMatt Macy goto out; 1346eda14cbcSMatt Macy } 1347eda14cbcSMatt Macy 1348eda14cbcSMatt Macy /* check gen, see zfsctl_snapdir_fid */ 1349eda14cbcSMatt Macy dentry = d_obtain_alias(igrab(*ipp)); 1350eda14cbcSMatt Macy if (gen != (!IS_ERR(dentry) && d_mountpoint(dentry))) { 1351eda14cbcSMatt Macy iput(*ipp); 1352eda14cbcSMatt Macy *ipp = NULL; 1353eda14cbcSMatt Macy error = SET_ERROR(ENOENT); 1354eda14cbcSMatt Macy } 1355eda14cbcSMatt Macy if (!IS_ERR(dentry)) 1356eda14cbcSMatt Macy dput(dentry); 1357eda14cbcSMatt Macy out: 1358eda14cbcSMatt Macy kmem_free(mnt, MAXPATHLEN); 1359eda14cbcSMatt Macy return (error); 1360eda14cbcSMatt Macy } 1361eda14cbcSMatt Macy 1362eda14cbcSMatt Macy int 1363eda14cbcSMatt Macy zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp, 1364eda14cbcSMatt Macy int flags, cred_t *cr, int *direntflags, pathname_t *realpnp) 1365eda14cbcSMatt Macy { 1366eda14cbcSMatt Macy zfsvfs_t *zfsvfs = ITOZSB(dip); 1367eda14cbcSMatt Macy znode_t *zp; 1368eda14cbcSMatt Macy znode_t *dzp; 1369eda14cbcSMatt Macy int error; 1370eda14cbcSMatt Macy 1371c7046f76SMartin Matuska if ((error = zfs_enter(zfsvfs, FTAG)) != 0) 1372c7046f76SMartin Matuska return (error); 1373eda14cbcSMatt Macy 1374eda14cbcSMatt Macy if (zfsvfs->z_shares_dir == 0) { 1375c7046f76SMartin Matuska zfs_exit(zfsvfs, FTAG); 1376eda14cbcSMatt Macy return (SET_ERROR(ENOTSUP)); 1377eda14cbcSMatt Macy } 1378eda14cbcSMatt Macy 1379eda14cbcSMatt Macy if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) { 1380eda14cbcSMatt Macy error = zfs_lookup(dzp, name, &zp, 0, cr, NULL, NULL); 1381eda14cbcSMatt Macy zrele(dzp); 1382eda14cbcSMatt Macy } 1383eda14cbcSMatt Macy 1384c7046f76SMartin Matuska zfs_exit(zfsvfs, FTAG); 1385eda14cbcSMatt Macy 1386eda14cbcSMatt Macy return (error); 1387eda14cbcSMatt Macy } 1388eda14cbcSMatt Macy 1389eda14cbcSMatt Macy /* 1390eda14cbcSMatt Macy * Initialize the various pieces we'll need to create and manipulate .zfs 1391eda14cbcSMatt Macy * directories. Currently this is unused but available. 1392eda14cbcSMatt Macy */ 1393eda14cbcSMatt Macy void 1394eda14cbcSMatt Macy zfsctl_init(void) 1395eda14cbcSMatt Macy { 1396eda14cbcSMatt Macy avl_create(&zfs_snapshots_by_name, snapentry_compare_by_name, 1397eda14cbcSMatt Macy sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, 1398eda14cbcSMatt Macy se_node_name)); 1399eda14cbcSMatt Macy avl_create(&zfs_snapshots_by_objsetid, snapentry_compare_by_objsetid, 1400eda14cbcSMatt Macy sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, 1401eda14cbcSMatt Macy se_node_objsetid)); 1402eda14cbcSMatt Macy rw_init(&zfs_snapshot_lock, NULL, RW_DEFAULT, NULL); 1403eda14cbcSMatt Macy } 1404eda14cbcSMatt Macy 1405eda14cbcSMatt Macy /* 1406eda14cbcSMatt Macy * Cleanup the various pieces we needed for .zfs directories. In particular 1407eda14cbcSMatt Macy * ensure the expiry timer is canceled safely. 1408eda14cbcSMatt Macy */ 1409eda14cbcSMatt Macy void 1410eda14cbcSMatt Macy zfsctl_fini(void) 1411eda14cbcSMatt Macy { 1412eda14cbcSMatt Macy avl_destroy(&zfs_snapshots_by_name); 1413eda14cbcSMatt Macy avl_destroy(&zfs_snapshots_by_objsetid); 1414eda14cbcSMatt Macy rw_destroy(&zfs_snapshot_lock); 1415eda14cbcSMatt Macy } 1416eda14cbcSMatt Macy 1417eda14cbcSMatt Macy module_param(zfs_admin_snapshot, int, 0644); 1418eda14cbcSMatt Macy MODULE_PARM_DESC(zfs_admin_snapshot, "Enable mkdir/rmdir/mv in .zfs/snapshot"); 1419eda14cbcSMatt Macy 1420eda14cbcSMatt Macy module_param(zfs_expire_snapshot, int, 0644); 1421eda14cbcSMatt Macy MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot"); 14227a7741afSMartin Matuska 14237a7741afSMartin Matuska module_param(zfs_snapshot_no_setuid, int, 0644); 14247a7741afSMartin Matuska MODULE_PARM_DESC(zfs_snapshot_no_setuid, 14257a7741afSMartin Matuska "Disable setuid/setgid for automounts in .zfs/snapshot"); 1426