1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51512Sek110237 * Common Development and Distribution License (the "License"). 61512Sek110237 * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 221298Sperrin * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 26789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27789Sahrens 28789Sahrens /* 29789Sahrens * ZFS control directory (a.k.a. ".zfs") 30789Sahrens * 31789Sahrens * This directory provides a common location for all ZFS meta-objects. 32789Sahrens * Currently, this is only the 'snapshot' directory, but this may expand in the 33789Sahrens * future. The elements are built using the GFS primitives, as the hierarchy 34789Sahrens * does not actually exist on disk. 35789Sahrens * 36789Sahrens * For 'snapshot', we don't want to have all snapshots always mounted, because 37789Sahrens * this would take up a huge amount of space in /etc/mnttab. We have three 38789Sahrens * types of objects: 39789Sahrens * 40789Sahrens * ctldir ------> snapshotdir -------> snapshot 41789Sahrens * | 42789Sahrens * | 43789Sahrens * V 44789Sahrens * mounted fs 45789Sahrens * 46789Sahrens * The 'snapshot' node contains just enough information to lookup '..' and act 47789Sahrens * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we 48789Sahrens * perform an automount of the underlying filesystem and return the 49789Sahrens * corresponding vnode. 50789Sahrens * 51789Sahrens * All mounts are handled automatically by the kernel, but unmounts are 52789Sahrens * (currently) handled from user land. The main reason is that there is no 53789Sahrens * reliable way to auto-unmount the filesystem when it's "no longer in use". 54789Sahrens * When the user unmounts a filesystem, we call zfsctl_unmount(), which 55789Sahrens * unmounts any snapshots within the snapshot directory. 56789Sahrens */ 57789Sahrens 58789Sahrens #include <fs/fs_subr.h> 59789Sahrens #include <sys/zfs_ctldir.h> 60789Sahrens #include <sys/zfs_ioctl.h> 61789Sahrens #include <sys/zfs_vfsops.h> 62789Sahrens #include <sys/gfs.h> 63789Sahrens #include <sys/stat.h> 64789Sahrens #include <sys/dmu.h> 65789Sahrens #include <sys/mount.h> 66789Sahrens 67789Sahrens typedef struct { 68789Sahrens char *se_name; 69789Sahrens vnode_t *se_root; 70789Sahrens avl_node_t se_node; 71789Sahrens } zfs_snapentry_t; 72789Sahrens 73789Sahrens static int 74789Sahrens snapentry_compare(const void *a, const void *b) 75789Sahrens { 76789Sahrens const zfs_snapentry_t *sa = a; 77789Sahrens const zfs_snapentry_t *sb = b; 78789Sahrens int ret = strcmp(sa->se_name, sb->se_name); 79789Sahrens 80789Sahrens if (ret < 0) 81789Sahrens return (-1); 82789Sahrens else if (ret > 0) 83789Sahrens return (1); 84789Sahrens else 85789Sahrens return (0); 86789Sahrens } 87789Sahrens 88789Sahrens vnodeops_t *zfsctl_ops_root; 89789Sahrens vnodeops_t *zfsctl_ops_snapdir; 90789Sahrens vnodeops_t *zfsctl_ops_snapshot; 91789Sahrens 92789Sahrens static const fs_operation_def_t zfsctl_tops_root[]; 93789Sahrens static const fs_operation_def_t zfsctl_tops_snapdir[]; 94789Sahrens static const fs_operation_def_t zfsctl_tops_snapshot[]; 95789Sahrens 96789Sahrens static vnode_t *zfsctl_mknode_snapdir(vnode_t *); 97789Sahrens static vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset); 98789Sahrens 99789Sahrens static gfs_opsvec_t zfsctl_opsvec[] = { 100789Sahrens { ".zfs", zfsctl_tops_root, &zfsctl_ops_root }, 101789Sahrens { ".zfs/snapshot", zfsctl_tops_snapdir, &zfsctl_ops_snapdir }, 102789Sahrens { ".zfs/snapshot/vnode", zfsctl_tops_snapshot, &zfsctl_ops_snapshot }, 103789Sahrens { NULL } 104789Sahrens }; 105789Sahrens 106789Sahrens typedef struct zfsctl_node { 107789Sahrens gfs_dir_t zc_gfs_private; 108789Sahrens uint64_t zc_id; 1091571Sek110237 timestruc_t zc_cmtime; /* ctime and mtime, always the same */ 110789Sahrens } zfsctl_node_t; 111789Sahrens 112789Sahrens typedef struct zfsctl_snapdir { 113789Sahrens zfsctl_node_t sd_node; 114789Sahrens kmutex_t sd_lock; 115789Sahrens avl_tree_t sd_snaps; 116789Sahrens } zfsctl_snapdir_t; 117789Sahrens 118789Sahrens /* 119789Sahrens * Root directory elements. We have only a single static entry, 'snapshot'. 120789Sahrens */ 121789Sahrens static gfs_dirent_t zfsctl_root_entries[] = { 122789Sahrens { "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE }, 123789Sahrens { NULL } 124789Sahrens }; 125789Sahrens 126789Sahrens /* include . and .. in the calculation */ 127789Sahrens #define NROOT_ENTRIES ((sizeof (zfsctl_root_entries) / \ 128789Sahrens sizeof (gfs_dirent_t)) + 1) 129789Sahrens 130789Sahrens 131789Sahrens /* 132789Sahrens * Initialize the various GFS pieces we'll need to create and manipulate .zfs 133789Sahrens * directories. This is called from the ZFS init routine, and initializes the 134789Sahrens * vnode ops vectors that we'll be using. 135789Sahrens */ 136789Sahrens void 137789Sahrens zfsctl_init(void) 138789Sahrens { 139789Sahrens VERIFY(gfs_make_opsvec(zfsctl_opsvec) == 0); 140789Sahrens } 141789Sahrens 142789Sahrens void 143789Sahrens zfsctl_fini(void) 144789Sahrens { 145789Sahrens /* 146789Sahrens * Remove vfsctl vnode ops 147789Sahrens */ 148789Sahrens if (zfsctl_ops_root) 149789Sahrens vn_freevnodeops(zfsctl_ops_root); 150789Sahrens if (zfsctl_ops_snapdir) 151789Sahrens vn_freevnodeops(zfsctl_ops_snapdir); 152789Sahrens if (zfsctl_ops_snapshot) 153789Sahrens vn_freevnodeops(zfsctl_ops_snapshot); 154789Sahrens 155789Sahrens zfsctl_ops_root = NULL; 156789Sahrens zfsctl_ops_snapdir = NULL; 157789Sahrens zfsctl_ops_snapshot = NULL; 158789Sahrens } 159789Sahrens 160789Sahrens /* 161789Sahrens * Return the inode number associated with the 'snapshot' directory. 162789Sahrens */ 163789Sahrens /* ARGSUSED */ 164789Sahrens static ino64_t 165789Sahrens zfsctl_root_inode_cb(vnode_t *vp, int index) 166789Sahrens { 167789Sahrens ASSERT(index == 0); 168789Sahrens return (ZFSCTL_INO_SNAPDIR); 169789Sahrens } 170789Sahrens 171789Sahrens /* 172789Sahrens * Create the '.zfs' directory. This directory is cached as part of the VFS 173789Sahrens * structure. This results in a hold on the vfs_t. The code in zfs_umount() 174789Sahrens * therefore checks against a vfs_count of 2 instead of 1. This reference 175789Sahrens * is removed when the ctldir is destroyed in the unmount. 176789Sahrens */ 177789Sahrens void 178789Sahrens zfsctl_create(zfsvfs_t *zfsvfs) 179789Sahrens { 1801571Sek110237 vnode_t *vp, *rvp; 181789Sahrens zfsctl_node_t *zcp; 182789Sahrens 183789Sahrens ASSERT(zfsvfs->z_ctldir == NULL); 184789Sahrens 185789Sahrens vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs, 186789Sahrens zfsctl_ops_root, ZFSCTL_INO_ROOT, zfsctl_root_entries, 187789Sahrens zfsctl_root_inode_cb, MAXNAMELEN, NULL, NULL); 188789Sahrens zcp = vp->v_data; 189789Sahrens zcp->zc_id = ZFSCTL_INO_ROOT; 190789Sahrens 1911571Sek110237 VERIFY(VFS_ROOT(zfsvfs->z_vfs, &rvp) == 0); 1921571Sek110237 ZFS_TIME_DECODE(&zcp->zc_cmtime, VTOZ(rvp)->z_phys->zp_crtime); 1931571Sek110237 VN_RELE(rvp); 1941571Sek110237 195789Sahrens /* 196789Sahrens * We're only faking the fact that we have a root of a filesystem for 197789Sahrens * the sake of the GFS interfaces. Undo the flag manipulation it did 198789Sahrens * for us. 199789Sahrens */ 200789Sahrens vp->v_flag &= ~(VROOT | VNOCACHE | VNOMAP | VNOSWAP | VNOMOUNT); 201789Sahrens 202789Sahrens zfsvfs->z_ctldir = vp; 203789Sahrens } 204789Sahrens 205789Sahrens /* 2061298Sperrin * Destroy the '.zfs' directory. Only called when the filesystem is unmounted. 2071298Sperrin * There might still be more references if we were force unmounted, but only 2081298Sperrin * new zfs_inactive() calls can occur and they don't reference .zfs 209789Sahrens */ 210789Sahrens void 211789Sahrens zfsctl_destroy(zfsvfs_t *zfsvfs) 212789Sahrens { 213789Sahrens VN_RELE(zfsvfs->z_ctldir); 214789Sahrens zfsvfs->z_ctldir = NULL; 215789Sahrens } 216789Sahrens 217789Sahrens /* 218789Sahrens * Given a root znode, retrieve the associated .zfs directory. 219789Sahrens * Add a hold to the vnode and return it. 220789Sahrens */ 221789Sahrens vnode_t * 222789Sahrens zfsctl_root(znode_t *zp) 223789Sahrens { 224789Sahrens ASSERT(zfs_has_ctldir(zp)); 225789Sahrens VN_HOLD(zp->z_zfsvfs->z_ctldir); 226789Sahrens return (zp->z_zfsvfs->z_ctldir); 227789Sahrens } 228789Sahrens 229789Sahrens /* 230789Sahrens * Common open routine. Disallow any write access. 231789Sahrens */ 232789Sahrens /* ARGSUSED */ 233789Sahrens static int 234789Sahrens zfsctl_common_open(vnode_t **vpp, int flags, cred_t *cr) 235789Sahrens { 236789Sahrens if (flags & FWRITE) 237789Sahrens return (EACCES); 238789Sahrens 239789Sahrens return (0); 240789Sahrens } 241789Sahrens 242789Sahrens /* 243789Sahrens * Common close routine. Nothing to do here. 244789Sahrens */ 245789Sahrens /* ARGSUSED */ 246789Sahrens static int 247789Sahrens zfsctl_common_close(vnode_t *vpp, int flags, int count, offset_t off, 248789Sahrens cred_t *cr) 249789Sahrens { 250789Sahrens return (0); 251789Sahrens } 252789Sahrens 253789Sahrens /* 254789Sahrens * Common access routine. Disallow writes. 255789Sahrens */ 256789Sahrens /* ARGSUSED */ 257789Sahrens static int 258789Sahrens zfsctl_common_access(vnode_t *vp, int mode, int flags, cred_t *cr) 259789Sahrens { 260789Sahrens if (mode & VWRITE) 261789Sahrens return (EACCES); 262789Sahrens 263789Sahrens return (0); 264789Sahrens } 265789Sahrens 266789Sahrens /* 267789Sahrens * Common getattr function. Fill in basic information. 268789Sahrens */ 269789Sahrens static void 270789Sahrens zfsctl_common_getattr(vnode_t *vp, vattr_t *vap) 271789Sahrens { 2721571Sek110237 zfsctl_node_t *zcp = vp->v_data; 2731571Sek110237 timestruc_t now; 274789Sahrens 275789Sahrens vap->va_uid = 0; 276789Sahrens vap->va_gid = 0; 277789Sahrens vap->va_rdev = 0; 278789Sahrens /* 279789Sahrens * We are a purly virtual object, so we have no 280789Sahrens * blocksize or allocated blocks. 281789Sahrens */ 282789Sahrens vap->va_blksize = 0; 283789Sahrens vap->va_nblocks = 0; 284789Sahrens vap->va_seq = 0; 285789Sahrens vap->va_fsid = vp->v_vfsp->vfs_dev; 286789Sahrens vap->va_mode = S_IRUSR | S_IXUSR | S_IRGRP | S_IXGRP | 287789Sahrens S_IROTH | S_IXOTH; 288789Sahrens vap->va_type = VDIR; 289789Sahrens /* 2901571Sek110237 * We live in the now (for atime). 291789Sahrens */ 292789Sahrens gethrestime(&now); 2931571Sek110237 vap->va_atime = now; 2941571Sek110237 vap->va_mtime = vap->va_ctime = zcp->zc_cmtime; 295789Sahrens } 296789Sahrens 297789Sahrens static int 298789Sahrens zfsctl_common_fid(vnode_t *vp, fid_t *fidp) 299789Sahrens { 300789Sahrens zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 301789Sahrens zfsctl_node_t *zcp = vp->v_data; 302789Sahrens uint64_t object = zcp->zc_id; 303789Sahrens zfid_short_t *zfid; 304789Sahrens int i; 305789Sahrens 306789Sahrens ZFS_ENTER(zfsvfs); 307789Sahrens 308789Sahrens if (fidp->fid_len < SHORT_FID_LEN) { 309789Sahrens fidp->fid_len = SHORT_FID_LEN; 3101512Sek110237 ZFS_EXIT(zfsvfs); 311789Sahrens return (ENOSPC); 312789Sahrens } 313789Sahrens 314789Sahrens zfid = (zfid_short_t *)fidp; 315789Sahrens 316789Sahrens zfid->zf_len = SHORT_FID_LEN; 317789Sahrens 318789Sahrens for (i = 0; i < sizeof (zfid->zf_object); i++) 319789Sahrens zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); 320789Sahrens 321789Sahrens /* .zfs znodes always have a generation number of 0 */ 322789Sahrens for (i = 0; i < sizeof (zfid->zf_gen); i++) 323789Sahrens zfid->zf_gen[i] = 0; 324789Sahrens 325789Sahrens ZFS_EXIT(zfsvfs); 326789Sahrens return (0); 327789Sahrens } 328789Sahrens 329789Sahrens /* 330789Sahrens * .zfs inode namespace 331789Sahrens * 332789Sahrens * We need to generate unique inode numbers for all files and directories 333789Sahrens * within the .zfs pseudo-filesystem. We use the following scheme: 334789Sahrens * 335789Sahrens * ENTRY ZFSCTL_INODE 336789Sahrens * .zfs 1 337789Sahrens * .zfs/snapshot 2 338789Sahrens * .zfs/snapshot/<snap> objectid(snap) 339789Sahrens */ 340789Sahrens 341789Sahrens #define ZFSCTL_INO_SNAP(id) (id) 342789Sahrens 343789Sahrens /* 344789Sahrens * Get root directory attributes. 345789Sahrens */ 346789Sahrens /* ARGSUSED */ 347789Sahrens static int 348789Sahrens zfsctl_root_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr) 349789Sahrens { 350789Sahrens zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 351789Sahrens 352789Sahrens ZFS_ENTER(zfsvfs); 353789Sahrens vap->va_nodeid = ZFSCTL_INO_ROOT; 354789Sahrens vap->va_nlink = vap->va_size = NROOT_ENTRIES; 355789Sahrens 356789Sahrens zfsctl_common_getattr(vp, vap); 357789Sahrens ZFS_EXIT(zfsvfs); 358789Sahrens 359789Sahrens return (0); 360789Sahrens } 361789Sahrens 362789Sahrens /* 363789Sahrens * Special case the handling of "..". 364789Sahrens */ 365789Sahrens /* ARGSUSED */ 366789Sahrens int 367789Sahrens zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp, 368789Sahrens int flags, vnode_t *rdir, cred_t *cr) 369789Sahrens { 370789Sahrens zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data; 371789Sahrens int err; 372789Sahrens 373789Sahrens ZFS_ENTER(zfsvfs); 374789Sahrens 375789Sahrens if (strcmp(nm, "..") == 0) { 376789Sahrens err = VFS_ROOT(dvp->v_vfsp, vpp); 377789Sahrens } else { 378789Sahrens err = gfs_dir_lookup(dvp, nm, vpp); 379789Sahrens } 380789Sahrens 381789Sahrens ZFS_EXIT(zfsvfs); 382789Sahrens 383789Sahrens return (err); 384789Sahrens } 385789Sahrens 386789Sahrens static const fs_operation_def_t zfsctl_tops_root[] = { 387789Sahrens { VOPNAME_OPEN, zfsctl_common_open }, 388789Sahrens { VOPNAME_CLOSE, zfsctl_common_close }, 389789Sahrens { VOPNAME_IOCTL, fs_inval }, 390789Sahrens { VOPNAME_GETATTR, zfsctl_root_getattr }, 391789Sahrens { VOPNAME_ACCESS, zfsctl_common_access }, 392789Sahrens { VOPNAME_READDIR, gfs_vop_readdir }, 393789Sahrens { VOPNAME_LOOKUP, zfsctl_root_lookup }, 394789Sahrens { VOPNAME_SEEK, fs_seek }, 395789Sahrens { VOPNAME_INACTIVE, (fs_generic_func_p) gfs_vop_inactive }, 396789Sahrens { VOPNAME_FID, zfsctl_common_fid }, 397789Sahrens { NULL } 398789Sahrens }; 399789Sahrens 400789Sahrens static int 401789Sahrens zfsctl_snapshot_zname(vnode_t *vp, const char *name, int len, char *zname) 402789Sahrens { 403789Sahrens objset_t *os = ((zfsvfs_t *)((vp)->v_vfsp->vfs_data))->z_os; 404789Sahrens 405789Sahrens dmu_objset_name(os, zname); 4061154Smaybee if (strlen(zname) + 1 + strlen(name) >= len) 4071154Smaybee return (ENAMETOOLONG); 408789Sahrens (void) strcat(zname, "@"); 409789Sahrens (void) strcat(zname, name); 410789Sahrens return (0); 411789Sahrens } 412789Sahrens 413789Sahrens static int 414789Sahrens zfsctl_unmount_snap(vnode_t *dvp, const char *name, int force, cred_t *cr) 415789Sahrens { 416789Sahrens zfsctl_snapdir_t *sdp = dvp->v_data; 417789Sahrens zfs_snapentry_t search, *sep; 418789Sahrens avl_index_t where; 419789Sahrens int err; 420789Sahrens 421789Sahrens ASSERT(MUTEX_HELD(&sdp->sd_lock)); 422789Sahrens 423789Sahrens search.se_name = (char *)name; 424789Sahrens if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL) 425789Sahrens return (ENOENT); 426789Sahrens 427789Sahrens ASSERT(vn_ismntpt(sep->se_root)); 428789Sahrens 429789Sahrens /* this will be dropped by dounmount() */ 430789Sahrens if ((err = vn_vfswlock(sep->se_root)) != 0) 431789Sahrens return (err); 432789Sahrens 433789Sahrens VN_HOLD(sep->se_root); 434789Sahrens if ((err = dounmount(vn_mountedvfs(sep->se_root), force, kcred)) != 0) 435789Sahrens return (err); 436789Sahrens ASSERT(sep->se_root->v_count == 1); 437789Sahrens gfs_vop_inactive(sep->se_root, cr); 438789Sahrens 439789Sahrens avl_remove(&sdp->sd_snaps, sep); 440789Sahrens kmem_free(sep->se_name, strlen(sep->se_name) + 1); 441789Sahrens kmem_free(sep, sizeof (zfs_snapentry_t)); 442789Sahrens 443789Sahrens return (0); 444789Sahrens } 445789Sahrens 446789Sahrens 4471154Smaybee static void 448789Sahrens zfsctl_rename_snap(zfsctl_snapdir_t *sdp, zfs_snapentry_t *sep, const char *nm) 449789Sahrens { 450789Sahrens avl_index_t where; 451789Sahrens vfs_t *vfsp; 452789Sahrens refstr_t *pathref; 453789Sahrens char newpath[MAXNAMELEN]; 454789Sahrens const char *oldpath; 455789Sahrens char *tail; 456789Sahrens 457789Sahrens ASSERT(MUTEX_HELD(&sdp->sd_lock)); 458789Sahrens ASSERT(sep != NULL); 459789Sahrens 460789Sahrens vfsp = vn_mountedvfs(sep->se_root); 461789Sahrens ASSERT(vfsp != NULL); 462789Sahrens 4631154Smaybee vfs_lock_wait(vfsp); 464789Sahrens 465789Sahrens /* 466789Sahrens * Change the name in the AVL tree. 467789Sahrens */ 468789Sahrens avl_remove(&sdp->sd_snaps, sep); 469789Sahrens kmem_free(sep->se_name, strlen(sep->se_name) + 1); 470789Sahrens sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP); 471789Sahrens (void) strcpy(sep->se_name, nm); 472789Sahrens VERIFY(avl_find(&sdp->sd_snaps, sep, &where) == NULL); 473789Sahrens avl_insert(&sdp->sd_snaps, sep, where); 474789Sahrens 475789Sahrens /* 476789Sahrens * Change the current mountpoint info: 477789Sahrens * - update the tail of the mntpoint path 478789Sahrens * - update the tail of the resource path 479789Sahrens */ 480789Sahrens pathref = vfs_getmntpoint(vfsp); 481789Sahrens oldpath = refstr_value(pathref); 482789Sahrens VERIFY((tail = strrchr(oldpath, '/')) != NULL); 483789Sahrens ASSERT((tail - oldpath) + strlen(nm) + 2 < MAXNAMELEN); 484789Sahrens (void) strncpy(newpath, oldpath, tail - oldpath + 1); 485789Sahrens (void) strcat(newpath, nm); 486789Sahrens refstr_rele(pathref); 487789Sahrens vfs_setmntpoint(vfsp, newpath); 488789Sahrens 489789Sahrens pathref = vfs_getresource(vfsp); 490789Sahrens oldpath = refstr_value(pathref); 491789Sahrens VERIFY((tail = strrchr(oldpath, '@')) != NULL); 492789Sahrens ASSERT((tail - oldpath) + strlen(nm) + 2 < MAXNAMELEN); 493789Sahrens (void) strncpy(newpath, oldpath, tail - oldpath + 1); 494789Sahrens (void) strcat(newpath, nm); 495789Sahrens refstr_rele(pathref); 496789Sahrens vfs_setresource(vfsp, newpath); 497789Sahrens 498789Sahrens vfs_unlock(vfsp); 499789Sahrens } 500789Sahrens 501789Sahrens static int 502789Sahrens zfsctl_snapdir_rename(vnode_t *sdvp, char *snm, vnode_t *tdvp, char *tnm, 503789Sahrens cred_t *cr) 504789Sahrens { 505789Sahrens zfsctl_snapdir_t *sdp = sdvp->v_data; 506789Sahrens zfs_snapentry_t search, *sep; 507789Sahrens avl_index_t where; 508789Sahrens char from[MAXNAMELEN], to[MAXNAMELEN]; 509789Sahrens int err; 510789Sahrens 5111154Smaybee err = zfsctl_snapshot_zname(sdvp, snm, MAXNAMELEN, from); 5121154Smaybee if (err) 5131154Smaybee return (err); 514789Sahrens err = zfs_secpolicy_write(from, NULL, cr); 515789Sahrens if (err) 516789Sahrens return (err); 517789Sahrens 518789Sahrens /* 519789Sahrens * Cannot move snapshots out of the snapdir. 520789Sahrens */ 521789Sahrens if (sdvp != tdvp) 522789Sahrens return (EINVAL); 523789Sahrens 524789Sahrens if (strcmp(snm, tnm) == 0) 525789Sahrens return (0); 526789Sahrens 5271154Smaybee err = zfsctl_snapshot_zname(tdvp, tnm, MAXNAMELEN, to); 5281154Smaybee if (err) 5291154Smaybee return (err); 5301154Smaybee 531789Sahrens mutex_enter(&sdp->sd_lock); 532789Sahrens 533789Sahrens search.se_name = (char *)snm; 5341154Smaybee if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL) { 5351154Smaybee mutex_exit(&sdp->sd_lock); 5361154Smaybee return (ENOENT); 537789Sahrens } 538789Sahrens 539789Sahrens err = dmu_objset_rename(from, to); 5401154Smaybee if (err == 0) 5411154Smaybee zfsctl_rename_snap(sdp, sep, tnm); 542789Sahrens 543789Sahrens mutex_exit(&sdp->sd_lock); 544789Sahrens 545789Sahrens return (err); 546789Sahrens } 547789Sahrens 548789Sahrens /* ARGSUSED */ 549789Sahrens static int 550789Sahrens zfsctl_snapdir_remove(vnode_t *dvp, char *name, vnode_t *cwd, cred_t *cr) 551789Sahrens { 552789Sahrens zfsctl_snapdir_t *sdp = dvp->v_data; 553789Sahrens char snapname[MAXNAMELEN]; 554789Sahrens int err; 555789Sahrens 5561154Smaybee err = zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, snapname); 5571154Smaybee if (err) 5581154Smaybee return (err); 559789Sahrens err = zfs_secpolicy_write(snapname, NULL, cr); 560789Sahrens if (err) 561789Sahrens return (err); 562789Sahrens 563789Sahrens mutex_enter(&sdp->sd_lock); 564789Sahrens 565789Sahrens err = zfsctl_unmount_snap(dvp, name, 0, cr); 566789Sahrens if (err) { 567789Sahrens mutex_exit(&sdp->sd_lock); 568789Sahrens return (err); 569789Sahrens } 570789Sahrens 571789Sahrens err = dmu_objset_destroy(snapname); 572789Sahrens 573789Sahrens mutex_exit(&sdp->sd_lock); 574789Sahrens 575789Sahrens return (err); 576789Sahrens } 577789Sahrens 578789Sahrens /* 579789Sahrens * Lookup entry point for the 'snapshot' directory. Try to open the 580789Sahrens * snapshot if it exist, creating the pseudo filesystem vnode as necessary. 581789Sahrens * Perform a mount of the associated dataset on top of the vnode. 582789Sahrens */ 583789Sahrens /* ARGSUSED */ 584789Sahrens static int 585789Sahrens zfsctl_snapdir_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp, 586789Sahrens int flags, vnode_t *rdir, cred_t *cr) 587789Sahrens { 588789Sahrens zfsctl_snapdir_t *sdp = dvp->v_data; 589789Sahrens objset_t *snap; 590789Sahrens char snapname[MAXNAMELEN]; 591789Sahrens char *mountpoint; 592789Sahrens zfs_snapentry_t *sep, search; 593789Sahrens struct mounta margs; 594789Sahrens vfs_t *vfsp; 595789Sahrens size_t mountpoint_len; 596789Sahrens avl_index_t where; 597789Sahrens zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data; 598789Sahrens int err; 599789Sahrens 600789Sahrens ASSERT(dvp->v_type == VDIR); 601789Sahrens 602789Sahrens if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) 603789Sahrens return (0); 604789Sahrens 605789Sahrens /* 606789Sahrens * If we get a recursive call, that means we got called 607789Sahrens * from the domount() code while it was trying to look up the 608789Sahrens * spec (which looks like a local path for zfs). We need to 609789Sahrens * add some flag to domount() to tell it not to do this lookup. 610789Sahrens */ 611789Sahrens if (MUTEX_HELD(&sdp->sd_lock)) 612789Sahrens return (ENOENT); 613789Sahrens 614789Sahrens ZFS_ENTER(zfsvfs); 615789Sahrens 616789Sahrens mutex_enter(&sdp->sd_lock); 617789Sahrens search.se_name = (char *)nm; 618789Sahrens if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) { 619789Sahrens *vpp = sep->se_root; 620789Sahrens VN_HOLD(*vpp); 621789Sahrens /* 6221566Smaybee * If the snapshot was unmounted behind our backs, 6231566Smaybee * try to remount it. 624789Sahrens */ 6251566Smaybee if (traverse(vpp) != 0) { 6261566Smaybee ASSERT(!vn_ismntpt(*vpp)); 627789Sahrens goto domount; 6281566Smaybee } 629789Sahrens mutex_exit(&sdp->sd_lock); 630789Sahrens ZFS_EXIT(zfsvfs); 631789Sahrens return (0); 632789Sahrens } 633789Sahrens 634789Sahrens /* 635789Sahrens * The requested snapshot is not currently mounted, look it up. 636789Sahrens */ 6371154Smaybee err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname); 6381154Smaybee if (err) { 6391154Smaybee mutex_exit(&sdp->sd_lock); 6401154Smaybee ZFS_EXIT(zfsvfs); 6411154Smaybee return (err); 6421154Smaybee } 643789Sahrens if (dmu_objset_open(snapname, DMU_OST_ZFS, 644789Sahrens DS_MODE_STANDARD | DS_MODE_READONLY, &snap) != 0) { 645789Sahrens mutex_exit(&sdp->sd_lock); 646789Sahrens ZFS_EXIT(zfsvfs); 647789Sahrens return (ENOENT); 648789Sahrens } 649789Sahrens 650789Sahrens sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP); 651789Sahrens sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP); 652789Sahrens (void) strcpy(sep->se_name, nm); 653789Sahrens *vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap)); 654789Sahrens avl_insert(&sdp->sd_snaps, sep, where); 655789Sahrens 656789Sahrens dmu_objset_close(snap); 657789Sahrens domount: 658789Sahrens mountpoint_len = strlen(refstr_value(dvp->v_vfsp->vfs_mntpt)) + 659789Sahrens strlen("/.zfs/snapshot/") + strlen(nm) + 1; 660789Sahrens mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP); 661789Sahrens (void) snprintf(mountpoint, mountpoint_len, "%s/.zfs/snapshot/%s", 662789Sahrens refstr_value(dvp->v_vfsp->vfs_mntpt), nm); 663789Sahrens 664789Sahrens margs.spec = snapname; 665789Sahrens margs.dir = mountpoint; 666789Sahrens margs.flags = MS_SYSSPACE | MS_NOMNTTAB; 667789Sahrens margs.fstype = "zfs"; 668789Sahrens margs.dataptr = NULL; 669789Sahrens margs.datalen = 0; 670789Sahrens margs.optptr = NULL; 671789Sahrens margs.optlen = 0; 672789Sahrens 673789Sahrens err = domount("zfs", &margs, *vpp, kcred, &vfsp); 674789Sahrens kmem_free(mountpoint, mountpoint_len); 675789Sahrens 676816Smaybee if (err == 0) { 677816Smaybee /* 678816Smaybee * Return the mounted root rather than the covered mount point. 679816Smaybee */ 680816Smaybee VFS_RELE(vfsp); 681816Smaybee err = traverse(vpp); 682816Smaybee } 683789Sahrens 684816Smaybee if (err == 0) { 685816Smaybee /* 686816Smaybee * Fix up the root vnode. 687816Smaybee */ 688816Smaybee ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs); 689816Smaybee VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs; 690816Smaybee (*vpp)->v_vfsp = zfsvfs->z_vfs; 691816Smaybee (*vpp)->v_flag &= ~VROOT; 692816Smaybee } 693789Sahrens mutex_exit(&sdp->sd_lock); 694789Sahrens ZFS_EXIT(zfsvfs); 695789Sahrens 6961566Smaybee /* 6971566Smaybee * If we had an error, drop our hold on the vnode and 6981566Smaybee * zfsctl_snapshot_inactive() will clean up. 6991566Smaybee */ 7001566Smaybee if (err) { 701816Smaybee VN_RELE(*vpp); 7021566Smaybee *vpp = NULL; 7031566Smaybee } 704816Smaybee return (err); 705789Sahrens } 706789Sahrens 707789Sahrens /* ARGSUSED */ 708789Sahrens static int 709789Sahrens zfsctl_snapdir_readdir_cb(vnode_t *vp, struct dirent64 *dp, int *eofp, 710789Sahrens offset_t *offp, offset_t *nextp, void *data) 711789Sahrens { 712789Sahrens zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 713789Sahrens char snapname[MAXNAMELEN]; 714789Sahrens uint64_t id, cookie; 715789Sahrens 716789Sahrens ZFS_ENTER(zfsvfs); 717789Sahrens 718789Sahrens cookie = *offp; 719789Sahrens if (dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, snapname, &id, 720789Sahrens &cookie) == ENOENT) { 721789Sahrens *eofp = 1; 722789Sahrens ZFS_EXIT(zfsvfs); 723789Sahrens return (0); 724789Sahrens } 725789Sahrens 726789Sahrens (void) strcpy(dp->d_name, snapname); 727789Sahrens dp->d_ino = ZFSCTL_INO_SNAP(id); 728789Sahrens *nextp = cookie; 729789Sahrens 730789Sahrens ZFS_EXIT(zfsvfs); 731789Sahrens 732789Sahrens return (0); 733789Sahrens } 734789Sahrens 735789Sahrens vnode_t * 736789Sahrens zfsctl_mknode_snapdir(vnode_t *pvp) 737789Sahrens { 738789Sahrens vnode_t *vp; 739789Sahrens zfsctl_snapdir_t *sdp; 740789Sahrens 741789Sahrens vp = gfs_dir_create(sizeof (zfsctl_snapdir_t), pvp, 742789Sahrens zfsctl_ops_snapdir, NULL, NULL, MAXNAMELEN, 743789Sahrens zfsctl_snapdir_readdir_cb, NULL); 744789Sahrens sdp = vp->v_data; 745789Sahrens sdp->sd_node.zc_id = ZFSCTL_INO_SNAPDIR; 7461571Sek110237 sdp->sd_node.zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime; 747789Sahrens mutex_init(&sdp->sd_lock, NULL, MUTEX_DEFAULT, NULL); 748789Sahrens avl_create(&sdp->sd_snaps, snapentry_compare, 749789Sahrens sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node)); 750789Sahrens return (vp); 751789Sahrens } 752789Sahrens 753789Sahrens /* ARGSUSED */ 754789Sahrens static int 755789Sahrens zfsctl_snapdir_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *cr) 756789Sahrens { 757789Sahrens zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; 758789Sahrens zfsctl_snapdir_t *sdp = vp->v_data; 759789Sahrens 760789Sahrens ZFS_ENTER(zfsvfs); 761789Sahrens zfsctl_common_getattr(vp, vap); 762789Sahrens vap->va_nodeid = gfs_file_inode(vp); 763789Sahrens vap->va_nlink = vap->va_size = avl_numnodes(&sdp->sd_snaps) + 2; 764789Sahrens ZFS_EXIT(zfsvfs); 765789Sahrens 766789Sahrens return (0); 767789Sahrens } 768789Sahrens 7691566Smaybee /* ARGSUSED */ 770789Sahrens static void 771789Sahrens zfsctl_snapdir_inactive(vnode_t *vp, cred_t *cr) 772789Sahrens { 773789Sahrens zfsctl_snapdir_t *sdp = vp->v_data; 7741566Smaybee void *private; 775789Sahrens 7761566Smaybee private = gfs_dir_inactive(vp); 7771566Smaybee if (private != NULL) { 7781566Smaybee ASSERT(avl_numnodes(&sdp->sd_snaps) == 0); 7791566Smaybee mutex_destroy(&sdp->sd_lock); 7801566Smaybee avl_destroy(&sdp->sd_snaps); 7811566Smaybee kmem_free(private, sizeof (zfsctl_snapdir_t)); 7821566Smaybee } 783789Sahrens } 784789Sahrens 785789Sahrens static const fs_operation_def_t zfsctl_tops_snapdir[] = { 786789Sahrens { VOPNAME_OPEN, zfsctl_common_open }, 787789Sahrens { VOPNAME_CLOSE, zfsctl_common_close }, 788789Sahrens { VOPNAME_IOCTL, fs_inval }, 789789Sahrens { VOPNAME_GETATTR, zfsctl_snapdir_getattr }, 790789Sahrens { VOPNAME_ACCESS, zfsctl_common_access }, 791789Sahrens { VOPNAME_RENAME, zfsctl_snapdir_rename }, 792789Sahrens { VOPNAME_RMDIR, zfsctl_snapdir_remove }, 793789Sahrens { VOPNAME_READDIR, gfs_vop_readdir }, 794789Sahrens { VOPNAME_LOOKUP, zfsctl_snapdir_lookup }, 795789Sahrens { VOPNAME_SEEK, fs_seek }, 796789Sahrens { VOPNAME_INACTIVE, (fs_generic_func_p) zfsctl_snapdir_inactive }, 797789Sahrens { VOPNAME_FID, zfsctl_common_fid }, 798789Sahrens { NULL } 799789Sahrens }; 800789Sahrens 801789Sahrens static vnode_t * 802789Sahrens zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset) 803789Sahrens { 804789Sahrens vnode_t *vp; 805789Sahrens zfsctl_node_t *zcp; 806789Sahrens 807789Sahrens vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp, 808789Sahrens zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL); 809789Sahrens zcp = vp->v_data; 810789Sahrens zcp->zc_id = objset; 811789Sahrens 812789Sahrens return (vp); 813789Sahrens } 814789Sahrens 815789Sahrens static void 816789Sahrens zfsctl_snapshot_inactive(vnode_t *vp, cred_t *cr) 817789Sahrens { 818789Sahrens zfsctl_snapdir_t *sdp; 819789Sahrens zfs_snapentry_t *sep, *next; 820789Sahrens vnode_t *dvp; 821789Sahrens 822789Sahrens VERIFY(gfs_dir_lookup(vp, "..", &dvp) == 0); 823789Sahrens sdp = dvp->v_data; 824789Sahrens 825789Sahrens mutex_enter(&sdp->sd_lock); 826789Sahrens 827789Sahrens if (vp->v_count > 1) { 828789Sahrens mutex_exit(&sdp->sd_lock); 829789Sahrens return; 830789Sahrens } 831789Sahrens ASSERT(!vn_ismntpt(vp)); 832789Sahrens 833789Sahrens sep = avl_first(&sdp->sd_snaps); 834789Sahrens while (sep != NULL) { 835789Sahrens next = AVL_NEXT(&sdp->sd_snaps, sep); 836789Sahrens 837789Sahrens if (sep->se_root == vp) { 838789Sahrens avl_remove(&sdp->sd_snaps, sep); 839789Sahrens kmem_free(sep->se_name, strlen(sep->se_name) + 1); 840789Sahrens kmem_free(sep, sizeof (zfs_snapentry_t)); 841789Sahrens break; 842789Sahrens } 843789Sahrens sep = next; 844789Sahrens } 845789Sahrens ASSERT(sep != NULL); 846789Sahrens 847789Sahrens mutex_exit(&sdp->sd_lock); 848789Sahrens VN_RELE(dvp); 849789Sahrens 8501566Smaybee /* 8511566Smaybee * Dispose of the vnode for the snapshot mount point. 8521566Smaybee * This is safe to do because once this entry has been removed 8531566Smaybee * from the AVL tree, it can't be found again, so cannot become 8541566Smaybee * "active". If we lookup the same name again we will end up 8551566Smaybee * creating a new vnode. 8561566Smaybee */ 857789Sahrens gfs_vop_inactive(vp, cr); 858789Sahrens } 859789Sahrens 860789Sahrens 861789Sahrens /* 862789Sahrens * These VP's should never see the light of day. They should always 863789Sahrens * be covered. 864789Sahrens */ 865789Sahrens static const fs_operation_def_t zfsctl_tops_snapshot[] = { 866789Sahrens VOPNAME_INACTIVE, (fs_generic_func_p) zfsctl_snapshot_inactive, 867789Sahrens NULL, NULL 868789Sahrens }; 869789Sahrens 870789Sahrens int 871789Sahrens zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp) 872789Sahrens { 873789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 874789Sahrens vnode_t *dvp, *vp; 875789Sahrens zfsctl_snapdir_t *sdp; 876789Sahrens zfsctl_node_t *zcp; 877789Sahrens zfs_snapentry_t *sep; 878789Sahrens int error; 879789Sahrens 880789Sahrens ASSERT(zfsvfs->z_ctldir != NULL); 881789Sahrens error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp, 882789Sahrens NULL, 0, NULL, kcred); 883789Sahrens if (error != 0) 884789Sahrens return (error); 885789Sahrens sdp = dvp->v_data; 886789Sahrens 887789Sahrens mutex_enter(&sdp->sd_lock); 888789Sahrens sep = avl_first(&sdp->sd_snaps); 889789Sahrens while (sep != NULL) { 890789Sahrens vp = sep->se_root; 891789Sahrens zcp = vp->v_data; 892789Sahrens if (zcp->zc_id == objsetid) 893789Sahrens break; 894789Sahrens 895789Sahrens sep = AVL_NEXT(&sdp->sd_snaps, sep); 896789Sahrens } 897789Sahrens 898789Sahrens if (sep != NULL) { 899789Sahrens VN_HOLD(vp); 900789Sahrens error = traverse(&vp); 901789Sahrens if (error == 0) 902789Sahrens *zfsvfsp = VTOZ(vp)->z_zfsvfs; 903*1572Snd150628 mutex_exit(&sdp->sd_lock); 904789Sahrens VN_RELE(vp); 905789Sahrens } else { 906789Sahrens error = EINVAL; 907*1572Snd150628 mutex_exit(&sdp->sd_lock); 908789Sahrens } 909789Sahrens 910789Sahrens VN_RELE(dvp); 911789Sahrens 912789Sahrens return (error); 913789Sahrens } 914789Sahrens 915789Sahrens /* 916789Sahrens * Unmount any snapshots for the given filesystem. This is called from 917789Sahrens * zfs_umount() - if we have a ctldir, then go through and unmount all the 918789Sahrens * snapshots. 919789Sahrens */ 920789Sahrens int 921789Sahrens zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr) 922789Sahrens { 923789Sahrens zfsvfs_t *zfsvfs = vfsp->vfs_data; 924789Sahrens vnode_t *dvp, *svp; 925789Sahrens zfsctl_snapdir_t *sdp; 926789Sahrens zfs_snapentry_t *sep, *next; 927789Sahrens int error; 928789Sahrens 929789Sahrens ASSERT(zfsvfs->z_ctldir != NULL); 930789Sahrens error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp, 931789Sahrens NULL, 0, NULL, cr); 932789Sahrens if (error != 0) 933789Sahrens return (error); 934789Sahrens sdp = dvp->v_data; 935789Sahrens 936789Sahrens mutex_enter(&sdp->sd_lock); 937789Sahrens 938789Sahrens sep = avl_first(&sdp->sd_snaps); 939789Sahrens while (sep != NULL) { 940789Sahrens svp = sep->se_root; 941789Sahrens next = AVL_NEXT(&sdp->sd_snaps, sep); 942789Sahrens 943789Sahrens /* 944789Sahrens * If this snapshot is not mounted, then it must 945789Sahrens * have just been unmounted by somebody else, and 946789Sahrens * will be cleaned up by zfsctl_snapdir_inactive(). 947789Sahrens */ 948789Sahrens if (vn_ismntpt(svp)) { 949789Sahrens if ((error = vn_vfswlock(svp)) != 0) 950789Sahrens goto out; 951789Sahrens 952789Sahrens VN_HOLD(svp); 953789Sahrens error = dounmount(vn_mountedvfs(svp), fflags, cr); 954789Sahrens if (error) { 955789Sahrens VN_RELE(svp); 956789Sahrens goto out; 957789Sahrens } 958789Sahrens 959789Sahrens avl_remove(&sdp->sd_snaps, sep); 960789Sahrens kmem_free(sep->se_name, strlen(sep->se_name) + 1); 961789Sahrens kmem_free(sep, sizeof (zfs_snapentry_t)); 962789Sahrens 963789Sahrens /* 964789Sahrens * We can't use VN_RELE(), as that will try to 965789Sahrens * invoke zfsctl_snapdir_inactive(), and that 966789Sahrens * would lead to an attempt to re-grab the sd_lock. 967789Sahrens */ 968789Sahrens ASSERT3U(svp->v_count, ==, 1); 969789Sahrens gfs_vop_inactive(svp, cr); 970789Sahrens } 971789Sahrens sep = next; 972789Sahrens } 973789Sahrens out: 974789Sahrens mutex_exit(&sdp->sd_lock); 975789Sahrens VN_RELE(dvp); 976789Sahrens 977789Sahrens return (error); 978789Sahrens } 979