/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
24*7a7741afSMartin Matuska */ 25*7a7741afSMartin Matuska 26*7a7741afSMartin Matuska /* Portions Copyright 2007 Jeremy Teo */ 27*7a7741afSMartin Matuska 28*7a7741afSMartin Matuska #include <sys/types.h> 29*7a7741afSMartin Matuska #include <sys/param.h> 30*7a7741afSMartin Matuska #include <sys/time.h> 31*7a7741afSMartin Matuska #include <sys/sysmacros.h> 32*7a7741afSMartin Matuska #include <sys/mntent.h> 33*7a7741afSMartin Matuska #include <sys/u8_textprep.h> 34*7a7741afSMartin Matuska #include <sys/dsl_dataset.h> 35*7a7741afSMartin Matuska #include <sys/vfs.h> 36*7a7741afSMartin Matuska #include <sys/vnode.h> 37*7a7741afSMartin Matuska #include <sys/file.h> 38*7a7741afSMartin Matuska #include <sys/kmem.h> 39*7a7741afSMartin Matuska #include <sys/errno.h> 40*7a7741afSMartin Matuska #include <sys/atomic.h> 41*7a7741afSMartin Matuska #include <sys/zfs_dir.h> 42*7a7741afSMartin Matuska #include <sys/zfs_acl.h> 43*7a7741afSMartin Matuska #include <sys/zfs_ioctl.h> 44*7a7741afSMartin Matuska #include <sys/zfs_rlock.h> 45*7a7741afSMartin Matuska #include <sys/zfs_fuid.h> 46*7a7741afSMartin Matuska #include <sys/zfs_vnops.h> 47*7a7741afSMartin Matuska #include <sys/zfs_ctldir.h> 48*7a7741afSMartin Matuska #include <sys/dnode.h> 49*7a7741afSMartin Matuska #include <sys/fs/zfs.h> 50*7a7741afSMartin Matuska #include <sys/zpl.h> 51*7a7741afSMartin Matuska #include <sys/dmu.h> 52*7a7741afSMartin Matuska #include <sys/dmu_objset.h> 53*7a7741afSMartin Matuska #include <sys/dmu_tx.h> 54*7a7741afSMartin Matuska #include <sys/zfs_refcount.h> 55*7a7741afSMartin Matuska #include <sys/stat.h> 56*7a7741afSMartin Matuska #include <sys/zap.h> 57*7a7741afSMartin Matuska #include <sys/zfs_znode.h> 58*7a7741afSMartin Matuska #include <sys/sa.h> 59*7a7741afSMartin Matuska #include <sys/zfs_sa.h> 60*7a7741afSMartin Matuska #include <sys/zfs_stat.h> 61*7a7741afSMartin Matuska #include <linux/mm_compat.h> 62*7a7741afSMartin Matuska 63*7a7741afSMartin Matuska #include "zfs_prop.h" 
64*7a7741afSMartin Matuska #include "zfs_comutil.h" 65*7a7741afSMartin Matuska 66*7a7741afSMartin Matuska static kmem_cache_t *znode_cache = NULL; 67*7a7741afSMartin Matuska static kmem_cache_t *znode_hold_cache = NULL; 68*7a7741afSMartin Matuska unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ; 69*7a7741afSMartin Matuska 70*7a7741afSMartin Matuska /* 71*7a7741afSMartin Matuska * This is used by the test suite so that it can delay znodes from being 72*7a7741afSMartin Matuska * freed in order to inspect the unlinked set. 73*7a7741afSMartin Matuska */ 74*7a7741afSMartin Matuska static int zfs_unlink_suspend_progress = 0; 75*7a7741afSMartin Matuska 76*7a7741afSMartin Matuska /* 77*7a7741afSMartin Matuska * This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on 78*7a7741afSMartin Matuska * z_rangelock. It will modify the offset and length of the lock to reflect 79*7a7741afSMartin Matuska * znode-specific information, and convert RL_APPEND to RL_WRITER. This is 80*7a7741afSMartin Matuska * called with the rangelock_t's rl_lock held, which avoids races. 81*7a7741afSMartin Matuska */ 82*7a7741afSMartin Matuska static void 83*7a7741afSMartin Matuska zfs_rangelock_cb(zfs_locked_range_t *new, void *arg) 84*7a7741afSMartin Matuska { 85*7a7741afSMartin Matuska znode_t *zp = arg; 86*7a7741afSMartin Matuska 87*7a7741afSMartin Matuska /* 88*7a7741afSMartin Matuska * If in append mode, convert to writer and lock starting at the 89*7a7741afSMartin Matuska * current end of file. 90*7a7741afSMartin Matuska */ 91*7a7741afSMartin Matuska if (new->lr_type == RL_APPEND) { 92*7a7741afSMartin Matuska new->lr_offset = zp->z_size; 93*7a7741afSMartin Matuska new->lr_type = RL_WRITER; 94*7a7741afSMartin Matuska } 95*7a7741afSMartin Matuska 96*7a7741afSMartin Matuska /* 97*7a7741afSMartin Matuska * If we need to grow the block size then lock the whole file range. 
98*7a7741afSMartin Matuska */ 99*7a7741afSMartin Matuska uint64_t end_size = MAX(zp->z_size, new->lr_offset + new->lr_length); 100*7a7741afSMartin Matuska if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) || 101*7a7741afSMartin Matuska zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) { 102*7a7741afSMartin Matuska new->lr_offset = 0; 103*7a7741afSMartin Matuska new->lr_length = UINT64_MAX; 104*7a7741afSMartin Matuska } 105*7a7741afSMartin Matuska } 106*7a7741afSMartin Matuska 107*7a7741afSMartin Matuska static int 108*7a7741afSMartin Matuska zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) 109*7a7741afSMartin Matuska { 110*7a7741afSMartin Matuska (void) arg, (void) kmflags; 111*7a7741afSMartin Matuska znode_t *zp = buf; 112*7a7741afSMartin Matuska 113*7a7741afSMartin Matuska inode_init_once(ZTOI(zp)); 114*7a7741afSMartin Matuska list_link_init(&zp->z_link_node); 115*7a7741afSMartin Matuska 116*7a7741afSMartin Matuska mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL); 117*7a7741afSMartin Matuska rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL); 118*7a7741afSMartin Matuska rw_init(&zp->z_name_lock, NULL, RW_NOLOCKDEP, NULL); 119*7a7741afSMartin Matuska mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL); 120*7a7741afSMartin Matuska rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL); 121*7a7741afSMartin Matuska 122*7a7741afSMartin Matuska zfs_rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp); 123*7a7741afSMartin Matuska 124*7a7741afSMartin Matuska zp->z_dirlocks = NULL; 125*7a7741afSMartin Matuska zp->z_acl_cached = NULL; 126*7a7741afSMartin Matuska zp->z_xattr_cached = NULL; 127*7a7741afSMartin Matuska zp->z_xattr_parent = 0; 128*7a7741afSMartin Matuska zp->z_sync_writes_cnt = 0; 129*7a7741afSMartin Matuska zp->z_async_writes_cnt = 0; 130*7a7741afSMartin Matuska 131*7a7741afSMartin Matuska return (0); 132*7a7741afSMartin Matuska } 133*7a7741afSMartin Matuska 134*7a7741afSMartin Matuska static void 135*7a7741afSMartin Matuska 
zfs_znode_cache_destructor(void *buf, void *arg) 136*7a7741afSMartin Matuska { 137*7a7741afSMartin Matuska (void) arg; 138*7a7741afSMartin Matuska znode_t *zp = buf; 139*7a7741afSMartin Matuska 140*7a7741afSMartin Matuska ASSERT(!list_link_active(&zp->z_link_node)); 141*7a7741afSMartin Matuska mutex_destroy(&zp->z_lock); 142*7a7741afSMartin Matuska rw_destroy(&zp->z_parent_lock); 143*7a7741afSMartin Matuska rw_destroy(&zp->z_name_lock); 144*7a7741afSMartin Matuska mutex_destroy(&zp->z_acl_lock); 145*7a7741afSMartin Matuska rw_destroy(&zp->z_xattr_lock); 146*7a7741afSMartin Matuska zfs_rangelock_fini(&zp->z_rangelock); 147*7a7741afSMartin Matuska 148*7a7741afSMartin Matuska ASSERT3P(zp->z_dirlocks, ==, NULL); 149*7a7741afSMartin Matuska ASSERT3P(zp->z_acl_cached, ==, NULL); 150*7a7741afSMartin Matuska ASSERT3P(zp->z_xattr_cached, ==, NULL); 151*7a7741afSMartin Matuska 152*7a7741afSMartin Matuska ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt)); 153*7a7741afSMartin Matuska ASSERT0(atomic_load_32(&zp->z_async_writes_cnt)); 154*7a7741afSMartin Matuska } 155*7a7741afSMartin Matuska 156*7a7741afSMartin Matuska static int 157*7a7741afSMartin Matuska zfs_znode_hold_cache_constructor(void *buf, void *arg, int kmflags) 158*7a7741afSMartin Matuska { 159*7a7741afSMartin Matuska (void) arg, (void) kmflags; 160*7a7741afSMartin Matuska znode_hold_t *zh = buf; 161*7a7741afSMartin Matuska 162*7a7741afSMartin Matuska mutex_init(&zh->zh_lock, NULL, MUTEX_DEFAULT, NULL); 163*7a7741afSMartin Matuska zh->zh_refcount = 0; 164*7a7741afSMartin Matuska 165*7a7741afSMartin Matuska return (0); 166*7a7741afSMartin Matuska } 167*7a7741afSMartin Matuska 168*7a7741afSMartin Matuska static void 169*7a7741afSMartin Matuska zfs_znode_hold_cache_destructor(void *buf, void *arg) 170*7a7741afSMartin Matuska { 171*7a7741afSMartin Matuska (void) arg; 172*7a7741afSMartin Matuska znode_hold_t *zh = buf; 173*7a7741afSMartin Matuska 174*7a7741afSMartin Matuska mutex_destroy(&zh->zh_lock); 175*7a7741afSMartin 
Matuska } 176*7a7741afSMartin Matuska 177*7a7741afSMartin Matuska void 178*7a7741afSMartin Matuska zfs_znode_init(void) 179*7a7741afSMartin Matuska { 180*7a7741afSMartin Matuska /* 181*7a7741afSMartin Matuska * Initialize zcache. The KMC_SLAB hint is used in order that it be 182*7a7741afSMartin Matuska * backed by kmalloc() when on the Linux slab in order that any 183*7a7741afSMartin Matuska * wait_on_bit() operations on the related inode operate properly. 184*7a7741afSMartin Matuska */ 185*7a7741afSMartin Matuska ASSERT(znode_cache == NULL); 186*7a7741afSMartin Matuska znode_cache = kmem_cache_create("zfs_znode_cache", 187*7a7741afSMartin Matuska sizeof (znode_t), 0, zfs_znode_cache_constructor, 188*7a7741afSMartin Matuska zfs_znode_cache_destructor, NULL, NULL, NULL, 189*7a7741afSMartin Matuska KMC_SLAB | KMC_RECLAIMABLE); 190*7a7741afSMartin Matuska 191*7a7741afSMartin Matuska ASSERT(znode_hold_cache == NULL); 192*7a7741afSMartin Matuska znode_hold_cache = kmem_cache_create("zfs_znode_hold_cache", 193*7a7741afSMartin Matuska sizeof (znode_hold_t), 0, zfs_znode_hold_cache_constructor, 194*7a7741afSMartin Matuska zfs_znode_hold_cache_destructor, NULL, NULL, NULL, 0); 195*7a7741afSMartin Matuska } 196*7a7741afSMartin Matuska 197*7a7741afSMartin Matuska void 198*7a7741afSMartin Matuska zfs_znode_fini(void) 199*7a7741afSMartin Matuska { 200*7a7741afSMartin Matuska /* 201*7a7741afSMartin Matuska * Cleanup zcache 202*7a7741afSMartin Matuska */ 203*7a7741afSMartin Matuska if (znode_cache) 204*7a7741afSMartin Matuska kmem_cache_destroy(znode_cache); 205*7a7741afSMartin Matuska znode_cache = NULL; 206*7a7741afSMartin Matuska 207*7a7741afSMartin Matuska if (znode_hold_cache) 208*7a7741afSMartin Matuska kmem_cache_destroy(znode_hold_cache); 209*7a7741afSMartin Matuska znode_hold_cache = NULL; 210*7a7741afSMartin Matuska } 211*7a7741afSMartin Matuska 212*7a7741afSMartin Matuska /* 213*7a7741afSMartin Matuska * The zfs_znode_hold_enter() / zfs_znode_hold_exit() functions are 
used to
 * serialize access to a znode and its SA buffer while the object is being
 * created or destroyed.  This kind of locking would normally reside in the
 * znode itself but in this case that's impossible because the znode and SA
 * buffer may not yet exist.  Therefore the locking is handled externally
 * with an array of mutexes and AVL trees which contain per-object locks.
 *
 * In zfs_znode_hold_enter() a per-object lock is created as needed, inserted
 * into the correct AVL tree and finally the per-object lock is held.  In
 * zfs_znode_hold_exit() the process is reversed.  The per-object lock is
 * released, removed from the AVL tree and destroyed if there are no waiters.
 *
 * This scheme has two important properties:
 *
 * 1) No memory allocations are performed while holding one of the z_hold_locks.
 *    This ensures evict(), which can be called from direct memory reclaim, will
 *    never block waiting on a z_hold_locks mutex which just happens to have
 *    hashed to the same index.
 *
 * 2) All locks used to serialize access to an object are per-object and never
 *    shared.  This minimizes lock contention without creating a large number
 *    of dedicated locks.
 *
 * On the downside it does require znode_hold_t structures to be frequently
 * allocated and freed.
However, because these are backed by a kmem cache 238*7a7741afSMartin Matuska * and very short lived this cost is minimal. 239*7a7741afSMartin Matuska */ 240*7a7741afSMartin Matuska int 241*7a7741afSMartin Matuska zfs_znode_hold_compare(const void *a, const void *b) 242*7a7741afSMartin Matuska { 243*7a7741afSMartin Matuska const znode_hold_t *zh_a = (const znode_hold_t *)a; 244*7a7741afSMartin Matuska const znode_hold_t *zh_b = (const znode_hold_t *)b; 245*7a7741afSMartin Matuska 246*7a7741afSMartin Matuska return (TREE_CMP(zh_a->zh_obj, zh_b->zh_obj)); 247*7a7741afSMartin Matuska } 248*7a7741afSMartin Matuska 249*7a7741afSMartin Matuska static boolean_t __maybe_unused 250*7a7741afSMartin Matuska zfs_znode_held(zfsvfs_t *zfsvfs, uint64_t obj) 251*7a7741afSMartin Matuska { 252*7a7741afSMartin Matuska znode_hold_t *zh, search; 253*7a7741afSMartin Matuska int i = ZFS_OBJ_HASH(zfsvfs, obj); 254*7a7741afSMartin Matuska boolean_t held; 255*7a7741afSMartin Matuska 256*7a7741afSMartin Matuska search.zh_obj = obj; 257*7a7741afSMartin Matuska 258*7a7741afSMartin Matuska mutex_enter(&zfsvfs->z_hold_locks[i]); 259*7a7741afSMartin Matuska zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL); 260*7a7741afSMartin Matuska held = (zh && MUTEX_HELD(&zh->zh_lock)) ? 
B_TRUE : B_FALSE; 261*7a7741afSMartin Matuska mutex_exit(&zfsvfs->z_hold_locks[i]); 262*7a7741afSMartin Matuska 263*7a7741afSMartin Matuska return (held); 264*7a7741afSMartin Matuska } 265*7a7741afSMartin Matuska 266*7a7741afSMartin Matuska znode_hold_t * 267*7a7741afSMartin Matuska zfs_znode_hold_enter(zfsvfs_t *zfsvfs, uint64_t obj) 268*7a7741afSMartin Matuska { 269*7a7741afSMartin Matuska znode_hold_t *zh, *zh_new, search; 270*7a7741afSMartin Matuska int i = ZFS_OBJ_HASH(zfsvfs, obj); 271*7a7741afSMartin Matuska boolean_t found = B_FALSE; 272*7a7741afSMartin Matuska 273*7a7741afSMartin Matuska zh_new = kmem_cache_alloc(znode_hold_cache, KM_SLEEP); 274*7a7741afSMartin Matuska search.zh_obj = obj; 275*7a7741afSMartin Matuska 276*7a7741afSMartin Matuska mutex_enter(&zfsvfs->z_hold_locks[i]); 277*7a7741afSMartin Matuska zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL); 278*7a7741afSMartin Matuska if (likely(zh == NULL)) { 279*7a7741afSMartin Matuska zh = zh_new; 280*7a7741afSMartin Matuska zh->zh_obj = obj; 281*7a7741afSMartin Matuska avl_add(&zfsvfs->z_hold_trees[i], zh); 282*7a7741afSMartin Matuska } else { 283*7a7741afSMartin Matuska ASSERT3U(zh->zh_obj, ==, obj); 284*7a7741afSMartin Matuska found = B_TRUE; 285*7a7741afSMartin Matuska } 286*7a7741afSMartin Matuska zh->zh_refcount++; 287*7a7741afSMartin Matuska ASSERT3S(zh->zh_refcount, >, 0); 288*7a7741afSMartin Matuska mutex_exit(&zfsvfs->z_hold_locks[i]); 289*7a7741afSMartin Matuska 290*7a7741afSMartin Matuska if (found == B_TRUE) 291*7a7741afSMartin Matuska kmem_cache_free(znode_hold_cache, zh_new); 292*7a7741afSMartin Matuska 293*7a7741afSMartin Matuska ASSERT(MUTEX_NOT_HELD(&zh->zh_lock)); 294*7a7741afSMartin Matuska mutex_enter(&zh->zh_lock); 295*7a7741afSMartin Matuska 296*7a7741afSMartin Matuska return (zh); 297*7a7741afSMartin Matuska } 298*7a7741afSMartin Matuska 299*7a7741afSMartin Matuska void 300*7a7741afSMartin Matuska zfs_znode_hold_exit(zfsvfs_t *zfsvfs, znode_hold_t *zh) 301*7a7741afSMartin 
Matuska { 302*7a7741afSMartin Matuska int i = ZFS_OBJ_HASH(zfsvfs, zh->zh_obj); 303*7a7741afSMartin Matuska boolean_t remove = B_FALSE; 304*7a7741afSMartin Matuska 305*7a7741afSMartin Matuska ASSERT(zfs_znode_held(zfsvfs, zh->zh_obj)); 306*7a7741afSMartin Matuska mutex_exit(&zh->zh_lock); 307*7a7741afSMartin Matuska 308*7a7741afSMartin Matuska mutex_enter(&zfsvfs->z_hold_locks[i]); 309*7a7741afSMartin Matuska ASSERT3S(zh->zh_refcount, >, 0); 310*7a7741afSMartin Matuska if (--zh->zh_refcount == 0) { 311*7a7741afSMartin Matuska avl_remove(&zfsvfs->z_hold_trees[i], zh); 312*7a7741afSMartin Matuska remove = B_TRUE; 313*7a7741afSMartin Matuska } 314*7a7741afSMartin Matuska mutex_exit(&zfsvfs->z_hold_locks[i]); 315*7a7741afSMartin Matuska 316*7a7741afSMartin Matuska if (remove == B_TRUE) 317*7a7741afSMartin Matuska kmem_cache_free(znode_hold_cache, zh); 318*7a7741afSMartin Matuska } 319*7a7741afSMartin Matuska 320*7a7741afSMartin Matuska dev_t 321*7a7741afSMartin Matuska zfs_cmpldev(uint64_t dev) 322*7a7741afSMartin Matuska { 323*7a7741afSMartin Matuska return (dev); 324*7a7741afSMartin Matuska } 325*7a7741afSMartin Matuska 326*7a7741afSMartin Matuska static void 327*7a7741afSMartin Matuska zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp, 328*7a7741afSMartin Matuska dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl) 329*7a7741afSMartin Matuska { 330*7a7741afSMartin Matuska ASSERT(zfs_znode_held(zfsvfs, zp->z_id)); 331*7a7741afSMartin Matuska 332*7a7741afSMartin Matuska mutex_enter(&zp->z_lock); 333*7a7741afSMartin Matuska 334*7a7741afSMartin Matuska ASSERT(zp->z_sa_hdl == NULL); 335*7a7741afSMartin Matuska ASSERT(zp->z_acl_cached == NULL); 336*7a7741afSMartin Matuska if (sa_hdl == NULL) { 337*7a7741afSMartin Matuska VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp, 338*7a7741afSMartin Matuska SA_HDL_SHARED, &zp->z_sa_hdl)); 339*7a7741afSMartin Matuska } else { 340*7a7741afSMartin Matuska zp->z_sa_hdl = sa_hdl; 341*7a7741afSMartin Matuska 
sa_set_userp(sa_hdl, zp); 342*7a7741afSMartin Matuska } 343*7a7741afSMartin Matuska 344*7a7741afSMartin Matuska zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE; 345*7a7741afSMartin Matuska 346*7a7741afSMartin Matuska mutex_exit(&zp->z_lock); 347*7a7741afSMartin Matuska } 348*7a7741afSMartin Matuska 349*7a7741afSMartin Matuska void 350*7a7741afSMartin Matuska zfs_znode_dmu_fini(znode_t *zp) 351*7a7741afSMartin Matuska { 352*7a7741afSMartin Matuska ASSERT(zfs_znode_held(ZTOZSB(zp), zp->z_id) || 353*7a7741afSMartin Matuska RW_WRITE_HELD(&ZTOZSB(zp)->z_teardown_inactive_lock)); 354*7a7741afSMartin Matuska 355*7a7741afSMartin Matuska sa_handle_destroy(zp->z_sa_hdl); 356*7a7741afSMartin Matuska zp->z_sa_hdl = NULL; 357*7a7741afSMartin Matuska } 358*7a7741afSMartin Matuska 359*7a7741afSMartin Matuska /* 360*7a7741afSMartin Matuska * Called by new_inode() to allocate a new inode. 361*7a7741afSMartin Matuska */ 362*7a7741afSMartin Matuska int 363*7a7741afSMartin Matuska zfs_inode_alloc(struct super_block *sb, struct inode **ip) 364*7a7741afSMartin Matuska { 365*7a7741afSMartin Matuska znode_t *zp; 366*7a7741afSMartin Matuska 367*7a7741afSMartin Matuska zp = kmem_cache_alloc(znode_cache, KM_SLEEP); 368*7a7741afSMartin Matuska *ip = ZTOI(zp); 369*7a7741afSMartin Matuska 370*7a7741afSMartin Matuska return (0); 371*7a7741afSMartin Matuska } 372*7a7741afSMartin Matuska 373*7a7741afSMartin Matuska /* 374*7a7741afSMartin Matuska * Called in multiple places when an inode should be destroyed. 
375*7a7741afSMartin Matuska */ 376*7a7741afSMartin Matuska void 377*7a7741afSMartin Matuska zfs_inode_destroy(struct inode *ip) 378*7a7741afSMartin Matuska { 379*7a7741afSMartin Matuska znode_t *zp = ITOZ(ip); 380*7a7741afSMartin Matuska zfsvfs_t *zfsvfs = ZTOZSB(zp); 381*7a7741afSMartin Matuska 382*7a7741afSMartin Matuska mutex_enter(&zfsvfs->z_znodes_lock); 383*7a7741afSMartin Matuska if (list_link_active(&zp->z_link_node)) { 384*7a7741afSMartin Matuska list_remove(&zfsvfs->z_all_znodes, zp); 385*7a7741afSMartin Matuska } 386*7a7741afSMartin Matuska mutex_exit(&zfsvfs->z_znodes_lock); 387*7a7741afSMartin Matuska 388*7a7741afSMartin Matuska if (zp->z_acl_cached) { 389*7a7741afSMartin Matuska zfs_acl_free(zp->z_acl_cached); 390*7a7741afSMartin Matuska zp->z_acl_cached = NULL; 391*7a7741afSMartin Matuska } 392*7a7741afSMartin Matuska 393*7a7741afSMartin Matuska if (zp->z_xattr_cached) { 394*7a7741afSMartin Matuska nvlist_free(zp->z_xattr_cached); 395*7a7741afSMartin Matuska zp->z_xattr_cached = NULL; 396*7a7741afSMartin Matuska } 397*7a7741afSMartin Matuska 398*7a7741afSMartin Matuska kmem_cache_free(znode_cache, zp); 399*7a7741afSMartin Matuska } 400*7a7741afSMartin Matuska 401*7a7741afSMartin Matuska static void 402*7a7741afSMartin Matuska zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip) 403*7a7741afSMartin Matuska { 404*7a7741afSMartin Matuska uint64_t rdev = 0; 405*7a7741afSMartin Matuska 406*7a7741afSMartin Matuska switch (ip->i_mode & S_IFMT) { 407*7a7741afSMartin Matuska case S_IFREG: 408*7a7741afSMartin Matuska ip->i_op = &zpl_inode_operations; 409*7a7741afSMartin Matuska ip->i_fop = &zpl_file_operations; 410*7a7741afSMartin Matuska ip->i_mapping->a_ops = &zpl_address_space_operations; 411*7a7741afSMartin Matuska break; 412*7a7741afSMartin Matuska 413*7a7741afSMartin Matuska case S_IFDIR: 414*7a7741afSMartin Matuska ip->i_op = &zpl_dir_inode_operations; 415*7a7741afSMartin Matuska ip->i_fop = &zpl_dir_file_operations; 416*7a7741afSMartin Matuska 
ITOZ(ip)->z_zn_prefetch = B_TRUE; 417*7a7741afSMartin Matuska break; 418*7a7741afSMartin Matuska 419*7a7741afSMartin Matuska case S_IFLNK: 420*7a7741afSMartin Matuska ip->i_op = &zpl_symlink_inode_operations; 421*7a7741afSMartin Matuska break; 422*7a7741afSMartin Matuska 423*7a7741afSMartin Matuska /* 424*7a7741afSMartin Matuska * rdev is only stored in a SA only for device files. 425*7a7741afSMartin Matuska */ 426*7a7741afSMartin Matuska case S_IFCHR: 427*7a7741afSMartin Matuska case S_IFBLK: 428*7a7741afSMartin Matuska (void) sa_lookup(ITOZ(ip)->z_sa_hdl, SA_ZPL_RDEV(zfsvfs), &rdev, 429*7a7741afSMartin Matuska sizeof (rdev)); 430*7a7741afSMartin Matuska zfs_fallthrough; 431*7a7741afSMartin Matuska case S_IFIFO: 432*7a7741afSMartin Matuska case S_IFSOCK: 433*7a7741afSMartin Matuska init_special_inode(ip, ip->i_mode, rdev); 434*7a7741afSMartin Matuska ip->i_op = &zpl_special_inode_operations; 435*7a7741afSMartin Matuska break; 436*7a7741afSMartin Matuska 437*7a7741afSMartin Matuska default: 438*7a7741afSMartin Matuska zfs_panic_recover("inode %llu has invalid mode: 0x%x\n", 439*7a7741afSMartin Matuska (u_longlong_t)ip->i_ino, ip->i_mode); 440*7a7741afSMartin Matuska 441*7a7741afSMartin Matuska /* Assume the inode is a file and attempt to continue */ 442*7a7741afSMartin Matuska ip->i_mode = S_IFREG | 0644; 443*7a7741afSMartin Matuska ip->i_op = &zpl_inode_operations; 444*7a7741afSMartin Matuska ip->i_fop = &zpl_file_operations; 445*7a7741afSMartin Matuska ip->i_mapping->a_ops = &zpl_address_space_operations; 446*7a7741afSMartin Matuska break; 447*7a7741afSMartin Matuska } 448*7a7741afSMartin Matuska } 449*7a7741afSMartin Matuska 450*7a7741afSMartin Matuska static void 451*7a7741afSMartin Matuska zfs_set_inode_flags(znode_t *zp, struct inode *ip) 452*7a7741afSMartin Matuska { 453*7a7741afSMartin Matuska /* 454*7a7741afSMartin Matuska * Linux and Solaris have different sets of file attributes, so we 455*7a7741afSMartin Matuska * restrict this conversion to the 
intersection of the two. 456*7a7741afSMartin Matuska */ 457*7a7741afSMartin Matuska unsigned int flags = 0; 458*7a7741afSMartin Matuska if (zp->z_pflags & ZFS_IMMUTABLE) 459*7a7741afSMartin Matuska flags |= S_IMMUTABLE; 460*7a7741afSMartin Matuska if (zp->z_pflags & ZFS_APPENDONLY) 461*7a7741afSMartin Matuska flags |= S_APPEND; 462*7a7741afSMartin Matuska 463*7a7741afSMartin Matuska inode_set_flags(ip, flags, S_IMMUTABLE|S_APPEND); 464*7a7741afSMartin Matuska } 465*7a7741afSMartin Matuska 466*7a7741afSMartin Matuska /* 467*7a7741afSMartin Matuska * Update the embedded inode given the znode. 468*7a7741afSMartin Matuska */ 469*7a7741afSMartin Matuska void 470*7a7741afSMartin Matuska zfs_znode_update_vfs(znode_t *zp) 471*7a7741afSMartin Matuska { 472*7a7741afSMartin Matuska struct inode *ip; 473*7a7741afSMartin Matuska uint32_t blksize; 474*7a7741afSMartin Matuska u_longlong_t i_blocks; 475*7a7741afSMartin Matuska 476*7a7741afSMartin Matuska ASSERT(zp != NULL); 477*7a7741afSMartin Matuska ip = ZTOI(zp); 478*7a7741afSMartin Matuska 479*7a7741afSMartin Matuska /* Skip .zfs control nodes which do not exist on disk. */ 480*7a7741afSMartin Matuska if (zfsctl_is_node(ip)) 481*7a7741afSMartin Matuska return; 482*7a7741afSMartin Matuska 483*7a7741afSMartin Matuska dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize, &i_blocks); 484*7a7741afSMartin Matuska 485*7a7741afSMartin Matuska spin_lock(&ip->i_lock); 486*7a7741afSMartin Matuska ip->i_mode = zp->z_mode; 487*7a7741afSMartin Matuska ip->i_blocks = i_blocks; 488*7a7741afSMartin Matuska i_size_write(ip, zp->z_size); 489*7a7741afSMartin Matuska spin_unlock(&ip->i_lock); 490*7a7741afSMartin Matuska } 491*7a7741afSMartin Matuska 492*7a7741afSMartin Matuska 493*7a7741afSMartin Matuska /* 494*7a7741afSMartin Matuska * Construct a znode+inode and initialize. 
495*7a7741afSMartin Matuska * 496*7a7741afSMartin Matuska * This does not do a call to dmu_set_user() that is 497*7a7741afSMartin Matuska * up to the caller to do, in case you don't want to 498*7a7741afSMartin Matuska * return the znode 499*7a7741afSMartin Matuska */ 500*7a7741afSMartin Matuska static znode_t * 501*7a7741afSMartin Matuska zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, 502*7a7741afSMartin Matuska dmu_object_type_t obj_type, sa_handle_t *hdl) 503*7a7741afSMartin Matuska { 504*7a7741afSMartin Matuska znode_t *zp; 505*7a7741afSMartin Matuska struct inode *ip; 506*7a7741afSMartin Matuska uint64_t mode; 507*7a7741afSMartin Matuska uint64_t parent; 508*7a7741afSMartin Matuska uint64_t tmp_gen; 509*7a7741afSMartin Matuska uint64_t links; 510*7a7741afSMartin Matuska uint64_t z_uid, z_gid; 511*7a7741afSMartin Matuska uint64_t atime[2], mtime[2], ctime[2], btime[2]; 512*7a7741afSMartin Matuska inode_timespec_t tmp_ts; 513*7a7741afSMartin Matuska uint64_t projid = ZFS_DEFAULT_PROJID; 514*7a7741afSMartin Matuska sa_bulk_attr_t bulk[12]; 515*7a7741afSMartin Matuska int count = 0; 516*7a7741afSMartin Matuska 517*7a7741afSMartin Matuska ASSERT(zfsvfs != NULL); 518*7a7741afSMartin Matuska 519*7a7741afSMartin Matuska ip = new_inode(zfsvfs->z_sb); 520*7a7741afSMartin Matuska if (ip == NULL) 521*7a7741afSMartin Matuska return (NULL); 522*7a7741afSMartin Matuska 523*7a7741afSMartin Matuska zp = ITOZ(ip); 524*7a7741afSMartin Matuska ASSERT(zp->z_dirlocks == NULL); 525*7a7741afSMartin Matuska ASSERT3P(zp->z_acl_cached, ==, NULL); 526*7a7741afSMartin Matuska ASSERT3P(zp->z_xattr_cached, ==, NULL); 527*7a7741afSMartin Matuska zp->z_unlinked = B_FALSE; 528*7a7741afSMartin Matuska zp->z_atime_dirty = B_FALSE; 529*7a7741afSMartin Matuska zp->z_is_ctldir = B_FALSE; 530*7a7741afSMartin Matuska zp->z_suspended = B_FALSE; 531*7a7741afSMartin Matuska zp->z_sa_hdl = NULL; 532*7a7741afSMartin Matuska zp->z_mapcnt = 0; 533*7a7741afSMartin Matuska zp->z_id = 
db->db_object; 534*7a7741afSMartin Matuska zp->z_blksz = blksz; 535*7a7741afSMartin Matuska zp->z_seq = 0x7A4653; 536*7a7741afSMartin Matuska zp->z_sync_cnt = 0; 537*7a7741afSMartin Matuska zp->z_sync_writes_cnt = 0; 538*7a7741afSMartin Matuska zp->z_async_writes_cnt = 0; 539*7a7741afSMartin Matuska 540*7a7741afSMartin Matuska zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl); 541*7a7741afSMartin Matuska 542*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8); 543*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &tmp_gen, 8); 544*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 545*7a7741afSMartin Matuska &zp->z_size, 8); 546*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8); 547*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 548*7a7741afSMartin Matuska &zp->z_pflags, 8); 549*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, 550*7a7741afSMartin Matuska &parent, 8); 551*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &z_uid, 8); 552*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &z_gid, 8); 553*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16); 554*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16); 555*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16); 556*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &btime, 16); 557*7a7741afSMartin Matuska 558*7a7741afSMartin Matuska if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || tmp_gen == 0 || 559*7a7741afSMartin Matuska (dmu_objset_projectquota_enabled(zfsvfs->z_os) && 560*7a7741afSMartin Matuska (zp->z_pflags & ZFS_PROJID) && 561*7a7741afSMartin Matuska sa_lookup(zp->z_sa_hdl, 
SA_ZPL_PROJID(zfsvfs), &projid, 8) != 0)) { 562*7a7741afSMartin Matuska if (hdl == NULL) 563*7a7741afSMartin Matuska sa_handle_destroy(zp->z_sa_hdl); 564*7a7741afSMartin Matuska zp->z_sa_hdl = NULL; 565*7a7741afSMartin Matuska goto error; 566*7a7741afSMartin Matuska } 567*7a7741afSMartin Matuska 568*7a7741afSMartin Matuska zp->z_projid = projid; 569*7a7741afSMartin Matuska zp->z_mode = ip->i_mode = mode; 570*7a7741afSMartin Matuska ip->i_generation = (uint32_t)tmp_gen; 571*7a7741afSMartin Matuska ip->i_blkbits = SPA_MINBLOCKSHIFT; 572*7a7741afSMartin Matuska set_nlink(ip, (uint32_t)links); 573*7a7741afSMartin Matuska zfs_uid_write(ip, z_uid); 574*7a7741afSMartin Matuska zfs_gid_write(ip, z_gid); 575*7a7741afSMartin Matuska zfs_set_inode_flags(zp, ip); 576*7a7741afSMartin Matuska 577*7a7741afSMartin Matuska /* Cache the xattr parent id */ 578*7a7741afSMartin Matuska if (zp->z_pflags & ZFS_XATTR) 579*7a7741afSMartin Matuska zp->z_xattr_parent = parent; 580*7a7741afSMartin Matuska 581*7a7741afSMartin Matuska ZFS_TIME_DECODE(&tmp_ts, atime); 582*7a7741afSMartin Matuska zpl_inode_set_atime_to_ts(ip, tmp_ts); 583*7a7741afSMartin Matuska ZFS_TIME_DECODE(&tmp_ts, mtime); 584*7a7741afSMartin Matuska zpl_inode_set_mtime_to_ts(ip, tmp_ts); 585*7a7741afSMartin Matuska ZFS_TIME_DECODE(&tmp_ts, ctime); 586*7a7741afSMartin Matuska zpl_inode_set_ctime_to_ts(ip, tmp_ts); 587*7a7741afSMartin Matuska ZFS_TIME_DECODE(&zp->z_btime, btime); 588*7a7741afSMartin Matuska 589*7a7741afSMartin Matuska ip->i_ino = zp->z_id; 590*7a7741afSMartin Matuska zfs_znode_update_vfs(zp); 591*7a7741afSMartin Matuska zfs_inode_set_ops(zfsvfs, ip); 592*7a7741afSMartin Matuska 593*7a7741afSMartin Matuska /* 594*7a7741afSMartin Matuska * The only way insert_inode_locked() can fail is if the ip->i_ino 595*7a7741afSMartin Matuska * number is already hashed for this super block. This can never 596*7a7741afSMartin Matuska * happen because the inode numbers map 1:1 with the object numbers. 
597*7a7741afSMartin Matuska * 598*7a7741afSMartin Matuska * Exceptions include rolling back a mounted file system, either 599*7a7741afSMartin Matuska * from the zfs rollback or zfs recv command. 600*7a7741afSMartin Matuska * 601*7a7741afSMartin Matuska * Active inodes are unhashed during the rollback, but since zrele 602*7a7741afSMartin Matuska * can happen asynchronously, we can't guarantee they've been 603*7a7741afSMartin Matuska * unhashed. This can cause hash collisions in unlinked drain 604*7a7741afSMartin Matuska * processing so do not hash unlinked znodes. 605*7a7741afSMartin Matuska */ 606*7a7741afSMartin Matuska if (links > 0) 607*7a7741afSMartin Matuska VERIFY3S(insert_inode_locked(ip), ==, 0); 608*7a7741afSMartin Matuska 609*7a7741afSMartin Matuska mutex_enter(&zfsvfs->z_znodes_lock); 610*7a7741afSMartin Matuska list_insert_tail(&zfsvfs->z_all_znodes, zp); 611*7a7741afSMartin Matuska mutex_exit(&zfsvfs->z_znodes_lock); 612*7a7741afSMartin Matuska 613*7a7741afSMartin Matuska if (links > 0) 614*7a7741afSMartin Matuska unlock_new_inode(ip); 615*7a7741afSMartin Matuska return (zp); 616*7a7741afSMartin Matuska 617*7a7741afSMartin Matuska error: 618*7a7741afSMartin Matuska iput(ip); 619*7a7741afSMartin Matuska return (NULL); 620*7a7741afSMartin Matuska } 621*7a7741afSMartin Matuska 622*7a7741afSMartin Matuska /* 623*7a7741afSMartin Matuska * Safely mark an inode dirty. Inodes which are part of a read-only 624*7a7741afSMartin Matuska * file system or snapshot may not be dirtied. 
625*7a7741afSMartin Matuska */ 626*7a7741afSMartin Matuska void 627*7a7741afSMartin Matuska zfs_mark_inode_dirty(struct inode *ip) 628*7a7741afSMartin Matuska { 629*7a7741afSMartin Matuska zfsvfs_t *zfsvfs = ITOZSB(ip); 630*7a7741afSMartin Matuska 631*7a7741afSMartin Matuska if (zfs_is_readonly(zfsvfs) || dmu_objset_is_snapshot(zfsvfs->z_os)) 632*7a7741afSMartin Matuska return; 633*7a7741afSMartin Matuska 634*7a7741afSMartin Matuska mark_inode_dirty(ip); 635*7a7741afSMartin Matuska } 636*7a7741afSMartin Matuska 637*7a7741afSMartin Matuska static uint64_t empty_xattr; 638*7a7741afSMartin Matuska static uint64_t pad[4]; 639*7a7741afSMartin Matuska static zfs_acl_phys_t acl_phys; 640*7a7741afSMartin Matuska /* 641*7a7741afSMartin Matuska * Create a new DMU object to hold a zfs znode. 642*7a7741afSMartin Matuska * 643*7a7741afSMartin Matuska * IN: dzp - parent directory for new znode 644*7a7741afSMartin Matuska * vap - file attributes for new znode 645*7a7741afSMartin Matuska * tx - dmu transaction id for zap operations 646*7a7741afSMartin Matuska * cr - credentials of caller 647*7a7741afSMartin Matuska * flag - flags: 648*7a7741afSMartin Matuska * IS_ROOT_NODE - new object will be root 649*7a7741afSMartin Matuska * IS_TMPFILE - new object is of O_TMPFILE 650*7a7741afSMartin Matuska * IS_XATTR - new object is an attribute 651*7a7741afSMartin Matuska * acl_ids - ACL related attributes 652*7a7741afSMartin Matuska * 653*7a7741afSMartin Matuska * OUT: zpp - allocated znode (set to dzp if IS_ROOT_NODE) 654*7a7741afSMartin Matuska * 655*7a7741afSMartin Matuska */ 656*7a7741afSMartin Matuska void 657*7a7741afSMartin Matuska zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, 658*7a7741afSMartin Matuska uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids) 659*7a7741afSMartin Matuska { 660*7a7741afSMartin Matuska uint64_t crtime[2], atime[2], mtime[2], ctime[2]; 661*7a7741afSMartin Matuska uint64_t mode, size, links, parent, pflags; 662*7a7741afSMartin Matuska 
uint64_t projid = ZFS_DEFAULT_PROJID; 663*7a7741afSMartin Matuska uint64_t rdev = 0; 664*7a7741afSMartin Matuska zfsvfs_t *zfsvfs = ZTOZSB(dzp); 665*7a7741afSMartin Matuska dmu_buf_t *db; 666*7a7741afSMartin Matuska inode_timespec_t now; 667*7a7741afSMartin Matuska uint64_t gen, obj; 668*7a7741afSMartin Matuska int bonuslen; 669*7a7741afSMartin Matuska int dnodesize; 670*7a7741afSMartin Matuska sa_handle_t *sa_hdl; 671*7a7741afSMartin Matuska dmu_object_type_t obj_type; 672*7a7741afSMartin Matuska sa_bulk_attr_t *sa_attrs; 673*7a7741afSMartin Matuska int cnt = 0; 674*7a7741afSMartin Matuska zfs_acl_locator_cb_t locate = { 0 }; 675*7a7741afSMartin Matuska znode_hold_t *zh; 676*7a7741afSMartin Matuska 677*7a7741afSMartin Matuska if (zfsvfs->z_replay) { 678*7a7741afSMartin Matuska obj = vap->va_nodeid; 679*7a7741afSMartin Matuska now = vap->va_ctime; /* see zfs_replay_create() */ 680*7a7741afSMartin Matuska gen = vap->va_nblocks; /* ditto */ 681*7a7741afSMartin Matuska dnodesize = vap->va_fsid; /* ditto */ 682*7a7741afSMartin Matuska } else { 683*7a7741afSMartin Matuska obj = 0; 684*7a7741afSMartin Matuska gethrestime(&now); 685*7a7741afSMartin Matuska gen = dmu_tx_get_txg(tx); 686*7a7741afSMartin Matuska dnodesize = dmu_objset_dnodesize(zfsvfs->z_os); 687*7a7741afSMartin Matuska } 688*7a7741afSMartin Matuska 689*7a7741afSMartin Matuska if (dnodesize == 0) 690*7a7741afSMartin Matuska dnodesize = DNODE_MIN_SIZE; 691*7a7741afSMartin Matuska 692*7a7741afSMartin Matuska obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE; 693*7a7741afSMartin Matuska 694*7a7741afSMartin Matuska bonuslen = (obj_type == DMU_OT_SA) ? 695*7a7741afSMartin Matuska DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE; 696*7a7741afSMartin Matuska 697*7a7741afSMartin Matuska /* 698*7a7741afSMartin Matuska * Create a new DMU object. 
699*7a7741afSMartin Matuska */ 700*7a7741afSMartin Matuska /* 701*7a7741afSMartin Matuska * There's currently no mechanism for pre-reading the blocks that will 702*7a7741afSMartin Matuska * be needed to allocate a new object, so we accept the small chance 703*7a7741afSMartin Matuska * that there will be an i/o error and we will fail one of the 704*7a7741afSMartin Matuska * assertions below. 705*7a7741afSMartin Matuska */ 706*7a7741afSMartin Matuska if (S_ISDIR(vap->va_mode)) { 707*7a7741afSMartin Matuska if (zfsvfs->z_replay) { 708*7a7741afSMartin Matuska VERIFY0(zap_create_claim_norm_dnsize(zfsvfs->z_os, obj, 709*7a7741afSMartin Matuska zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, 710*7a7741afSMartin Matuska obj_type, bonuslen, dnodesize, tx)); 711*7a7741afSMartin Matuska } else { 712*7a7741afSMartin Matuska obj = zap_create_norm_dnsize(zfsvfs->z_os, 713*7a7741afSMartin Matuska zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, 714*7a7741afSMartin Matuska obj_type, bonuslen, dnodesize, tx); 715*7a7741afSMartin Matuska } 716*7a7741afSMartin Matuska } else { 717*7a7741afSMartin Matuska if (zfsvfs->z_replay) { 718*7a7741afSMartin Matuska VERIFY0(dmu_object_claim_dnsize(zfsvfs->z_os, obj, 719*7a7741afSMartin Matuska DMU_OT_PLAIN_FILE_CONTENTS, 0, 720*7a7741afSMartin Matuska obj_type, bonuslen, dnodesize, tx)); 721*7a7741afSMartin Matuska } else { 722*7a7741afSMartin Matuska obj = dmu_object_alloc_dnsize(zfsvfs->z_os, 723*7a7741afSMartin Matuska DMU_OT_PLAIN_FILE_CONTENTS, 0, 724*7a7741afSMartin Matuska obj_type, bonuslen, dnodesize, tx); 725*7a7741afSMartin Matuska } 726*7a7741afSMartin Matuska } 727*7a7741afSMartin Matuska 728*7a7741afSMartin Matuska zh = zfs_znode_hold_enter(zfsvfs, obj); 729*7a7741afSMartin Matuska VERIFY0(sa_buf_hold(zfsvfs->z_os, obj, NULL, &db)); 730*7a7741afSMartin Matuska 731*7a7741afSMartin Matuska /* 732*7a7741afSMartin Matuska * If this is the root, fix up the half-initialized parent pointer 733*7a7741afSMartin Matuska * to reference the 
just-allocated physical data area. 734*7a7741afSMartin Matuska */ 735*7a7741afSMartin Matuska if (flag & IS_ROOT_NODE) { 736*7a7741afSMartin Matuska dzp->z_id = obj; 737*7a7741afSMartin Matuska } 738*7a7741afSMartin Matuska 739*7a7741afSMartin Matuska /* 740*7a7741afSMartin Matuska * If parent is an xattr, so am I. 741*7a7741afSMartin Matuska */ 742*7a7741afSMartin Matuska if (dzp->z_pflags & ZFS_XATTR) { 743*7a7741afSMartin Matuska flag |= IS_XATTR; 744*7a7741afSMartin Matuska } 745*7a7741afSMartin Matuska 746*7a7741afSMartin Matuska if (zfsvfs->z_use_fuids) 747*7a7741afSMartin Matuska pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; 748*7a7741afSMartin Matuska else 749*7a7741afSMartin Matuska pflags = 0; 750*7a7741afSMartin Matuska 751*7a7741afSMartin Matuska if (S_ISDIR(vap->va_mode)) { 752*7a7741afSMartin Matuska size = 2; /* contents ("." and "..") */ 753*7a7741afSMartin Matuska links = 2; 754*7a7741afSMartin Matuska } else { 755*7a7741afSMartin Matuska size = 0; 756*7a7741afSMartin Matuska links = (flag & IS_TMPFILE) ? 0 : 1; 757*7a7741afSMartin Matuska } 758*7a7741afSMartin Matuska 759*7a7741afSMartin Matuska if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode)) 760*7a7741afSMartin Matuska rdev = vap->va_rdev; 761*7a7741afSMartin Matuska 762*7a7741afSMartin Matuska parent = dzp->z_id; 763*7a7741afSMartin Matuska mode = acl_ids->z_mode; 764*7a7741afSMartin Matuska if (flag & IS_XATTR) 765*7a7741afSMartin Matuska pflags |= ZFS_XATTR; 766*7a7741afSMartin Matuska 767*7a7741afSMartin Matuska if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode)) { 768*7a7741afSMartin Matuska /* 769*7a7741afSMartin Matuska * With ZFS_PROJID flag, we can easily know whether there is 770*7a7741afSMartin Matuska * project ID stored on disk or not. See zfs_space_delta_cb(). 
771*7a7741afSMartin Matuska */ 772*7a7741afSMartin Matuska if (obj_type != DMU_OT_ZNODE && 773*7a7741afSMartin Matuska dmu_objset_projectquota_enabled(zfsvfs->z_os)) 774*7a7741afSMartin Matuska pflags |= ZFS_PROJID; 775*7a7741afSMartin Matuska 776*7a7741afSMartin Matuska /* 777*7a7741afSMartin Matuska * Inherit project ID from parent if required. 778*7a7741afSMartin Matuska */ 779*7a7741afSMartin Matuska projid = zfs_inherit_projid(dzp); 780*7a7741afSMartin Matuska if (dzp->z_pflags & ZFS_PROJINHERIT) 781*7a7741afSMartin Matuska pflags |= ZFS_PROJINHERIT; 782*7a7741afSMartin Matuska } 783*7a7741afSMartin Matuska 784*7a7741afSMartin Matuska /* 785*7a7741afSMartin Matuska * No execs denied will be determined when zfs_mode_compute() is called. 786*7a7741afSMartin Matuska */ 787*7a7741afSMartin Matuska pflags |= acl_ids->z_aclp->z_hints & 788*7a7741afSMartin Matuska (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT| 789*7a7741afSMartin Matuska ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED); 790*7a7741afSMartin Matuska 791*7a7741afSMartin Matuska ZFS_TIME_ENCODE(&now, crtime); 792*7a7741afSMartin Matuska ZFS_TIME_ENCODE(&now, ctime); 793*7a7741afSMartin Matuska 794*7a7741afSMartin Matuska if (vap->va_mask & ATTR_ATIME) { 795*7a7741afSMartin Matuska ZFS_TIME_ENCODE(&vap->va_atime, atime); 796*7a7741afSMartin Matuska } else { 797*7a7741afSMartin Matuska ZFS_TIME_ENCODE(&now, atime); 798*7a7741afSMartin Matuska } 799*7a7741afSMartin Matuska 800*7a7741afSMartin Matuska if (vap->va_mask & ATTR_MTIME) { 801*7a7741afSMartin Matuska ZFS_TIME_ENCODE(&vap->va_mtime, mtime); 802*7a7741afSMartin Matuska } else { 803*7a7741afSMartin Matuska ZFS_TIME_ENCODE(&now, mtime); 804*7a7741afSMartin Matuska } 805*7a7741afSMartin Matuska 806*7a7741afSMartin Matuska /* Now add in all of the "SA" attributes */ 807*7a7741afSMartin Matuska VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED, 808*7a7741afSMartin Matuska &sa_hdl)); 809*7a7741afSMartin Matuska 810*7a7741afSMartin 
Matuska /* 811*7a7741afSMartin Matuska * Setup the array of attributes to be replaced/set on the new file 812*7a7741afSMartin Matuska * 813*7a7741afSMartin Matuska * order for DMU_OT_ZNODE is critical since it needs to be constructed 814*7a7741afSMartin Matuska * in the old znode_phys_t format. Don't change this ordering 815*7a7741afSMartin Matuska */ 816*7a7741afSMartin Matuska sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP); 817*7a7741afSMartin Matuska 818*7a7741afSMartin Matuska if (obj_type == DMU_OT_ZNODE) { 819*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs), 820*7a7741afSMartin Matuska NULL, &atime, 16); 821*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs), 822*7a7741afSMartin Matuska NULL, &mtime, 16); 823*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs), 824*7a7741afSMartin Matuska NULL, &ctime, 16); 825*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs), 826*7a7741afSMartin Matuska NULL, &crtime, 16); 827*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs), 828*7a7741afSMartin Matuska NULL, &gen, 8); 829*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs), 830*7a7741afSMartin Matuska NULL, &mode, 8); 831*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs), 832*7a7741afSMartin Matuska NULL, &size, 8); 833*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs), 834*7a7741afSMartin Matuska NULL, &parent, 8); 835*7a7741afSMartin Matuska } else { 836*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs), 837*7a7741afSMartin Matuska NULL, &mode, 8); 838*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs), 839*7a7741afSMartin Matuska NULL, &size, 8); 840*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs), 841*7a7741afSMartin Matuska NULL, &gen, 8); 842*7a7741afSMartin 
Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), 843*7a7741afSMartin Matuska NULL, &acl_ids->z_fuid, 8); 844*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), 845*7a7741afSMartin Matuska NULL, &acl_ids->z_fgid, 8); 846*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs), 847*7a7741afSMartin Matuska NULL, &parent, 8); 848*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs), 849*7a7741afSMartin Matuska NULL, &pflags, 8); 850*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs), 851*7a7741afSMartin Matuska NULL, &atime, 16); 852*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs), 853*7a7741afSMartin Matuska NULL, &mtime, 16); 854*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs), 855*7a7741afSMartin Matuska NULL, &ctime, 16); 856*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs), 857*7a7741afSMartin Matuska NULL, &crtime, 16); 858*7a7741afSMartin Matuska } 859*7a7741afSMartin Matuska 860*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8); 861*7a7741afSMartin Matuska 862*7a7741afSMartin Matuska if (obj_type == DMU_OT_ZNODE) { 863*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL, 864*7a7741afSMartin Matuska &empty_xattr, 8); 865*7a7741afSMartin Matuska } else if (dmu_objset_projectquota_enabled(zfsvfs->z_os) && 866*7a7741afSMartin Matuska pflags & ZFS_PROJID) { 867*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PROJID(zfsvfs), 868*7a7741afSMartin Matuska NULL, &projid, 8); 869*7a7741afSMartin Matuska } 870*7a7741afSMartin Matuska if (obj_type == DMU_OT_ZNODE || 871*7a7741afSMartin Matuska (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) { 872*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs), 873*7a7741afSMartin Matuska NULL, &rdev, 8); 874*7a7741afSMartin 
Matuska } 875*7a7741afSMartin Matuska if (obj_type == DMU_OT_ZNODE) { 876*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs), 877*7a7741afSMartin Matuska NULL, &pflags, 8); 878*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL, 879*7a7741afSMartin Matuska &acl_ids->z_fuid, 8); 880*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL, 881*7a7741afSMartin Matuska &acl_ids->z_fgid, 8); 882*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad, 883*7a7741afSMartin Matuska sizeof (uint64_t) * 4); 884*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL, 885*7a7741afSMartin Matuska &acl_phys, sizeof (zfs_acl_phys_t)); 886*7a7741afSMartin Matuska } else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) { 887*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL, 888*7a7741afSMartin Matuska &acl_ids->z_aclp->z_acl_count, 8); 889*7a7741afSMartin Matuska locate.cb_aclp = acl_ids->z_aclp; 890*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs), 891*7a7741afSMartin Matuska zfs_acl_data_locator, &locate, 892*7a7741afSMartin Matuska acl_ids->z_aclp->z_acl_bytes); 893*7a7741afSMartin Matuska mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags, 894*7a7741afSMartin Matuska acl_ids->z_fuid, acl_ids->z_fgid); 895*7a7741afSMartin Matuska } 896*7a7741afSMartin Matuska 897*7a7741afSMartin Matuska VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0); 898*7a7741afSMartin Matuska 899*7a7741afSMartin Matuska if (!(flag & IS_ROOT_NODE)) { 900*7a7741afSMartin Matuska /* 901*7a7741afSMartin Matuska * The call to zfs_znode_alloc() may fail if memory is low 902*7a7741afSMartin Matuska * via the call path: alloc_inode() -> inode_init_always() -> 903*7a7741afSMartin Matuska * security_inode_alloc() -> inode_alloc_security(). 
Since 904*7a7741afSMartin Matuska * the existing code is written such that zfs_mknode() can 905*7a7741afSMartin Matuska * not fail retry until sufficient memory has been reclaimed. 906*7a7741afSMartin Matuska */ 907*7a7741afSMartin Matuska do { 908*7a7741afSMartin Matuska *zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl); 909*7a7741afSMartin Matuska } while (*zpp == NULL); 910*7a7741afSMartin Matuska 911*7a7741afSMartin Matuska VERIFY(*zpp != NULL); 912*7a7741afSMartin Matuska VERIFY(dzp != NULL); 913*7a7741afSMartin Matuska } else { 914*7a7741afSMartin Matuska /* 915*7a7741afSMartin Matuska * If we are creating the root node, the "parent" we 916*7a7741afSMartin Matuska * passed in is the znode for the root. 917*7a7741afSMartin Matuska */ 918*7a7741afSMartin Matuska *zpp = dzp; 919*7a7741afSMartin Matuska 920*7a7741afSMartin Matuska (*zpp)->z_sa_hdl = sa_hdl; 921*7a7741afSMartin Matuska } 922*7a7741afSMartin Matuska 923*7a7741afSMartin Matuska (*zpp)->z_pflags = pflags; 924*7a7741afSMartin Matuska (*zpp)->z_mode = ZTOI(*zpp)->i_mode = mode; 925*7a7741afSMartin Matuska (*zpp)->z_dnodesize = dnodesize; 926*7a7741afSMartin Matuska (*zpp)->z_projid = projid; 927*7a7741afSMartin Matuska 928*7a7741afSMartin Matuska if (obj_type == DMU_OT_ZNODE || 929*7a7741afSMartin Matuska acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) { 930*7a7741afSMartin Matuska VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx)); 931*7a7741afSMartin Matuska } 932*7a7741afSMartin Matuska kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END); 933*7a7741afSMartin Matuska zfs_znode_hold_exit(zfsvfs, zh); 934*7a7741afSMartin Matuska } 935*7a7741afSMartin Matuska 936*7a7741afSMartin Matuska /* 937*7a7741afSMartin Matuska * Update in-core attributes. It is assumed the caller will be doing an 938*7a7741afSMartin Matuska * sa_bulk_update to push the changes out. 
939*7a7741afSMartin Matuska */ 940*7a7741afSMartin Matuska void 941*7a7741afSMartin Matuska zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) 942*7a7741afSMartin Matuska { 943*7a7741afSMartin Matuska xoptattr_t *xoap; 944*7a7741afSMartin Matuska boolean_t update_inode = B_FALSE; 945*7a7741afSMartin Matuska 946*7a7741afSMartin Matuska xoap = xva_getxoptattr(xvap); 947*7a7741afSMartin Matuska ASSERT(xoap); 948*7a7741afSMartin Matuska 949*7a7741afSMartin Matuska if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { 950*7a7741afSMartin Matuska uint64_t times[2]; 951*7a7741afSMartin Matuska ZFS_TIME_ENCODE(&xoap->xoa_createtime, times); 952*7a7741afSMartin Matuska (void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)), 953*7a7741afSMartin Matuska ×, sizeof (times), tx); 954*7a7741afSMartin Matuska XVA_SET_RTN(xvap, XAT_CREATETIME); 955*7a7741afSMartin Matuska } 956*7a7741afSMartin Matuska if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { 957*7a7741afSMartin Matuska ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly, 958*7a7741afSMartin Matuska zp->z_pflags, tx); 959*7a7741afSMartin Matuska XVA_SET_RTN(xvap, XAT_READONLY); 960*7a7741afSMartin Matuska } 961*7a7741afSMartin Matuska if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { 962*7a7741afSMartin Matuska ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden, 963*7a7741afSMartin Matuska zp->z_pflags, tx); 964*7a7741afSMartin Matuska XVA_SET_RTN(xvap, XAT_HIDDEN); 965*7a7741afSMartin Matuska } 966*7a7741afSMartin Matuska if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { 967*7a7741afSMartin Matuska ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system, 968*7a7741afSMartin Matuska zp->z_pflags, tx); 969*7a7741afSMartin Matuska XVA_SET_RTN(xvap, XAT_SYSTEM); 970*7a7741afSMartin Matuska } 971*7a7741afSMartin Matuska if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { 972*7a7741afSMartin Matuska ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive, 973*7a7741afSMartin Matuska zp->z_pflags, tx); 974*7a7741afSMartin Matuska XVA_SET_RTN(xvap, XAT_ARCHIVE); 975*7a7741afSMartin Matuska } 
976*7a7741afSMartin Matuska if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { 977*7a7741afSMartin Matuska ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable, 978*7a7741afSMartin Matuska zp->z_pflags, tx); 979*7a7741afSMartin Matuska XVA_SET_RTN(xvap, XAT_IMMUTABLE); 980*7a7741afSMartin Matuska 981*7a7741afSMartin Matuska update_inode = B_TRUE; 982*7a7741afSMartin Matuska } 983*7a7741afSMartin Matuska if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { 984*7a7741afSMartin Matuska ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink, 985*7a7741afSMartin Matuska zp->z_pflags, tx); 986*7a7741afSMartin Matuska XVA_SET_RTN(xvap, XAT_NOUNLINK); 987*7a7741afSMartin Matuska } 988*7a7741afSMartin Matuska if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { 989*7a7741afSMartin Matuska ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly, 990*7a7741afSMartin Matuska zp->z_pflags, tx); 991*7a7741afSMartin Matuska XVA_SET_RTN(xvap, XAT_APPENDONLY); 992*7a7741afSMartin Matuska 993*7a7741afSMartin Matuska update_inode = B_TRUE; 994*7a7741afSMartin Matuska } 995*7a7741afSMartin Matuska if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { 996*7a7741afSMartin Matuska ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump, 997*7a7741afSMartin Matuska zp->z_pflags, tx); 998*7a7741afSMartin Matuska XVA_SET_RTN(xvap, XAT_NODUMP); 999*7a7741afSMartin Matuska } 1000*7a7741afSMartin Matuska if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { 1001*7a7741afSMartin Matuska ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque, 1002*7a7741afSMartin Matuska zp->z_pflags, tx); 1003*7a7741afSMartin Matuska XVA_SET_RTN(xvap, XAT_OPAQUE); 1004*7a7741afSMartin Matuska } 1005*7a7741afSMartin Matuska if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { 1006*7a7741afSMartin Matuska ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED, 1007*7a7741afSMartin Matuska xoap->xoa_av_quarantined, zp->z_pflags, tx); 1008*7a7741afSMartin Matuska XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); 1009*7a7741afSMartin Matuska } 1010*7a7741afSMartin Matuska if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { 1011*7a7741afSMartin 
Matuska ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified, 1012*7a7741afSMartin Matuska zp->z_pflags, tx); 1013*7a7741afSMartin Matuska XVA_SET_RTN(xvap, XAT_AV_MODIFIED); 1014*7a7741afSMartin Matuska } 1015*7a7741afSMartin Matuska if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { 1016*7a7741afSMartin Matuska zfs_sa_set_scanstamp(zp, xvap, tx); 1017*7a7741afSMartin Matuska XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); 1018*7a7741afSMartin Matuska } 1019*7a7741afSMartin Matuska if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { 1020*7a7741afSMartin Matuska ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse, 1021*7a7741afSMartin Matuska zp->z_pflags, tx); 1022*7a7741afSMartin Matuska XVA_SET_RTN(xvap, XAT_REPARSE); 1023*7a7741afSMartin Matuska } 1024*7a7741afSMartin Matuska if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { 1025*7a7741afSMartin Matuska ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline, 1026*7a7741afSMartin Matuska zp->z_pflags, tx); 1027*7a7741afSMartin Matuska XVA_SET_RTN(xvap, XAT_OFFLINE); 1028*7a7741afSMartin Matuska } 1029*7a7741afSMartin Matuska if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { 1030*7a7741afSMartin Matuska ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse, 1031*7a7741afSMartin Matuska zp->z_pflags, tx); 1032*7a7741afSMartin Matuska XVA_SET_RTN(xvap, XAT_SPARSE); 1033*7a7741afSMartin Matuska } 1034*7a7741afSMartin Matuska if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) { 1035*7a7741afSMartin Matuska ZFS_ATTR_SET(zp, ZFS_PROJINHERIT, xoap->xoa_projinherit, 1036*7a7741afSMartin Matuska zp->z_pflags, tx); 1037*7a7741afSMartin Matuska XVA_SET_RTN(xvap, XAT_PROJINHERIT); 1038*7a7741afSMartin Matuska } 1039*7a7741afSMartin Matuska 1040*7a7741afSMartin Matuska if (update_inode) 1041*7a7741afSMartin Matuska zfs_set_inode_flags(zp, ZTOI(zp)); 1042*7a7741afSMartin Matuska } 1043*7a7741afSMartin Matuska 1044*7a7741afSMartin Matuska int 1045*7a7741afSMartin Matuska zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) 1046*7a7741afSMartin Matuska { 1047*7a7741afSMartin Matuska 
dmu_object_info_t doi; 1048*7a7741afSMartin Matuska dmu_buf_t *db; 1049*7a7741afSMartin Matuska znode_t *zp; 1050*7a7741afSMartin Matuska znode_hold_t *zh; 1051*7a7741afSMartin Matuska int err; 1052*7a7741afSMartin Matuska sa_handle_t *hdl; 1053*7a7741afSMartin Matuska 1054*7a7741afSMartin Matuska *zpp = NULL; 1055*7a7741afSMartin Matuska 1056*7a7741afSMartin Matuska again: 1057*7a7741afSMartin Matuska zh = zfs_znode_hold_enter(zfsvfs, obj_num); 1058*7a7741afSMartin Matuska 1059*7a7741afSMartin Matuska err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); 1060*7a7741afSMartin Matuska if (err) { 1061*7a7741afSMartin Matuska zfs_znode_hold_exit(zfsvfs, zh); 1062*7a7741afSMartin Matuska return (err); 1063*7a7741afSMartin Matuska } 1064*7a7741afSMartin Matuska 1065*7a7741afSMartin Matuska dmu_object_info_from_db(db, &doi); 1066*7a7741afSMartin Matuska if (doi.doi_bonus_type != DMU_OT_SA && 1067*7a7741afSMartin Matuska (doi.doi_bonus_type != DMU_OT_ZNODE || 1068*7a7741afSMartin Matuska (doi.doi_bonus_type == DMU_OT_ZNODE && 1069*7a7741afSMartin Matuska doi.doi_bonus_size < sizeof (znode_phys_t)))) { 1070*7a7741afSMartin Matuska sa_buf_rele(db, NULL); 1071*7a7741afSMartin Matuska zfs_znode_hold_exit(zfsvfs, zh); 1072*7a7741afSMartin Matuska return (SET_ERROR(EINVAL)); 1073*7a7741afSMartin Matuska } 1074*7a7741afSMartin Matuska 1075*7a7741afSMartin Matuska hdl = dmu_buf_get_user(db); 1076*7a7741afSMartin Matuska if (hdl != NULL) { 1077*7a7741afSMartin Matuska zp = sa_get_userdata(hdl); 1078*7a7741afSMartin Matuska 1079*7a7741afSMartin Matuska 1080*7a7741afSMartin Matuska /* 1081*7a7741afSMartin Matuska * Since "SA" does immediate eviction we 1082*7a7741afSMartin Matuska * should never find a sa handle that doesn't 1083*7a7741afSMartin Matuska * know about the znode. 
1084*7a7741afSMartin Matuska */ 1085*7a7741afSMartin Matuska 1086*7a7741afSMartin Matuska ASSERT3P(zp, !=, NULL); 1087*7a7741afSMartin Matuska 1088*7a7741afSMartin Matuska mutex_enter(&zp->z_lock); 1089*7a7741afSMartin Matuska ASSERT3U(zp->z_id, ==, obj_num); 1090*7a7741afSMartin Matuska /* 1091*7a7741afSMartin Matuska * If zp->z_unlinked is set, the znode is already marked 1092*7a7741afSMartin Matuska * for deletion and should not be discovered. Check this 1093*7a7741afSMartin Matuska * after checking igrab() due to fsetxattr() & O_TMPFILE. 1094*7a7741afSMartin Matuska * 1095*7a7741afSMartin Matuska * If igrab() returns NULL the VFS has independently 1096*7a7741afSMartin Matuska * determined the inode should be evicted and has 1097*7a7741afSMartin Matuska * called iput_final() to start the eviction process. 1098*7a7741afSMartin Matuska * The SA handle is still valid but because the VFS 1099*7a7741afSMartin Matuska * requires that the eviction succeed we must drop 1100*7a7741afSMartin Matuska * our locks and references to allow the eviction to 1101*7a7741afSMartin Matuska * complete. The zfs_zget() may then be retried. 1102*7a7741afSMartin Matuska * 1103*7a7741afSMartin Matuska * This unlikely case could be optimized by registering 1104*7a7741afSMartin Matuska * a sops->drop_inode() callback. The callback would 1105*7a7741afSMartin Matuska * need to detect the active SA hold thereby informing 1106*7a7741afSMartin Matuska * the VFS that this inode should not be evicted. 
1107*7a7741afSMartin Matuska */ 1108*7a7741afSMartin Matuska if (igrab(ZTOI(zp)) == NULL) { 1109*7a7741afSMartin Matuska if (zp->z_unlinked) 1110*7a7741afSMartin Matuska err = SET_ERROR(ENOENT); 1111*7a7741afSMartin Matuska else 1112*7a7741afSMartin Matuska err = SET_ERROR(EAGAIN); 1113*7a7741afSMartin Matuska } else { 1114*7a7741afSMartin Matuska *zpp = zp; 1115*7a7741afSMartin Matuska err = 0; 1116*7a7741afSMartin Matuska } 1117*7a7741afSMartin Matuska 1118*7a7741afSMartin Matuska mutex_exit(&zp->z_lock); 1119*7a7741afSMartin Matuska sa_buf_rele(db, NULL); 1120*7a7741afSMartin Matuska zfs_znode_hold_exit(zfsvfs, zh); 1121*7a7741afSMartin Matuska 1122*7a7741afSMartin Matuska if (err == EAGAIN) { 1123*7a7741afSMartin Matuska /* inode might need this to finish evict */ 1124*7a7741afSMartin Matuska cond_resched(); 1125*7a7741afSMartin Matuska goto again; 1126*7a7741afSMartin Matuska } 1127*7a7741afSMartin Matuska return (err); 1128*7a7741afSMartin Matuska } 1129*7a7741afSMartin Matuska 1130*7a7741afSMartin Matuska /* 1131*7a7741afSMartin Matuska * Not found create new znode/vnode but only if file exists. 1132*7a7741afSMartin Matuska * 1133*7a7741afSMartin Matuska * There is a small window where zfs_vget() could 1134*7a7741afSMartin Matuska * find this object while a file create is still in 1135*7a7741afSMartin Matuska * progress. This is checked for in zfs_znode_alloc() 1136*7a7741afSMartin Matuska * 1137*7a7741afSMartin Matuska * if zfs_znode_alloc() fails it will drop the hold on the 1138*7a7741afSMartin Matuska * bonus buffer. 
1139*7a7741afSMartin Matuska */ 1140*7a7741afSMartin Matuska zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size, 1141*7a7741afSMartin Matuska doi.doi_bonus_type, NULL); 1142*7a7741afSMartin Matuska if (zp == NULL) { 1143*7a7741afSMartin Matuska err = SET_ERROR(ENOENT); 1144*7a7741afSMartin Matuska } else { 1145*7a7741afSMartin Matuska *zpp = zp; 1146*7a7741afSMartin Matuska } 1147*7a7741afSMartin Matuska zfs_znode_hold_exit(zfsvfs, zh); 1148*7a7741afSMartin Matuska return (err); 1149*7a7741afSMartin Matuska } 1150*7a7741afSMartin Matuska 1151*7a7741afSMartin Matuska int 1152*7a7741afSMartin Matuska zfs_rezget(znode_t *zp) 1153*7a7741afSMartin Matuska { 1154*7a7741afSMartin Matuska zfsvfs_t *zfsvfs = ZTOZSB(zp); 1155*7a7741afSMartin Matuska dmu_object_info_t doi; 1156*7a7741afSMartin Matuska dmu_buf_t *db; 1157*7a7741afSMartin Matuska uint64_t obj_num = zp->z_id; 1158*7a7741afSMartin Matuska uint64_t mode; 1159*7a7741afSMartin Matuska uint64_t links; 1160*7a7741afSMartin Matuska sa_bulk_attr_t bulk[11]; 1161*7a7741afSMartin Matuska int err; 1162*7a7741afSMartin Matuska int count = 0; 1163*7a7741afSMartin Matuska uint64_t gen; 1164*7a7741afSMartin Matuska uint64_t z_uid, z_gid; 1165*7a7741afSMartin Matuska uint64_t atime[2], mtime[2], ctime[2], btime[2]; 1166*7a7741afSMartin Matuska inode_timespec_t tmp_ts; 1167*7a7741afSMartin Matuska uint64_t projid = ZFS_DEFAULT_PROJID; 1168*7a7741afSMartin Matuska znode_hold_t *zh; 1169*7a7741afSMartin Matuska 1170*7a7741afSMartin Matuska /* 1171*7a7741afSMartin Matuska * skip ctldir, otherwise they will always get invalidated. This will 1172*7a7741afSMartin Matuska * cause funny behaviour for the mounted snapdirs. Especially for 1173*7a7741afSMartin Matuska * Linux >= 3.18, d_invalidate will detach the mountpoint and prevent 1174*7a7741afSMartin Matuska * anyone automount it again as long as someone is still using the 1175*7a7741afSMartin Matuska * detached mount. 
1176*7a7741afSMartin Matuska */ 1177*7a7741afSMartin Matuska if (zp->z_is_ctldir) 1178*7a7741afSMartin Matuska return (0); 1179*7a7741afSMartin Matuska 1180*7a7741afSMartin Matuska zh = zfs_znode_hold_enter(zfsvfs, obj_num); 1181*7a7741afSMartin Matuska 1182*7a7741afSMartin Matuska mutex_enter(&zp->z_acl_lock); 1183*7a7741afSMartin Matuska if (zp->z_acl_cached) { 1184*7a7741afSMartin Matuska zfs_acl_free(zp->z_acl_cached); 1185*7a7741afSMartin Matuska zp->z_acl_cached = NULL; 1186*7a7741afSMartin Matuska } 1187*7a7741afSMartin Matuska mutex_exit(&zp->z_acl_lock); 1188*7a7741afSMartin Matuska 1189*7a7741afSMartin Matuska rw_enter(&zp->z_xattr_lock, RW_WRITER); 1190*7a7741afSMartin Matuska if (zp->z_xattr_cached) { 1191*7a7741afSMartin Matuska nvlist_free(zp->z_xattr_cached); 1192*7a7741afSMartin Matuska zp->z_xattr_cached = NULL; 1193*7a7741afSMartin Matuska } 1194*7a7741afSMartin Matuska rw_exit(&zp->z_xattr_lock); 1195*7a7741afSMartin Matuska 1196*7a7741afSMartin Matuska ASSERT(zp->z_sa_hdl == NULL); 1197*7a7741afSMartin Matuska err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); 1198*7a7741afSMartin Matuska if (err) { 1199*7a7741afSMartin Matuska zfs_znode_hold_exit(zfsvfs, zh); 1200*7a7741afSMartin Matuska return (err); 1201*7a7741afSMartin Matuska } 1202*7a7741afSMartin Matuska 1203*7a7741afSMartin Matuska dmu_object_info_from_db(db, &doi); 1204*7a7741afSMartin Matuska if (doi.doi_bonus_type != DMU_OT_SA && 1205*7a7741afSMartin Matuska (doi.doi_bonus_type != DMU_OT_ZNODE || 1206*7a7741afSMartin Matuska (doi.doi_bonus_type == DMU_OT_ZNODE && 1207*7a7741afSMartin Matuska doi.doi_bonus_size < sizeof (znode_phys_t)))) { 1208*7a7741afSMartin Matuska sa_buf_rele(db, NULL); 1209*7a7741afSMartin Matuska zfs_znode_hold_exit(zfsvfs, zh); 1210*7a7741afSMartin Matuska return (SET_ERROR(EINVAL)); 1211*7a7741afSMartin Matuska } 1212*7a7741afSMartin Matuska 1213*7a7741afSMartin Matuska zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL); 1214*7a7741afSMartin Matuska 
1215*7a7741afSMartin Matuska /* reload cached values */ 1216*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, 1217*7a7741afSMartin Matuska &gen, sizeof (gen)); 1218*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, 1219*7a7741afSMartin Matuska &zp->z_size, sizeof (zp->z_size)); 1220*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, 1221*7a7741afSMartin Matuska &links, sizeof (links)); 1222*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, 1223*7a7741afSMartin Matuska &zp->z_pflags, sizeof (zp->z_pflags)); 1224*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, 1225*7a7741afSMartin Matuska &z_uid, sizeof (z_uid)); 1226*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, 1227*7a7741afSMartin Matuska &z_gid, sizeof (z_gid)); 1228*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, 1229*7a7741afSMartin Matuska &mode, sizeof (mode)); 1230*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, 1231*7a7741afSMartin Matuska &atime, 16); 1232*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, 1233*7a7741afSMartin Matuska &mtime, 16); 1234*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, 1235*7a7741afSMartin Matuska &ctime, 16); 1236*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &btime, 16); 1237*7a7741afSMartin Matuska 1238*7a7741afSMartin Matuska if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) { 1239*7a7741afSMartin Matuska zfs_znode_dmu_fini(zp); 1240*7a7741afSMartin Matuska zfs_znode_hold_exit(zfsvfs, zh); 1241*7a7741afSMartin Matuska return (SET_ERROR(EIO)); 1242*7a7741afSMartin Matuska } 1243*7a7741afSMartin Matuska 1244*7a7741afSMartin Matuska if (dmu_objset_projectquota_enabled(zfsvfs->z_os)) { 1245*7a7741afSMartin Matuska err = 
sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs), 1246*7a7741afSMartin Matuska &projid, 8); 1247*7a7741afSMartin Matuska if (err != 0 && err != ENOENT) { 1248*7a7741afSMartin Matuska zfs_znode_dmu_fini(zp); 1249*7a7741afSMartin Matuska zfs_znode_hold_exit(zfsvfs, zh); 1250*7a7741afSMartin Matuska return (SET_ERROR(err)); 1251*7a7741afSMartin Matuska } 1252*7a7741afSMartin Matuska } 1253*7a7741afSMartin Matuska 1254*7a7741afSMartin Matuska zp->z_projid = projid; 1255*7a7741afSMartin Matuska zp->z_mode = ZTOI(zp)->i_mode = mode; 1256*7a7741afSMartin Matuska zfs_uid_write(ZTOI(zp), z_uid); 1257*7a7741afSMartin Matuska zfs_gid_write(ZTOI(zp), z_gid); 1258*7a7741afSMartin Matuska 1259*7a7741afSMartin Matuska ZFS_TIME_DECODE(&tmp_ts, atime); 1260*7a7741afSMartin Matuska zpl_inode_set_atime_to_ts(ZTOI(zp), tmp_ts); 1261*7a7741afSMartin Matuska ZFS_TIME_DECODE(&tmp_ts, mtime); 1262*7a7741afSMartin Matuska zpl_inode_set_mtime_to_ts(ZTOI(zp), tmp_ts); 1263*7a7741afSMartin Matuska ZFS_TIME_DECODE(&tmp_ts, ctime); 1264*7a7741afSMartin Matuska zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ts); 1265*7a7741afSMartin Matuska ZFS_TIME_DECODE(&zp->z_btime, btime); 1266*7a7741afSMartin Matuska 1267*7a7741afSMartin Matuska if ((uint32_t)gen != ZTOI(zp)->i_generation) { 1268*7a7741afSMartin Matuska zfs_znode_dmu_fini(zp); 1269*7a7741afSMartin Matuska zfs_znode_hold_exit(zfsvfs, zh); 1270*7a7741afSMartin Matuska return (SET_ERROR(EIO)); 1271*7a7741afSMartin Matuska } 1272*7a7741afSMartin Matuska 1273*7a7741afSMartin Matuska set_nlink(ZTOI(zp), (uint32_t)links); 1274*7a7741afSMartin Matuska zfs_set_inode_flags(zp, ZTOI(zp)); 1275*7a7741afSMartin Matuska 1276*7a7741afSMartin Matuska zp->z_blksz = doi.doi_data_block_size; 1277*7a7741afSMartin Matuska zp->z_atime_dirty = B_FALSE; 1278*7a7741afSMartin Matuska zfs_znode_update_vfs(zp); 1279*7a7741afSMartin Matuska 1280*7a7741afSMartin Matuska /* 1281*7a7741afSMartin Matuska * If the file has zero links, then it has been unlinked on the send 
1282*7a7741afSMartin Matuska * side and it must be in the received unlinked set. 1283*7a7741afSMartin Matuska * We call zfs_znode_dmu_fini() now to prevent any accesses to the 1284*7a7741afSMartin Matuska * stale data and to prevent automatic removal of the file in 1285*7a7741afSMartin Matuska * zfs_zinactive(). The file will be removed either when it is removed 1286*7a7741afSMartin Matuska * on the send side and the next incremental stream is received or 1287*7a7741afSMartin Matuska * when the unlinked set gets processed. 1288*7a7741afSMartin Matuska */ 1289*7a7741afSMartin Matuska zp->z_unlinked = (ZTOI(zp)->i_nlink == 0); 1290*7a7741afSMartin Matuska if (zp->z_unlinked) 1291*7a7741afSMartin Matuska zfs_znode_dmu_fini(zp); 1292*7a7741afSMartin Matuska 1293*7a7741afSMartin Matuska zfs_znode_hold_exit(zfsvfs, zh); 1294*7a7741afSMartin Matuska 1295*7a7741afSMartin Matuska return (0); 1296*7a7741afSMartin Matuska } 1297*7a7741afSMartin Matuska 1298*7a7741afSMartin Matuska void 1299*7a7741afSMartin Matuska zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) 1300*7a7741afSMartin Matuska { 1301*7a7741afSMartin Matuska zfsvfs_t *zfsvfs = ZTOZSB(zp); 1302*7a7741afSMartin Matuska objset_t *os = zfsvfs->z_os; 1303*7a7741afSMartin Matuska uint64_t obj = zp->z_id; 1304*7a7741afSMartin Matuska uint64_t acl_obj = zfs_external_acl(zp); 1305*7a7741afSMartin Matuska znode_hold_t *zh; 1306*7a7741afSMartin Matuska 1307*7a7741afSMartin Matuska zh = zfs_znode_hold_enter(zfsvfs, obj); 1308*7a7741afSMartin Matuska if (acl_obj) { 1309*7a7741afSMartin Matuska VERIFY(!zp->z_is_sa); 1310*7a7741afSMartin Matuska VERIFY(0 == dmu_object_free(os, acl_obj, tx)); 1311*7a7741afSMartin Matuska } 1312*7a7741afSMartin Matuska VERIFY(0 == dmu_object_free(os, obj, tx)); 1313*7a7741afSMartin Matuska zfs_znode_dmu_fini(zp); 1314*7a7741afSMartin Matuska zfs_znode_hold_exit(zfsvfs, zh); 1315*7a7741afSMartin Matuska } 1316*7a7741afSMartin Matuska 1317*7a7741afSMartin Matuska void 1318*7a7741afSMartin Matuska 
zfs_zinactive(znode_t *zp) 1319*7a7741afSMartin Matuska { 1320*7a7741afSMartin Matuska zfsvfs_t *zfsvfs = ZTOZSB(zp); 1321*7a7741afSMartin Matuska uint64_t z_id = zp->z_id; 1322*7a7741afSMartin Matuska znode_hold_t *zh; 1323*7a7741afSMartin Matuska 1324*7a7741afSMartin Matuska ASSERT(zp->z_sa_hdl); 1325*7a7741afSMartin Matuska 1326*7a7741afSMartin Matuska /* 1327*7a7741afSMartin Matuska * Don't allow a zfs_zget() while were trying to release this znode. 1328*7a7741afSMartin Matuska */ 1329*7a7741afSMartin Matuska zh = zfs_znode_hold_enter(zfsvfs, z_id); 1330*7a7741afSMartin Matuska 1331*7a7741afSMartin Matuska mutex_enter(&zp->z_lock); 1332*7a7741afSMartin Matuska 1333*7a7741afSMartin Matuska /* 1334*7a7741afSMartin Matuska * If this was the last reference to a file with no links, remove 1335*7a7741afSMartin Matuska * the file from the file system unless the file system is mounted 1336*7a7741afSMartin Matuska * read-only. That can happen, for example, if the file system was 1337*7a7741afSMartin Matuska * originally read-write, the file was opened, then unlinked and 1338*7a7741afSMartin Matuska * the file system was made read-only before the file was finally 1339*7a7741afSMartin Matuska * closed. The file will remain in the unlinked set. 
1340*7a7741afSMartin Matuska */ 1341*7a7741afSMartin Matuska if (zp->z_unlinked) { 1342*7a7741afSMartin Matuska ASSERT(!zfsvfs->z_issnap); 1343*7a7741afSMartin Matuska if (!zfs_is_readonly(zfsvfs) && !zfs_unlink_suspend_progress) { 1344*7a7741afSMartin Matuska mutex_exit(&zp->z_lock); 1345*7a7741afSMartin Matuska zfs_znode_hold_exit(zfsvfs, zh); 1346*7a7741afSMartin Matuska zfs_rmnode(zp); 1347*7a7741afSMartin Matuska return; 1348*7a7741afSMartin Matuska } 1349*7a7741afSMartin Matuska } 1350*7a7741afSMartin Matuska 1351*7a7741afSMartin Matuska mutex_exit(&zp->z_lock); 1352*7a7741afSMartin Matuska zfs_znode_dmu_fini(zp); 1353*7a7741afSMartin Matuska 1354*7a7741afSMartin Matuska zfs_znode_hold_exit(zfsvfs, zh); 1355*7a7741afSMartin Matuska } 1356*7a7741afSMartin Matuska 1357*7a7741afSMartin Matuska /* 1358*7a7741afSMartin Matuska * Determine whether the znode's atime must be updated. The logic mostly 1359*7a7741afSMartin Matuska * duplicates the Linux kernel's relatime_need_update() functionality. 1360*7a7741afSMartin Matuska * This function is only called if the underlying filesystem actually has 1361*7a7741afSMartin Matuska * atime updates enabled. 1362*7a7741afSMartin Matuska */ 1363*7a7741afSMartin Matuska boolean_t 1364*7a7741afSMartin Matuska zfs_relatime_need_update(const struct inode *ip) 1365*7a7741afSMartin Matuska { 1366*7a7741afSMartin Matuska inode_timespec_t now, tmp_atime, tmp_ts; 1367*7a7741afSMartin Matuska 1368*7a7741afSMartin Matuska gethrestime(&now); 1369*7a7741afSMartin Matuska tmp_atime = zpl_inode_get_atime(ip); 1370*7a7741afSMartin Matuska /* 1371*7a7741afSMartin Matuska * In relatime mode, only update the atime if the previous atime 1372*7a7741afSMartin Matuska * is earlier than either the ctime or mtime or if at least a day 1373*7a7741afSMartin Matuska * has passed since the last update of atime. 
1374*7a7741afSMartin Matuska */ 1375*7a7741afSMartin Matuska tmp_ts = zpl_inode_get_mtime(ip); 1376*7a7741afSMartin Matuska if (timespec64_compare(&tmp_ts, &tmp_atime) >= 0) 1377*7a7741afSMartin Matuska return (B_TRUE); 1378*7a7741afSMartin Matuska 1379*7a7741afSMartin Matuska tmp_ts = zpl_inode_get_ctime(ip); 1380*7a7741afSMartin Matuska if (timespec64_compare(&tmp_ts, &tmp_atime) >= 0) 1381*7a7741afSMartin Matuska return (B_TRUE); 1382*7a7741afSMartin Matuska 1383*7a7741afSMartin Matuska if ((hrtime_t)now.tv_sec - (hrtime_t)tmp_atime.tv_sec >= 24*60*60) 1384*7a7741afSMartin Matuska return (B_TRUE); 1385*7a7741afSMartin Matuska 1386*7a7741afSMartin Matuska return (B_FALSE); 1387*7a7741afSMartin Matuska } 1388*7a7741afSMartin Matuska 1389*7a7741afSMartin Matuska /* 1390*7a7741afSMartin Matuska * Prepare to update znode time stamps. 1391*7a7741afSMartin Matuska * 1392*7a7741afSMartin Matuska * IN: zp - znode requiring timestamp update 1393*7a7741afSMartin Matuska * flag - ATTR_MTIME, ATTR_CTIME flags 1394*7a7741afSMartin Matuska * 1395*7a7741afSMartin Matuska * OUT: zp - z_seq 1396*7a7741afSMartin Matuska * mtime - new mtime 1397*7a7741afSMartin Matuska * ctime - new ctime 1398*7a7741afSMartin Matuska * 1399*7a7741afSMartin Matuska * Note: We don't update atime here, because we rely on Linux VFS to do 1400*7a7741afSMartin Matuska * atime updating. 
1401*7a7741afSMartin Matuska */ 1402*7a7741afSMartin Matuska void 1403*7a7741afSMartin Matuska zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], 1404*7a7741afSMartin Matuska uint64_t ctime[2]) 1405*7a7741afSMartin Matuska { 1406*7a7741afSMartin Matuska inode_timespec_t now, tmp_ts; 1407*7a7741afSMartin Matuska 1408*7a7741afSMartin Matuska gethrestime(&now); 1409*7a7741afSMartin Matuska 1410*7a7741afSMartin Matuska zp->z_seq++; 1411*7a7741afSMartin Matuska 1412*7a7741afSMartin Matuska if (flag & ATTR_MTIME) { 1413*7a7741afSMartin Matuska ZFS_TIME_ENCODE(&now, mtime); 1414*7a7741afSMartin Matuska ZFS_TIME_DECODE(&tmp_ts, mtime); 1415*7a7741afSMartin Matuska zpl_inode_set_mtime_to_ts(ZTOI(zp), tmp_ts); 1416*7a7741afSMartin Matuska if (ZTOZSB(zp)->z_use_fuids) { 1417*7a7741afSMartin Matuska zp->z_pflags |= (ZFS_ARCHIVE | 1418*7a7741afSMartin Matuska ZFS_AV_MODIFIED); 1419*7a7741afSMartin Matuska } 1420*7a7741afSMartin Matuska } 1421*7a7741afSMartin Matuska 1422*7a7741afSMartin Matuska if (flag & ATTR_CTIME) { 1423*7a7741afSMartin Matuska ZFS_TIME_ENCODE(&now, ctime); 1424*7a7741afSMartin Matuska ZFS_TIME_DECODE(&tmp_ts, ctime); 1425*7a7741afSMartin Matuska zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ts); 1426*7a7741afSMartin Matuska if (ZTOZSB(zp)->z_use_fuids) 1427*7a7741afSMartin Matuska zp->z_pflags |= ZFS_ARCHIVE; 1428*7a7741afSMartin Matuska } 1429*7a7741afSMartin Matuska } 1430*7a7741afSMartin Matuska 1431*7a7741afSMartin Matuska /* 1432*7a7741afSMartin Matuska * Grow the block size for a file. 1433*7a7741afSMartin Matuska * 1434*7a7741afSMartin Matuska * IN: zp - znode of file to free data in. 1435*7a7741afSMartin Matuska * size - requested block size 1436*7a7741afSMartin Matuska * tx - open transaction. 1437*7a7741afSMartin Matuska * 1438*7a7741afSMartin Matuska * NOTE: this function assumes that the znode is write locked. 
1439*7a7741afSMartin Matuska */ 1440*7a7741afSMartin Matuska void 1441*7a7741afSMartin Matuska zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) 1442*7a7741afSMartin Matuska { 1443*7a7741afSMartin Matuska int error; 1444*7a7741afSMartin Matuska u_longlong_t dummy; 1445*7a7741afSMartin Matuska 1446*7a7741afSMartin Matuska if (size <= zp->z_blksz) 1447*7a7741afSMartin Matuska return; 1448*7a7741afSMartin Matuska /* 1449*7a7741afSMartin Matuska * If the file size is already greater than the current blocksize, 1450*7a7741afSMartin Matuska * we will not grow. If there is more than one block in a file, 1451*7a7741afSMartin Matuska * the blocksize cannot change. 1452*7a7741afSMartin Matuska */ 1453*7a7741afSMartin Matuska if (zp->z_blksz && zp->z_size > zp->z_blksz) 1454*7a7741afSMartin Matuska return; 1455*7a7741afSMartin Matuska 1456*7a7741afSMartin Matuska error = dmu_object_set_blocksize(ZTOZSB(zp)->z_os, zp->z_id, 1457*7a7741afSMartin Matuska size, 0, tx); 1458*7a7741afSMartin Matuska 1459*7a7741afSMartin Matuska if (error == ENOTSUP) 1460*7a7741afSMartin Matuska return; 1461*7a7741afSMartin Matuska ASSERT0(error); 1462*7a7741afSMartin Matuska 1463*7a7741afSMartin Matuska /* What blocksize did we actually get? */ 1464*7a7741afSMartin Matuska dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy); 1465*7a7741afSMartin Matuska } 1466*7a7741afSMartin Matuska 1467*7a7741afSMartin Matuska /* 1468*7a7741afSMartin Matuska * Increase the file length 1469*7a7741afSMartin Matuska * 1470*7a7741afSMartin Matuska * IN: zp - znode of file to free data in. 
1471*7a7741afSMartin Matuska * end - new end-of-file 1472*7a7741afSMartin Matuska * 1473*7a7741afSMartin Matuska * RETURN: 0 on success, error code on failure 1474*7a7741afSMartin Matuska */ 1475*7a7741afSMartin Matuska static int 1476*7a7741afSMartin Matuska zfs_extend(znode_t *zp, uint64_t end) 1477*7a7741afSMartin Matuska { 1478*7a7741afSMartin Matuska zfsvfs_t *zfsvfs = ZTOZSB(zp); 1479*7a7741afSMartin Matuska dmu_tx_t *tx; 1480*7a7741afSMartin Matuska zfs_locked_range_t *lr; 1481*7a7741afSMartin Matuska uint64_t newblksz; 1482*7a7741afSMartin Matuska int error; 1483*7a7741afSMartin Matuska 1484*7a7741afSMartin Matuska /* 1485*7a7741afSMartin Matuska * We will change zp_size, lock the whole file. 1486*7a7741afSMartin Matuska */ 1487*7a7741afSMartin Matuska lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); 1488*7a7741afSMartin Matuska 1489*7a7741afSMartin Matuska /* 1490*7a7741afSMartin Matuska * Nothing to do if file already at desired length. 1491*7a7741afSMartin Matuska */ 1492*7a7741afSMartin Matuska if (end <= zp->z_size) { 1493*7a7741afSMartin Matuska zfs_rangelock_exit(lr); 1494*7a7741afSMartin Matuska return (0); 1495*7a7741afSMartin Matuska } 1496*7a7741afSMartin Matuska tx = dmu_tx_create(zfsvfs->z_os); 1497*7a7741afSMartin Matuska dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1498*7a7741afSMartin Matuska zfs_sa_upgrade_txholds(tx, zp); 1499*7a7741afSMartin Matuska if (end > zp->z_blksz && 1500*7a7741afSMartin Matuska (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) { 1501*7a7741afSMartin Matuska /* 1502*7a7741afSMartin Matuska * We are growing the file past the current block size. 1503*7a7741afSMartin Matuska */ 1504*7a7741afSMartin Matuska if (zp->z_blksz > ZTOZSB(zp)->z_max_blksz) { 1505*7a7741afSMartin Matuska /* 1506*7a7741afSMartin Matuska * File's blocksize is already larger than the 1507*7a7741afSMartin Matuska * "recordsize" property. Only let it grow to 1508*7a7741afSMartin Matuska * the next power of 2. 
1509*7a7741afSMartin Matuska */ 1510*7a7741afSMartin Matuska ASSERT(!ISP2(zp->z_blksz)); 1511*7a7741afSMartin Matuska newblksz = MIN(end, 1 << highbit64(zp->z_blksz)); 1512*7a7741afSMartin Matuska } else { 1513*7a7741afSMartin Matuska newblksz = MIN(end, ZTOZSB(zp)->z_max_blksz); 1514*7a7741afSMartin Matuska } 1515*7a7741afSMartin Matuska dmu_tx_hold_write(tx, zp->z_id, 0, newblksz); 1516*7a7741afSMartin Matuska } else { 1517*7a7741afSMartin Matuska newblksz = 0; 1518*7a7741afSMartin Matuska } 1519*7a7741afSMartin Matuska 1520*7a7741afSMartin Matuska error = dmu_tx_assign(tx, TXG_WAIT); 1521*7a7741afSMartin Matuska if (error) { 1522*7a7741afSMartin Matuska dmu_tx_abort(tx); 1523*7a7741afSMartin Matuska zfs_rangelock_exit(lr); 1524*7a7741afSMartin Matuska return (error); 1525*7a7741afSMartin Matuska } 1526*7a7741afSMartin Matuska 1527*7a7741afSMartin Matuska if (newblksz) 1528*7a7741afSMartin Matuska zfs_grow_blocksize(zp, newblksz, tx); 1529*7a7741afSMartin Matuska 1530*7a7741afSMartin Matuska zp->z_size = end; 1531*7a7741afSMartin Matuska 1532*7a7741afSMartin Matuska VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)), 1533*7a7741afSMartin Matuska &zp->z_size, sizeof (zp->z_size), tx)); 1534*7a7741afSMartin Matuska 1535*7a7741afSMartin Matuska zfs_rangelock_exit(lr); 1536*7a7741afSMartin Matuska 1537*7a7741afSMartin Matuska dmu_tx_commit(tx); 1538*7a7741afSMartin Matuska 1539*7a7741afSMartin Matuska return (0); 1540*7a7741afSMartin Matuska } 1541*7a7741afSMartin Matuska 1542*7a7741afSMartin Matuska /* 1543*7a7741afSMartin Matuska * zfs_zero_partial_page - Modeled after update_pages() but 1544*7a7741afSMartin Matuska * with different arguments and semantics for use by zfs_freesp(). 1545*7a7741afSMartin Matuska * 1546*7a7741afSMartin Matuska * Zeroes a piece of a single page cache entry for zp at offset 1547*7a7741afSMartin Matuska * start and length len. 
1548*7a7741afSMartin Matuska * 1549*7a7741afSMartin Matuska * Caller must acquire a range lock on the file for the region 1550*7a7741afSMartin Matuska * being zeroed in order that the ARC and page cache stay in sync. 1551*7a7741afSMartin Matuska */ 1552*7a7741afSMartin Matuska static void 1553*7a7741afSMartin Matuska zfs_zero_partial_page(znode_t *zp, uint64_t start, uint64_t len) 1554*7a7741afSMartin Matuska { 1555*7a7741afSMartin Matuska struct address_space *mp = ZTOI(zp)->i_mapping; 1556*7a7741afSMartin Matuska struct page *pp; 1557*7a7741afSMartin Matuska int64_t off; 1558*7a7741afSMartin Matuska void *pb; 1559*7a7741afSMartin Matuska 1560*7a7741afSMartin Matuska ASSERT((start & PAGE_MASK) == ((start + len - 1) & PAGE_MASK)); 1561*7a7741afSMartin Matuska 1562*7a7741afSMartin Matuska off = start & (PAGE_SIZE - 1); 1563*7a7741afSMartin Matuska start &= PAGE_MASK; 1564*7a7741afSMartin Matuska 1565*7a7741afSMartin Matuska pp = find_lock_page(mp, start >> PAGE_SHIFT); 1566*7a7741afSMartin Matuska if (pp) { 1567*7a7741afSMartin Matuska if (mapping_writably_mapped(mp)) 1568*7a7741afSMartin Matuska flush_dcache_page(pp); 1569*7a7741afSMartin Matuska 1570*7a7741afSMartin Matuska pb = kmap(pp); 1571*7a7741afSMartin Matuska memset(pb + off, 0, len); 1572*7a7741afSMartin Matuska kunmap(pp); 1573*7a7741afSMartin Matuska 1574*7a7741afSMartin Matuska if (mapping_writably_mapped(mp)) 1575*7a7741afSMartin Matuska flush_dcache_page(pp); 1576*7a7741afSMartin Matuska 1577*7a7741afSMartin Matuska mark_page_accessed(pp); 1578*7a7741afSMartin Matuska SetPageUptodate(pp); 1579*7a7741afSMartin Matuska ClearPageError(pp); 1580*7a7741afSMartin Matuska unlock_page(pp); 1581*7a7741afSMartin Matuska put_page(pp); 1582*7a7741afSMartin Matuska } 1583*7a7741afSMartin Matuska } 1584*7a7741afSMartin Matuska 1585*7a7741afSMartin Matuska /* 1586*7a7741afSMartin Matuska * Free space in a file. 1587*7a7741afSMartin Matuska * 1588*7a7741afSMartin Matuska * IN: zp - znode of file to free data in. 
1589*7a7741afSMartin Matuska * off - start of section to free. 1590*7a7741afSMartin Matuska * len - length of section to free. 1591*7a7741afSMartin Matuska * 1592*7a7741afSMartin Matuska * RETURN: 0 on success, error code on failure 1593*7a7741afSMartin Matuska */ 1594*7a7741afSMartin Matuska static int 1595*7a7741afSMartin Matuska zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) 1596*7a7741afSMartin Matuska { 1597*7a7741afSMartin Matuska zfsvfs_t *zfsvfs = ZTOZSB(zp); 1598*7a7741afSMartin Matuska zfs_locked_range_t *lr; 1599*7a7741afSMartin Matuska int error; 1600*7a7741afSMartin Matuska 1601*7a7741afSMartin Matuska /* 1602*7a7741afSMartin Matuska * Lock the range being freed. 1603*7a7741afSMartin Matuska */ 1604*7a7741afSMartin Matuska lr = zfs_rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER); 1605*7a7741afSMartin Matuska 1606*7a7741afSMartin Matuska /* 1607*7a7741afSMartin Matuska * Nothing to do if file already at desired length. 1608*7a7741afSMartin Matuska */ 1609*7a7741afSMartin Matuska if (off >= zp->z_size) { 1610*7a7741afSMartin Matuska zfs_rangelock_exit(lr); 1611*7a7741afSMartin Matuska return (0); 1612*7a7741afSMartin Matuska } 1613*7a7741afSMartin Matuska 1614*7a7741afSMartin Matuska if (off + len > zp->z_size) 1615*7a7741afSMartin Matuska len = zp->z_size - off; 1616*7a7741afSMartin Matuska 1617*7a7741afSMartin Matuska error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len); 1618*7a7741afSMartin Matuska 1619*7a7741afSMartin Matuska /* 1620*7a7741afSMartin Matuska * Zero partial page cache entries. This must be done under a 1621*7a7741afSMartin Matuska * range lock in order to keep the ARC and page cache in sync. 
1622*7a7741afSMartin Matuska */ 1623*7a7741afSMartin Matuska if (zn_has_cached_data(zp, off, off + len - 1)) { 1624*7a7741afSMartin Matuska loff_t first_page, last_page, page_len; 1625*7a7741afSMartin Matuska loff_t first_page_offset, last_page_offset; 1626*7a7741afSMartin Matuska 1627*7a7741afSMartin Matuska /* first possible full page in hole */ 1628*7a7741afSMartin Matuska first_page = (off + PAGE_SIZE - 1) >> PAGE_SHIFT; 1629*7a7741afSMartin Matuska /* last page of hole */ 1630*7a7741afSMartin Matuska last_page = (off + len) >> PAGE_SHIFT; 1631*7a7741afSMartin Matuska 1632*7a7741afSMartin Matuska /* offset of first_page */ 1633*7a7741afSMartin Matuska first_page_offset = first_page << PAGE_SHIFT; 1634*7a7741afSMartin Matuska /* offset of last_page */ 1635*7a7741afSMartin Matuska last_page_offset = last_page << PAGE_SHIFT; 1636*7a7741afSMartin Matuska 1637*7a7741afSMartin Matuska /* truncate whole pages */ 1638*7a7741afSMartin Matuska if (last_page_offset > first_page_offset) { 1639*7a7741afSMartin Matuska truncate_inode_pages_range(ZTOI(zp)->i_mapping, 1640*7a7741afSMartin Matuska first_page_offset, last_page_offset - 1); 1641*7a7741afSMartin Matuska } 1642*7a7741afSMartin Matuska 1643*7a7741afSMartin Matuska /* truncate sub-page ranges */ 1644*7a7741afSMartin Matuska if (first_page > last_page) { 1645*7a7741afSMartin Matuska /* entire punched area within a single page */ 1646*7a7741afSMartin Matuska zfs_zero_partial_page(zp, off, len); 1647*7a7741afSMartin Matuska } else { 1648*7a7741afSMartin Matuska /* beginning of punched area at the end of a page */ 1649*7a7741afSMartin Matuska page_len = first_page_offset - off; 1650*7a7741afSMartin Matuska if (page_len > 0) 1651*7a7741afSMartin Matuska zfs_zero_partial_page(zp, off, page_len); 1652*7a7741afSMartin Matuska 1653*7a7741afSMartin Matuska /* end of punched area at the beginning of a page */ 1654*7a7741afSMartin Matuska page_len = off + len - last_page_offset; 1655*7a7741afSMartin Matuska if (page_len > 0) 
1656*7a7741afSMartin Matuska zfs_zero_partial_page(zp, last_page_offset, 1657*7a7741afSMartin Matuska page_len); 1658*7a7741afSMartin Matuska } 1659*7a7741afSMartin Matuska } 1660*7a7741afSMartin Matuska zfs_rangelock_exit(lr); 1661*7a7741afSMartin Matuska 1662*7a7741afSMartin Matuska return (error); 1663*7a7741afSMartin Matuska } 1664*7a7741afSMartin Matuska 1665*7a7741afSMartin Matuska /* 1666*7a7741afSMartin Matuska * Truncate a file 1667*7a7741afSMartin Matuska * 1668*7a7741afSMartin Matuska * IN: zp - znode of file to free data in. 1669*7a7741afSMartin Matuska * end - new end-of-file. 1670*7a7741afSMartin Matuska * 1671*7a7741afSMartin Matuska * RETURN: 0 on success, error code on failure 1672*7a7741afSMartin Matuska */ 1673*7a7741afSMartin Matuska static int 1674*7a7741afSMartin Matuska zfs_trunc(znode_t *zp, uint64_t end) 1675*7a7741afSMartin Matuska { 1676*7a7741afSMartin Matuska zfsvfs_t *zfsvfs = ZTOZSB(zp); 1677*7a7741afSMartin Matuska dmu_tx_t *tx; 1678*7a7741afSMartin Matuska zfs_locked_range_t *lr; 1679*7a7741afSMartin Matuska int error; 1680*7a7741afSMartin Matuska sa_bulk_attr_t bulk[2]; 1681*7a7741afSMartin Matuska int count = 0; 1682*7a7741afSMartin Matuska 1683*7a7741afSMartin Matuska /* 1684*7a7741afSMartin Matuska * We will change zp_size, lock the whole file. 1685*7a7741afSMartin Matuska */ 1686*7a7741afSMartin Matuska lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER); 1687*7a7741afSMartin Matuska 1688*7a7741afSMartin Matuska /* 1689*7a7741afSMartin Matuska * Nothing to do if file already at desired length. 
1690*7a7741afSMartin Matuska */ 1691*7a7741afSMartin Matuska if (end >= zp->z_size) { 1692*7a7741afSMartin Matuska zfs_rangelock_exit(lr); 1693*7a7741afSMartin Matuska return (0); 1694*7a7741afSMartin Matuska } 1695*7a7741afSMartin Matuska 1696*7a7741afSMartin Matuska error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, 1697*7a7741afSMartin Matuska DMU_OBJECT_END); 1698*7a7741afSMartin Matuska if (error) { 1699*7a7741afSMartin Matuska zfs_rangelock_exit(lr); 1700*7a7741afSMartin Matuska return (error); 1701*7a7741afSMartin Matuska } 1702*7a7741afSMartin Matuska tx = dmu_tx_create(zfsvfs->z_os); 1703*7a7741afSMartin Matuska dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1704*7a7741afSMartin Matuska zfs_sa_upgrade_txholds(tx, zp); 1705*7a7741afSMartin Matuska dmu_tx_mark_netfree(tx); 1706*7a7741afSMartin Matuska error = dmu_tx_assign(tx, TXG_WAIT); 1707*7a7741afSMartin Matuska if (error) { 1708*7a7741afSMartin Matuska dmu_tx_abort(tx); 1709*7a7741afSMartin Matuska zfs_rangelock_exit(lr); 1710*7a7741afSMartin Matuska return (error); 1711*7a7741afSMartin Matuska } 1712*7a7741afSMartin Matuska 1713*7a7741afSMartin Matuska zp->z_size = end; 1714*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), 1715*7a7741afSMartin Matuska NULL, &zp->z_size, sizeof (zp->z_size)); 1716*7a7741afSMartin Matuska 1717*7a7741afSMartin Matuska if (end == 0) { 1718*7a7741afSMartin Matuska zp->z_pflags &= ~ZFS_SPARSE; 1719*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), 1720*7a7741afSMartin Matuska NULL, &zp->z_pflags, 8); 1721*7a7741afSMartin Matuska } 1722*7a7741afSMartin Matuska VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0); 1723*7a7741afSMartin Matuska 1724*7a7741afSMartin Matuska dmu_tx_commit(tx); 1725*7a7741afSMartin Matuska zfs_rangelock_exit(lr); 1726*7a7741afSMartin Matuska 1727*7a7741afSMartin Matuska return (0); 1728*7a7741afSMartin Matuska } 1729*7a7741afSMartin Matuska 1730*7a7741afSMartin Matuska /* 
1731*7a7741afSMartin Matuska * Free space in a file 1732*7a7741afSMartin Matuska * 1733*7a7741afSMartin Matuska * IN: zp - znode of file to free data in. 1734*7a7741afSMartin Matuska * off - start of range 1735*7a7741afSMartin Matuska * len - end of range (0 => EOF) 1736*7a7741afSMartin Matuska * flag - current file open mode flags. 1737*7a7741afSMartin Matuska * log - TRUE if this action should be logged 1738*7a7741afSMartin Matuska * 1739*7a7741afSMartin Matuska * RETURN: 0 on success, error code on failure 1740*7a7741afSMartin Matuska */ 1741*7a7741afSMartin Matuska int 1742*7a7741afSMartin Matuska zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) 1743*7a7741afSMartin Matuska { 1744*7a7741afSMartin Matuska dmu_tx_t *tx; 1745*7a7741afSMartin Matuska zfsvfs_t *zfsvfs = ZTOZSB(zp); 1746*7a7741afSMartin Matuska zilog_t *zilog = zfsvfs->z_log; 1747*7a7741afSMartin Matuska uint64_t mode; 1748*7a7741afSMartin Matuska uint64_t mtime[2], ctime[2]; 1749*7a7741afSMartin Matuska sa_bulk_attr_t bulk[3]; 1750*7a7741afSMartin Matuska int count = 0; 1751*7a7741afSMartin Matuska int error; 1752*7a7741afSMartin Matuska 1753*7a7741afSMartin Matuska if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode, 1754*7a7741afSMartin Matuska sizeof (mode))) != 0) 1755*7a7741afSMartin Matuska return (error); 1756*7a7741afSMartin Matuska 1757*7a7741afSMartin Matuska if (off > zp->z_size) { 1758*7a7741afSMartin Matuska error = zfs_extend(zp, off+len); 1759*7a7741afSMartin Matuska if (error == 0 && log) 1760*7a7741afSMartin Matuska goto log; 1761*7a7741afSMartin Matuska goto out; 1762*7a7741afSMartin Matuska } 1763*7a7741afSMartin Matuska 1764*7a7741afSMartin Matuska if (len == 0) { 1765*7a7741afSMartin Matuska error = zfs_trunc(zp, off); 1766*7a7741afSMartin Matuska } else { 1767*7a7741afSMartin Matuska if ((error = zfs_free_range(zp, off, len)) == 0 && 1768*7a7741afSMartin Matuska off + len > zp->z_size) 1769*7a7741afSMartin Matuska error = zfs_extend(zp, 
off+len); 1770*7a7741afSMartin Matuska } 1771*7a7741afSMartin Matuska if (error || !log) 1772*7a7741afSMartin Matuska goto out; 1773*7a7741afSMartin Matuska log: 1774*7a7741afSMartin Matuska tx = dmu_tx_create(zfsvfs->z_os); 1775*7a7741afSMartin Matuska dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); 1776*7a7741afSMartin Matuska zfs_sa_upgrade_txholds(tx, zp); 1777*7a7741afSMartin Matuska error = dmu_tx_assign(tx, TXG_WAIT); 1778*7a7741afSMartin Matuska if (error) { 1779*7a7741afSMartin Matuska dmu_tx_abort(tx); 1780*7a7741afSMartin Matuska goto out; 1781*7a7741afSMartin Matuska } 1782*7a7741afSMartin Matuska 1783*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16); 1784*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16); 1785*7a7741afSMartin Matuska SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), 1786*7a7741afSMartin Matuska NULL, &zp->z_pflags, 8); 1787*7a7741afSMartin Matuska zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime); 1788*7a7741afSMartin Matuska error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); 1789*7a7741afSMartin Matuska ASSERT(error == 0); 1790*7a7741afSMartin Matuska 1791*7a7741afSMartin Matuska zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); 1792*7a7741afSMartin Matuska 1793*7a7741afSMartin Matuska dmu_tx_commit(tx); 1794*7a7741afSMartin Matuska 1795*7a7741afSMartin Matuska zfs_znode_update_vfs(zp); 1796*7a7741afSMartin Matuska error = 0; 1797*7a7741afSMartin Matuska 1798*7a7741afSMartin Matuska out: 1799*7a7741afSMartin Matuska /* 1800*7a7741afSMartin Matuska * Truncate the page cache - for file truncate operations, use 1801*7a7741afSMartin Matuska * the purpose-built API for truncations. For punching operations, 1802*7a7741afSMartin Matuska * the truncation is handled under a range lock in zfs_free_range. 
1803*7a7741afSMartin Matuska */ 1804*7a7741afSMartin Matuska if (len == 0) 1805*7a7741afSMartin Matuska truncate_setsize(ZTOI(zp), off); 1806*7a7741afSMartin Matuska return (error); 1807*7a7741afSMartin Matuska } 1808*7a7741afSMartin Matuska 1809*7a7741afSMartin Matuska void 1810*7a7741afSMartin Matuska zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) 1811*7a7741afSMartin Matuska { 1812*7a7741afSMartin Matuska struct super_block *sb; 1813*7a7741afSMartin Matuska zfsvfs_t *zfsvfs; 1814*7a7741afSMartin Matuska uint64_t moid, obj, sa_obj, version; 1815*7a7741afSMartin Matuska uint64_t sense = ZFS_CASE_SENSITIVE; 1816*7a7741afSMartin Matuska uint64_t norm = 0; 1817*7a7741afSMartin Matuska nvpair_t *elem; 1818*7a7741afSMartin Matuska int size; 1819*7a7741afSMartin Matuska int error; 1820*7a7741afSMartin Matuska int i; 1821*7a7741afSMartin Matuska znode_t *rootzp = NULL; 1822*7a7741afSMartin Matuska vattr_t vattr; 1823*7a7741afSMartin Matuska znode_t *zp; 1824*7a7741afSMartin Matuska zfs_acl_ids_t acl_ids; 1825*7a7741afSMartin Matuska 1826*7a7741afSMartin Matuska /* 1827*7a7741afSMartin Matuska * First attempt to create master node. 1828*7a7741afSMartin Matuska */ 1829*7a7741afSMartin Matuska /* 1830*7a7741afSMartin Matuska * In an empty objset, there are no blocks to read and thus 1831*7a7741afSMartin Matuska * there can be no i/o errors (which we assert below). 1832*7a7741afSMartin Matuska */ 1833*7a7741afSMartin Matuska moid = MASTER_NODE_OBJ; 1834*7a7741afSMartin Matuska error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE, 1835*7a7741afSMartin Matuska DMU_OT_NONE, 0, tx); 1836*7a7741afSMartin Matuska ASSERT(error == 0); 1837*7a7741afSMartin Matuska 1838*7a7741afSMartin Matuska /* 1839*7a7741afSMartin Matuska * Set starting attributes. 
1840*7a7741afSMartin Matuska */ 1841*7a7741afSMartin Matuska version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os))); 1842*7a7741afSMartin Matuska elem = NULL; 1843*7a7741afSMartin Matuska while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) { 1844*7a7741afSMartin Matuska /* For the moment we expect all zpl props to be uint64_ts */ 1845*7a7741afSMartin Matuska uint64_t val; 1846*7a7741afSMartin Matuska const char *name; 1847*7a7741afSMartin Matuska 1848*7a7741afSMartin Matuska ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64); 1849*7a7741afSMartin Matuska VERIFY(nvpair_value_uint64(elem, &val) == 0); 1850*7a7741afSMartin Matuska name = nvpair_name(elem); 1851*7a7741afSMartin Matuska if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) { 1852*7a7741afSMartin Matuska if (val < version) 1853*7a7741afSMartin Matuska version = val; 1854*7a7741afSMartin Matuska } else { 1855*7a7741afSMartin Matuska error = zap_update(os, moid, name, 8, 1, &val, tx); 1856*7a7741afSMartin Matuska } 1857*7a7741afSMartin Matuska ASSERT(error == 0); 1858*7a7741afSMartin Matuska if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0) 1859*7a7741afSMartin Matuska norm = val; 1860*7a7741afSMartin Matuska else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0) 1861*7a7741afSMartin Matuska sense = val; 1862*7a7741afSMartin Matuska } 1863*7a7741afSMartin Matuska ASSERT(version != 0); 1864*7a7741afSMartin Matuska error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); 1865*7a7741afSMartin Matuska ASSERT(error == 0); 1866*7a7741afSMartin Matuska 1867*7a7741afSMartin Matuska /* 1868*7a7741afSMartin Matuska * Create zap object used for SA attribute registration 1869*7a7741afSMartin Matuska */ 1870*7a7741afSMartin Matuska 1871*7a7741afSMartin Matuska if (version >= ZPL_VERSION_SA) { 1872*7a7741afSMartin Matuska sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, 1873*7a7741afSMartin Matuska DMU_OT_NONE, 0, tx); 1874*7a7741afSMartin Matuska error = zap_add(os, moid, 
ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); 1875*7a7741afSMartin Matuska ASSERT(error == 0); 1876*7a7741afSMartin Matuska } else { 1877*7a7741afSMartin Matuska sa_obj = 0; 1878*7a7741afSMartin Matuska } 1879*7a7741afSMartin Matuska /* 1880*7a7741afSMartin Matuska * Create a delete queue. 1881*7a7741afSMartin Matuska */ 1882*7a7741afSMartin Matuska obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx); 1883*7a7741afSMartin Matuska 1884*7a7741afSMartin Matuska error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx); 1885*7a7741afSMartin Matuska ASSERT(error == 0); 1886*7a7741afSMartin Matuska 1887*7a7741afSMartin Matuska /* 1888*7a7741afSMartin Matuska * Create root znode. Create minimal znode/inode/zfsvfs/sb 1889*7a7741afSMartin Matuska * to allow zfs_mknode to work. 1890*7a7741afSMartin Matuska */ 1891*7a7741afSMartin Matuska vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID; 1892*7a7741afSMartin Matuska vattr.va_mode = S_IFDIR|0755; 1893*7a7741afSMartin Matuska vattr.va_uid = crgetuid(cr); 1894*7a7741afSMartin Matuska vattr.va_gid = crgetgid(cr); 1895*7a7741afSMartin Matuska 1896*7a7741afSMartin Matuska rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP); 1897*7a7741afSMartin Matuska rootzp->z_unlinked = B_FALSE; 1898*7a7741afSMartin Matuska rootzp->z_atime_dirty = B_FALSE; 1899*7a7741afSMartin Matuska rootzp->z_is_sa = USE_SA(version, os); 1900*7a7741afSMartin Matuska rootzp->z_pflags = 0; 1901*7a7741afSMartin Matuska 1902*7a7741afSMartin Matuska zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); 1903*7a7741afSMartin Matuska zfsvfs->z_os = os; 1904*7a7741afSMartin Matuska zfsvfs->z_parent = zfsvfs; 1905*7a7741afSMartin Matuska zfsvfs->z_version = version; 1906*7a7741afSMartin Matuska zfsvfs->z_use_fuids = USE_FUIDS(version, os); 1907*7a7741afSMartin Matuska zfsvfs->z_use_sa = USE_SA(version, os); 1908*7a7741afSMartin Matuska zfsvfs->z_norm = norm; 1909*7a7741afSMartin Matuska 1910*7a7741afSMartin Matuska sb = kmem_zalloc(sizeof (struct super_block), KM_SLEEP); 
1911*7a7741afSMartin Matuska sb->s_fs_info = zfsvfs; 1912*7a7741afSMartin Matuska 1913*7a7741afSMartin Matuska ZTOI(rootzp)->i_sb = sb; 1914*7a7741afSMartin Matuska 1915*7a7741afSMartin Matuska error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, 1916*7a7741afSMartin Matuska &zfsvfs->z_attr_table); 1917*7a7741afSMartin Matuska 1918*7a7741afSMartin Matuska ASSERT(error == 0); 1919*7a7741afSMartin Matuska 1920*7a7741afSMartin Matuska /* 1921*7a7741afSMartin Matuska * Fold case on file systems that are always or sometimes case 1922*7a7741afSMartin Matuska * insensitive. 1923*7a7741afSMartin Matuska */ 1924*7a7741afSMartin Matuska if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED) 1925*7a7741afSMartin Matuska zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; 1926*7a7741afSMartin Matuska 1927*7a7741afSMartin Matuska mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); 1928*7a7741afSMartin Matuska list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), 1929*7a7741afSMartin Matuska offsetof(znode_t, z_link_node)); 1930*7a7741afSMartin Matuska 1931*7a7741afSMartin Matuska size = MIN(1 << (highbit64(zfs_object_mutex_size)-1), ZFS_OBJ_MTX_MAX); 1932*7a7741afSMartin Matuska zfsvfs->z_hold_size = size; 1933*7a7741afSMartin Matuska zfsvfs->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size, 1934*7a7741afSMartin Matuska KM_SLEEP); 1935*7a7741afSMartin Matuska zfsvfs->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP); 1936*7a7741afSMartin Matuska for (i = 0; i != size; i++) { 1937*7a7741afSMartin Matuska avl_create(&zfsvfs->z_hold_trees[i], zfs_znode_hold_compare, 1938*7a7741afSMartin Matuska sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node)); 1939*7a7741afSMartin Matuska mutex_init(&zfsvfs->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL); 1940*7a7741afSMartin Matuska } 1941*7a7741afSMartin Matuska 1942*7a7741afSMartin Matuska VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr, 1943*7a7741afSMartin Matuska cr, NULL, &acl_ids, zfs_init_idmap)); 
1944*7a7741afSMartin Matuska zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids); 1945*7a7741afSMartin Matuska ASSERT3P(zp, ==, rootzp); 1946*7a7741afSMartin Matuska error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx); 1947*7a7741afSMartin Matuska ASSERT(error == 0); 1948*7a7741afSMartin Matuska zfs_acl_ids_free(&acl_ids); 1949*7a7741afSMartin Matuska 1950*7a7741afSMartin Matuska atomic_set(&ZTOI(rootzp)->i_count, 0); 1951*7a7741afSMartin Matuska sa_handle_destroy(rootzp->z_sa_hdl); 1952*7a7741afSMartin Matuska kmem_cache_free(znode_cache, rootzp); 1953*7a7741afSMartin Matuska 1954*7a7741afSMartin Matuska for (i = 0; i != size; i++) { 1955*7a7741afSMartin Matuska avl_destroy(&zfsvfs->z_hold_trees[i]); 1956*7a7741afSMartin Matuska mutex_destroy(&zfsvfs->z_hold_locks[i]); 1957*7a7741afSMartin Matuska } 1958*7a7741afSMartin Matuska 1959*7a7741afSMartin Matuska mutex_destroy(&zfsvfs->z_znodes_lock); 1960*7a7741afSMartin Matuska 1961*7a7741afSMartin Matuska vmem_free(zfsvfs->z_hold_trees, sizeof (avl_tree_t) * size); 1962*7a7741afSMartin Matuska vmem_free(zfsvfs->z_hold_locks, sizeof (kmutex_t) * size); 1963*7a7741afSMartin Matuska kmem_free(sb, sizeof (struct super_block)); 1964*7a7741afSMartin Matuska kmem_free(zfsvfs, sizeof (zfsvfs_t)); 1965*7a7741afSMartin Matuska } 1966*7a7741afSMartin Matuska 1967*7a7741afSMartin Matuska EXPORT_SYMBOL(zfs_create_fs); 1968*7a7741afSMartin Matuska EXPORT_SYMBOL(zfs_obj_to_path); 1969*7a7741afSMartin Matuska 1970*7a7741afSMartin Matuska module_param(zfs_object_mutex_size, uint, 0644); 1971*7a7741afSMartin Matuska MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array"); 1972*7a7741afSMartin Matuska module_param(zfs_unlink_suspend_progress, int, 0644); 1973*7a7741afSMartin Matuska MODULE_PARM_DESC(zfs_unlink_suspend_progress, "Set to prevent async unlinks " 1974*7a7741afSMartin Matuska "(debug - leaks space into the unlinked set)"); 1975