xref: /freebsd-src/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode_os.c (revision 17aab35a77a1b1bf02fc85bb8ffadccb0ca5006d)
1*7a7741afSMartin Matuska /*
2*7a7741afSMartin Matuska  * CDDL HEADER START
3*7a7741afSMartin Matuska  *
4*7a7741afSMartin Matuska  * The contents of this file are subject to the terms of the
5*7a7741afSMartin Matuska  * Common Development and Distribution License (the "License").
6*7a7741afSMartin Matuska  * You may not use this file except in compliance with the License.
7*7a7741afSMartin Matuska  *
8*7a7741afSMartin Matuska  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*7a7741afSMartin Matuska  * or https://opensource.org/licenses/CDDL-1.0.
10*7a7741afSMartin Matuska  * See the License for the specific language governing permissions
11*7a7741afSMartin Matuska  * and limitations under the License.
12*7a7741afSMartin Matuska  *
13*7a7741afSMartin Matuska  * When distributing Covered Code, include this CDDL HEADER in each
14*7a7741afSMartin Matuska  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*7a7741afSMartin Matuska  * If applicable, add the following below this CDDL HEADER, with the
16*7a7741afSMartin Matuska  * fields enclosed by brackets "[]" replaced with your own identifying
17*7a7741afSMartin Matuska  * information: Portions Copyright [yyyy] [name of copyright owner]
18*7a7741afSMartin Matuska  *
19*7a7741afSMartin Matuska  * CDDL HEADER END
20*7a7741afSMartin Matuska  */
21*7a7741afSMartin Matuska /*
22*7a7741afSMartin Matuska  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23*7a7741afSMartin Matuska  * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
24*7a7741afSMartin Matuska  */
25*7a7741afSMartin Matuska 
26*7a7741afSMartin Matuska /* Portions Copyright 2007 Jeremy Teo */
27*7a7741afSMartin Matuska 
28*7a7741afSMartin Matuska #include <sys/types.h>
29*7a7741afSMartin Matuska #include <sys/param.h>
30*7a7741afSMartin Matuska #include <sys/time.h>
31*7a7741afSMartin Matuska #include <sys/sysmacros.h>
32*7a7741afSMartin Matuska #include <sys/mntent.h>
33*7a7741afSMartin Matuska #include <sys/u8_textprep.h>
34*7a7741afSMartin Matuska #include <sys/dsl_dataset.h>
35*7a7741afSMartin Matuska #include <sys/vfs.h>
36*7a7741afSMartin Matuska #include <sys/vnode.h>
37*7a7741afSMartin Matuska #include <sys/file.h>
38*7a7741afSMartin Matuska #include <sys/kmem.h>
39*7a7741afSMartin Matuska #include <sys/errno.h>
40*7a7741afSMartin Matuska #include <sys/atomic.h>
41*7a7741afSMartin Matuska #include <sys/zfs_dir.h>
42*7a7741afSMartin Matuska #include <sys/zfs_acl.h>
43*7a7741afSMartin Matuska #include <sys/zfs_ioctl.h>
44*7a7741afSMartin Matuska #include <sys/zfs_rlock.h>
45*7a7741afSMartin Matuska #include <sys/zfs_fuid.h>
46*7a7741afSMartin Matuska #include <sys/zfs_vnops.h>
47*7a7741afSMartin Matuska #include <sys/zfs_ctldir.h>
48*7a7741afSMartin Matuska #include <sys/dnode.h>
49*7a7741afSMartin Matuska #include <sys/fs/zfs.h>
50*7a7741afSMartin Matuska #include <sys/zpl.h>
51*7a7741afSMartin Matuska #include <sys/dmu.h>
52*7a7741afSMartin Matuska #include <sys/dmu_objset.h>
53*7a7741afSMartin Matuska #include <sys/dmu_tx.h>
54*7a7741afSMartin Matuska #include <sys/zfs_refcount.h>
55*7a7741afSMartin Matuska #include <sys/stat.h>
56*7a7741afSMartin Matuska #include <sys/zap.h>
57*7a7741afSMartin Matuska #include <sys/zfs_znode.h>
58*7a7741afSMartin Matuska #include <sys/sa.h>
59*7a7741afSMartin Matuska #include <sys/zfs_sa.h>
60*7a7741afSMartin Matuska #include <sys/zfs_stat.h>
61*7a7741afSMartin Matuska #include <linux/mm_compat.h>
62*7a7741afSMartin Matuska 
63*7a7741afSMartin Matuska #include "zfs_prop.h"
64*7a7741afSMartin Matuska #include "zfs_comutil.h"
65*7a7741afSMartin Matuska 
/* Cache of znode_t objects; created in zfs_znode_init(). */
static kmem_cache_t *znode_cache = NULL;
/*
 * Cache of znode_hold_t per-object hold records; created in
 * zfs_znode_init().
 */
static kmem_cache_t *znode_hold_cache = NULL;
/*
 * Initialized to ZFS_OBJ_MTX_SZ.  NOTE(review): presumably the number of
 * z_hold_locks / z_hold_trees slots selected by ZFS_OBJ_HASH() -- confirm
 * against the macro definition, which is not visible in this file.
 */
unsigned int zfs_object_mutex_size = ZFS_OBJ_MTX_SZ;

/*
 * This is used by the test suite so that it can delay znodes from being
 * freed in order to inspect the unlinked set.
 */
static int zfs_unlink_suspend_progress = 0;
75*7a7741afSMartin Matuska 
76*7a7741afSMartin Matuska /*
77*7a7741afSMartin Matuska  * This callback is invoked when acquiring a RL_WRITER or RL_APPEND lock on
78*7a7741afSMartin Matuska  * z_rangelock. It will modify the offset and length of the lock to reflect
79*7a7741afSMartin Matuska  * znode-specific information, and convert RL_APPEND to RL_WRITER.  This is
80*7a7741afSMartin Matuska  * called with the rangelock_t's rl_lock held, which avoids races.
81*7a7741afSMartin Matuska  */
82*7a7741afSMartin Matuska static void
83*7a7741afSMartin Matuska zfs_rangelock_cb(zfs_locked_range_t *new, void *arg)
84*7a7741afSMartin Matuska {
85*7a7741afSMartin Matuska 	znode_t *zp = arg;
86*7a7741afSMartin Matuska 
87*7a7741afSMartin Matuska 	/*
88*7a7741afSMartin Matuska 	 * If in append mode, convert to writer and lock starting at the
89*7a7741afSMartin Matuska 	 * current end of file.
90*7a7741afSMartin Matuska 	 */
91*7a7741afSMartin Matuska 	if (new->lr_type == RL_APPEND) {
92*7a7741afSMartin Matuska 		new->lr_offset = zp->z_size;
93*7a7741afSMartin Matuska 		new->lr_type = RL_WRITER;
94*7a7741afSMartin Matuska 	}
95*7a7741afSMartin Matuska 
96*7a7741afSMartin Matuska 	/*
97*7a7741afSMartin Matuska 	 * If we need to grow the block size then lock the whole file range.
98*7a7741afSMartin Matuska 	 */
99*7a7741afSMartin Matuska 	uint64_t end_size = MAX(zp->z_size, new->lr_offset + new->lr_length);
100*7a7741afSMartin Matuska 	if (end_size > zp->z_blksz && (!ISP2(zp->z_blksz) ||
101*7a7741afSMartin Matuska 	    zp->z_blksz < ZTOZSB(zp)->z_max_blksz)) {
102*7a7741afSMartin Matuska 		new->lr_offset = 0;
103*7a7741afSMartin Matuska 		new->lr_length = UINT64_MAX;
104*7a7741afSMartin Matuska 	}
105*7a7741afSMartin Matuska }
106*7a7741afSMartin Matuska 
107*7a7741afSMartin Matuska static int
108*7a7741afSMartin Matuska zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
109*7a7741afSMartin Matuska {
110*7a7741afSMartin Matuska 	(void) arg, (void) kmflags;
111*7a7741afSMartin Matuska 	znode_t *zp = buf;
112*7a7741afSMartin Matuska 
113*7a7741afSMartin Matuska 	inode_init_once(ZTOI(zp));
114*7a7741afSMartin Matuska 	list_link_init(&zp->z_link_node);
115*7a7741afSMartin Matuska 
116*7a7741afSMartin Matuska 	mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
117*7a7741afSMartin Matuska 	rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
118*7a7741afSMartin Matuska 	rw_init(&zp->z_name_lock, NULL, RW_NOLOCKDEP, NULL);
119*7a7741afSMartin Matuska 	mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
120*7a7741afSMartin Matuska 	rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL);
121*7a7741afSMartin Matuska 
122*7a7741afSMartin Matuska 	zfs_rangelock_init(&zp->z_rangelock, zfs_rangelock_cb, zp);
123*7a7741afSMartin Matuska 
124*7a7741afSMartin Matuska 	zp->z_dirlocks = NULL;
125*7a7741afSMartin Matuska 	zp->z_acl_cached = NULL;
126*7a7741afSMartin Matuska 	zp->z_xattr_cached = NULL;
127*7a7741afSMartin Matuska 	zp->z_xattr_parent = 0;
128*7a7741afSMartin Matuska 	zp->z_sync_writes_cnt = 0;
129*7a7741afSMartin Matuska 	zp->z_async_writes_cnt = 0;
130*7a7741afSMartin Matuska 
131*7a7741afSMartin Matuska 	return (0);
132*7a7741afSMartin Matuska }
133*7a7741afSMartin Matuska 
134*7a7741afSMartin Matuska static void
135*7a7741afSMartin Matuska zfs_znode_cache_destructor(void *buf, void *arg)
136*7a7741afSMartin Matuska {
137*7a7741afSMartin Matuska 	(void) arg;
138*7a7741afSMartin Matuska 	znode_t *zp = buf;
139*7a7741afSMartin Matuska 
140*7a7741afSMartin Matuska 	ASSERT(!list_link_active(&zp->z_link_node));
141*7a7741afSMartin Matuska 	mutex_destroy(&zp->z_lock);
142*7a7741afSMartin Matuska 	rw_destroy(&zp->z_parent_lock);
143*7a7741afSMartin Matuska 	rw_destroy(&zp->z_name_lock);
144*7a7741afSMartin Matuska 	mutex_destroy(&zp->z_acl_lock);
145*7a7741afSMartin Matuska 	rw_destroy(&zp->z_xattr_lock);
146*7a7741afSMartin Matuska 	zfs_rangelock_fini(&zp->z_rangelock);
147*7a7741afSMartin Matuska 
148*7a7741afSMartin Matuska 	ASSERT3P(zp->z_dirlocks, ==, NULL);
149*7a7741afSMartin Matuska 	ASSERT3P(zp->z_acl_cached, ==, NULL);
150*7a7741afSMartin Matuska 	ASSERT3P(zp->z_xattr_cached, ==, NULL);
151*7a7741afSMartin Matuska 
152*7a7741afSMartin Matuska 	ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt));
153*7a7741afSMartin Matuska 	ASSERT0(atomic_load_32(&zp->z_async_writes_cnt));
154*7a7741afSMartin Matuska }
155*7a7741afSMartin Matuska 
156*7a7741afSMartin Matuska static int
157*7a7741afSMartin Matuska zfs_znode_hold_cache_constructor(void *buf, void *arg, int kmflags)
158*7a7741afSMartin Matuska {
159*7a7741afSMartin Matuska 	(void) arg, (void) kmflags;
160*7a7741afSMartin Matuska 	znode_hold_t *zh = buf;
161*7a7741afSMartin Matuska 
162*7a7741afSMartin Matuska 	mutex_init(&zh->zh_lock, NULL, MUTEX_DEFAULT, NULL);
163*7a7741afSMartin Matuska 	zh->zh_refcount = 0;
164*7a7741afSMartin Matuska 
165*7a7741afSMartin Matuska 	return (0);
166*7a7741afSMartin Matuska }
167*7a7741afSMartin Matuska 
168*7a7741afSMartin Matuska static void
169*7a7741afSMartin Matuska zfs_znode_hold_cache_destructor(void *buf, void *arg)
170*7a7741afSMartin Matuska {
171*7a7741afSMartin Matuska 	(void) arg;
172*7a7741afSMartin Matuska 	znode_hold_t *zh = buf;
173*7a7741afSMartin Matuska 
174*7a7741afSMartin Matuska 	mutex_destroy(&zh->zh_lock);
175*7a7741afSMartin Matuska }
176*7a7741afSMartin Matuska 
177*7a7741afSMartin Matuska void
178*7a7741afSMartin Matuska zfs_znode_init(void)
179*7a7741afSMartin Matuska {
180*7a7741afSMartin Matuska 	/*
181*7a7741afSMartin Matuska 	 * Initialize zcache.  The KMC_SLAB hint is used in order that it be
182*7a7741afSMartin Matuska 	 * backed by kmalloc() when on the Linux slab in order that any
183*7a7741afSMartin Matuska 	 * wait_on_bit() operations on the related inode operate properly.
184*7a7741afSMartin Matuska 	 */
185*7a7741afSMartin Matuska 	ASSERT(znode_cache == NULL);
186*7a7741afSMartin Matuska 	znode_cache = kmem_cache_create("zfs_znode_cache",
187*7a7741afSMartin Matuska 	    sizeof (znode_t), 0, zfs_znode_cache_constructor,
188*7a7741afSMartin Matuska 	    zfs_znode_cache_destructor, NULL, NULL, NULL,
189*7a7741afSMartin Matuska 	    KMC_SLAB | KMC_RECLAIMABLE);
190*7a7741afSMartin Matuska 
191*7a7741afSMartin Matuska 	ASSERT(znode_hold_cache == NULL);
192*7a7741afSMartin Matuska 	znode_hold_cache = kmem_cache_create("zfs_znode_hold_cache",
193*7a7741afSMartin Matuska 	    sizeof (znode_hold_t), 0, zfs_znode_hold_cache_constructor,
194*7a7741afSMartin Matuska 	    zfs_znode_hold_cache_destructor, NULL, NULL, NULL, 0);
195*7a7741afSMartin Matuska }
196*7a7741afSMartin Matuska 
197*7a7741afSMartin Matuska void
198*7a7741afSMartin Matuska zfs_znode_fini(void)
199*7a7741afSMartin Matuska {
200*7a7741afSMartin Matuska 	/*
201*7a7741afSMartin Matuska 	 * Cleanup zcache
202*7a7741afSMartin Matuska 	 */
203*7a7741afSMartin Matuska 	if (znode_cache)
204*7a7741afSMartin Matuska 		kmem_cache_destroy(znode_cache);
205*7a7741afSMartin Matuska 	znode_cache = NULL;
206*7a7741afSMartin Matuska 
207*7a7741afSMartin Matuska 	if (znode_hold_cache)
208*7a7741afSMartin Matuska 		kmem_cache_destroy(znode_hold_cache);
209*7a7741afSMartin Matuska 	znode_hold_cache = NULL;
210*7a7741afSMartin Matuska }
211*7a7741afSMartin Matuska 
212*7a7741afSMartin Matuska /*
213*7a7741afSMartin Matuska  * The zfs_znode_hold_enter() / zfs_znode_hold_exit() functions are used to
214*7a7741afSMartin Matuska  * serialize access to a znode and its SA buffer while the object is being
215*7a7741afSMartin Matuska  * created or destroyed.  This kind of locking would normally reside in the
216*7a7741afSMartin Matuska  * znode itself but in this case that's impossible because the znode and SA
217*7a7741afSMartin Matuska  * buffer may not yet exist.  Therefore the locking is handled externally
 * with an array of mutexes and AVL trees which contain per-object locks.
219*7a7741afSMartin Matuska  *
 * In zfs_znode_hold_enter() a per-object lock is created as needed, inserted
 * into the correct AVL tree, and finally the per-object lock is held.  In
222*7a7741afSMartin Matuska  * zfs_znode_hold_exit() the process is reversed.  The per-object lock is
223*7a7741afSMartin Matuska  * released, removed from the AVL tree and destroyed if there are no waiters.
224*7a7741afSMartin Matuska  *
225*7a7741afSMartin Matuska  * This scheme has two important properties:
226*7a7741afSMartin Matuska  *
227*7a7741afSMartin Matuska  * 1) No memory allocations are performed while holding one of the z_hold_locks.
228*7a7741afSMartin Matuska  *    This ensures evict(), which can be called from direct memory reclaim, will
229*7a7741afSMartin Matuska  *    never block waiting on a z_hold_locks which just happens to have hashed
230*7a7741afSMartin Matuska  *    to the same index.
231*7a7741afSMartin Matuska  *
232*7a7741afSMartin Matuska  * 2) All locks used to serialize access to an object are per-object and never
233*7a7741afSMartin Matuska  *    shared.  This minimizes lock contention without creating a large number
234*7a7741afSMartin Matuska  *    of dedicated locks.
235*7a7741afSMartin Matuska  *
 * On the downside it does require znode_hold_t structures to be frequently
 * allocated and freed.  However, because these are backed by a kmem cache
 * and are very short-lived, this cost is minimal.
239*7a7741afSMartin Matuska  */
240*7a7741afSMartin Matuska int
241*7a7741afSMartin Matuska zfs_znode_hold_compare(const void *a, const void *b)
242*7a7741afSMartin Matuska {
243*7a7741afSMartin Matuska 	const znode_hold_t *zh_a = (const znode_hold_t *)a;
244*7a7741afSMartin Matuska 	const znode_hold_t *zh_b = (const znode_hold_t *)b;
245*7a7741afSMartin Matuska 
246*7a7741afSMartin Matuska 	return (TREE_CMP(zh_a->zh_obj, zh_b->zh_obj));
247*7a7741afSMartin Matuska }
248*7a7741afSMartin Matuska 
249*7a7741afSMartin Matuska static boolean_t __maybe_unused
250*7a7741afSMartin Matuska zfs_znode_held(zfsvfs_t *zfsvfs, uint64_t obj)
251*7a7741afSMartin Matuska {
252*7a7741afSMartin Matuska 	znode_hold_t *zh, search;
253*7a7741afSMartin Matuska 	int i = ZFS_OBJ_HASH(zfsvfs, obj);
254*7a7741afSMartin Matuska 	boolean_t held;
255*7a7741afSMartin Matuska 
256*7a7741afSMartin Matuska 	search.zh_obj = obj;
257*7a7741afSMartin Matuska 
258*7a7741afSMartin Matuska 	mutex_enter(&zfsvfs->z_hold_locks[i]);
259*7a7741afSMartin Matuska 	zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL);
260*7a7741afSMartin Matuska 	held = (zh && MUTEX_HELD(&zh->zh_lock)) ? B_TRUE : B_FALSE;
261*7a7741afSMartin Matuska 	mutex_exit(&zfsvfs->z_hold_locks[i]);
262*7a7741afSMartin Matuska 
263*7a7741afSMartin Matuska 	return (held);
264*7a7741afSMartin Matuska }
265*7a7741afSMartin Matuska 
266*7a7741afSMartin Matuska znode_hold_t *
267*7a7741afSMartin Matuska zfs_znode_hold_enter(zfsvfs_t *zfsvfs, uint64_t obj)
268*7a7741afSMartin Matuska {
269*7a7741afSMartin Matuska 	znode_hold_t *zh, *zh_new, search;
270*7a7741afSMartin Matuska 	int i = ZFS_OBJ_HASH(zfsvfs, obj);
271*7a7741afSMartin Matuska 	boolean_t found = B_FALSE;
272*7a7741afSMartin Matuska 
273*7a7741afSMartin Matuska 	zh_new = kmem_cache_alloc(znode_hold_cache, KM_SLEEP);
274*7a7741afSMartin Matuska 	search.zh_obj = obj;
275*7a7741afSMartin Matuska 
276*7a7741afSMartin Matuska 	mutex_enter(&zfsvfs->z_hold_locks[i]);
277*7a7741afSMartin Matuska 	zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL);
278*7a7741afSMartin Matuska 	if (likely(zh == NULL)) {
279*7a7741afSMartin Matuska 		zh = zh_new;
280*7a7741afSMartin Matuska 		zh->zh_obj = obj;
281*7a7741afSMartin Matuska 		avl_add(&zfsvfs->z_hold_trees[i], zh);
282*7a7741afSMartin Matuska 	} else {
283*7a7741afSMartin Matuska 		ASSERT3U(zh->zh_obj, ==, obj);
284*7a7741afSMartin Matuska 		found = B_TRUE;
285*7a7741afSMartin Matuska 	}
286*7a7741afSMartin Matuska 	zh->zh_refcount++;
287*7a7741afSMartin Matuska 	ASSERT3S(zh->zh_refcount, >, 0);
288*7a7741afSMartin Matuska 	mutex_exit(&zfsvfs->z_hold_locks[i]);
289*7a7741afSMartin Matuska 
290*7a7741afSMartin Matuska 	if (found == B_TRUE)
291*7a7741afSMartin Matuska 		kmem_cache_free(znode_hold_cache, zh_new);
292*7a7741afSMartin Matuska 
293*7a7741afSMartin Matuska 	ASSERT(MUTEX_NOT_HELD(&zh->zh_lock));
294*7a7741afSMartin Matuska 	mutex_enter(&zh->zh_lock);
295*7a7741afSMartin Matuska 
296*7a7741afSMartin Matuska 	return (zh);
297*7a7741afSMartin Matuska }
298*7a7741afSMartin Matuska 
299*7a7741afSMartin Matuska void
300*7a7741afSMartin Matuska zfs_znode_hold_exit(zfsvfs_t *zfsvfs, znode_hold_t *zh)
301*7a7741afSMartin Matuska {
302*7a7741afSMartin Matuska 	int i = ZFS_OBJ_HASH(zfsvfs, zh->zh_obj);
303*7a7741afSMartin Matuska 	boolean_t remove = B_FALSE;
304*7a7741afSMartin Matuska 
305*7a7741afSMartin Matuska 	ASSERT(zfs_znode_held(zfsvfs, zh->zh_obj));
306*7a7741afSMartin Matuska 	mutex_exit(&zh->zh_lock);
307*7a7741afSMartin Matuska 
308*7a7741afSMartin Matuska 	mutex_enter(&zfsvfs->z_hold_locks[i]);
309*7a7741afSMartin Matuska 	ASSERT3S(zh->zh_refcount, >, 0);
310*7a7741afSMartin Matuska 	if (--zh->zh_refcount == 0) {
311*7a7741afSMartin Matuska 		avl_remove(&zfsvfs->z_hold_trees[i], zh);
312*7a7741afSMartin Matuska 		remove = B_TRUE;
313*7a7741afSMartin Matuska 	}
314*7a7741afSMartin Matuska 	mutex_exit(&zfsvfs->z_hold_locks[i]);
315*7a7741afSMartin Matuska 
316*7a7741afSMartin Matuska 	if (remove == B_TRUE)
317*7a7741afSMartin Matuska 		kmem_cache_free(znode_hold_cache, zh);
318*7a7741afSMartin Matuska }
319*7a7741afSMartin Matuska 
320*7a7741afSMartin Matuska dev_t
321*7a7741afSMartin Matuska zfs_cmpldev(uint64_t dev)
322*7a7741afSMartin Matuska {
323*7a7741afSMartin Matuska 	return (dev);
324*7a7741afSMartin Matuska }
325*7a7741afSMartin Matuska 
326*7a7741afSMartin Matuska static void
327*7a7741afSMartin Matuska zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
328*7a7741afSMartin Matuska     dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
329*7a7741afSMartin Matuska {
330*7a7741afSMartin Matuska 	ASSERT(zfs_znode_held(zfsvfs, zp->z_id));
331*7a7741afSMartin Matuska 
332*7a7741afSMartin Matuska 	mutex_enter(&zp->z_lock);
333*7a7741afSMartin Matuska 
334*7a7741afSMartin Matuska 	ASSERT(zp->z_sa_hdl == NULL);
335*7a7741afSMartin Matuska 	ASSERT(zp->z_acl_cached == NULL);
336*7a7741afSMartin Matuska 	if (sa_hdl == NULL) {
337*7a7741afSMartin Matuska 		VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp,
338*7a7741afSMartin Matuska 		    SA_HDL_SHARED, &zp->z_sa_hdl));
339*7a7741afSMartin Matuska 	} else {
340*7a7741afSMartin Matuska 		zp->z_sa_hdl = sa_hdl;
341*7a7741afSMartin Matuska 		sa_set_userp(sa_hdl, zp);
342*7a7741afSMartin Matuska 	}
343*7a7741afSMartin Matuska 
344*7a7741afSMartin Matuska 	zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE;
345*7a7741afSMartin Matuska 
346*7a7741afSMartin Matuska 	mutex_exit(&zp->z_lock);
347*7a7741afSMartin Matuska }
348*7a7741afSMartin Matuska 
349*7a7741afSMartin Matuska void
350*7a7741afSMartin Matuska zfs_znode_dmu_fini(znode_t *zp)
351*7a7741afSMartin Matuska {
352*7a7741afSMartin Matuska 	ASSERT(zfs_znode_held(ZTOZSB(zp), zp->z_id) ||
353*7a7741afSMartin Matuska 	    RW_WRITE_HELD(&ZTOZSB(zp)->z_teardown_inactive_lock));
354*7a7741afSMartin Matuska 
355*7a7741afSMartin Matuska 	sa_handle_destroy(zp->z_sa_hdl);
356*7a7741afSMartin Matuska 	zp->z_sa_hdl = NULL;
357*7a7741afSMartin Matuska }
358*7a7741afSMartin Matuska 
359*7a7741afSMartin Matuska /*
360*7a7741afSMartin Matuska  * Called by new_inode() to allocate a new inode.
361*7a7741afSMartin Matuska  */
362*7a7741afSMartin Matuska int
363*7a7741afSMartin Matuska zfs_inode_alloc(struct super_block *sb, struct inode **ip)
364*7a7741afSMartin Matuska {
365*7a7741afSMartin Matuska 	znode_t *zp;
366*7a7741afSMartin Matuska 
367*7a7741afSMartin Matuska 	zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
368*7a7741afSMartin Matuska 	*ip = ZTOI(zp);
369*7a7741afSMartin Matuska 
370*7a7741afSMartin Matuska 	return (0);
371*7a7741afSMartin Matuska }
372*7a7741afSMartin Matuska 
373*7a7741afSMartin Matuska /*
374*7a7741afSMartin Matuska  * Called in multiple places when an inode should be destroyed.
375*7a7741afSMartin Matuska  */
376*7a7741afSMartin Matuska void
377*7a7741afSMartin Matuska zfs_inode_destroy(struct inode *ip)
378*7a7741afSMartin Matuska {
379*7a7741afSMartin Matuska 	znode_t *zp = ITOZ(ip);
380*7a7741afSMartin Matuska 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
381*7a7741afSMartin Matuska 
382*7a7741afSMartin Matuska 	mutex_enter(&zfsvfs->z_znodes_lock);
383*7a7741afSMartin Matuska 	if (list_link_active(&zp->z_link_node)) {
384*7a7741afSMartin Matuska 		list_remove(&zfsvfs->z_all_znodes, zp);
385*7a7741afSMartin Matuska 	}
386*7a7741afSMartin Matuska 	mutex_exit(&zfsvfs->z_znodes_lock);
387*7a7741afSMartin Matuska 
388*7a7741afSMartin Matuska 	if (zp->z_acl_cached) {
389*7a7741afSMartin Matuska 		zfs_acl_free(zp->z_acl_cached);
390*7a7741afSMartin Matuska 		zp->z_acl_cached = NULL;
391*7a7741afSMartin Matuska 	}
392*7a7741afSMartin Matuska 
393*7a7741afSMartin Matuska 	if (zp->z_xattr_cached) {
394*7a7741afSMartin Matuska 		nvlist_free(zp->z_xattr_cached);
395*7a7741afSMartin Matuska 		zp->z_xattr_cached = NULL;
396*7a7741afSMartin Matuska 	}
397*7a7741afSMartin Matuska 
398*7a7741afSMartin Matuska 	kmem_cache_free(znode_cache, zp);
399*7a7741afSMartin Matuska }
400*7a7741afSMartin Matuska 
401*7a7741afSMartin Matuska static void
402*7a7741afSMartin Matuska zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip)
403*7a7741afSMartin Matuska {
404*7a7741afSMartin Matuska 	uint64_t rdev = 0;
405*7a7741afSMartin Matuska 
406*7a7741afSMartin Matuska 	switch (ip->i_mode & S_IFMT) {
407*7a7741afSMartin Matuska 	case S_IFREG:
408*7a7741afSMartin Matuska 		ip->i_op = &zpl_inode_operations;
409*7a7741afSMartin Matuska 		ip->i_fop = &zpl_file_operations;
410*7a7741afSMartin Matuska 		ip->i_mapping->a_ops = &zpl_address_space_operations;
411*7a7741afSMartin Matuska 		break;
412*7a7741afSMartin Matuska 
413*7a7741afSMartin Matuska 	case S_IFDIR:
414*7a7741afSMartin Matuska 		ip->i_op = &zpl_dir_inode_operations;
415*7a7741afSMartin Matuska 		ip->i_fop = &zpl_dir_file_operations;
416*7a7741afSMartin Matuska 		ITOZ(ip)->z_zn_prefetch = B_TRUE;
417*7a7741afSMartin Matuska 		break;
418*7a7741afSMartin Matuska 
419*7a7741afSMartin Matuska 	case S_IFLNK:
420*7a7741afSMartin Matuska 		ip->i_op = &zpl_symlink_inode_operations;
421*7a7741afSMartin Matuska 		break;
422*7a7741afSMartin Matuska 
423*7a7741afSMartin Matuska 	/*
424*7a7741afSMartin Matuska 	 * rdev is only stored in a SA only for device files.
425*7a7741afSMartin Matuska 	 */
426*7a7741afSMartin Matuska 	case S_IFCHR:
427*7a7741afSMartin Matuska 	case S_IFBLK:
428*7a7741afSMartin Matuska 		(void) sa_lookup(ITOZ(ip)->z_sa_hdl, SA_ZPL_RDEV(zfsvfs), &rdev,
429*7a7741afSMartin Matuska 		    sizeof (rdev));
430*7a7741afSMartin Matuska 		zfs_fallthrough;
431*7a7741afSMartin Matuska 	case S_IFIFO:
432*7a7741afSMartin Matuska 	case S_IFSOCK:
433*7a7741afSMartin Matuska 		init_special_inode(ip, ip->i_mode, rdev);
434*7a7741afSMartin Matuska 		ip->i_op = &zpl_special_inode_operations;
435*7a7741afSMartin Matuska 		break;
436*7a7741afSMartin Matuska 
437*7a7741afSMartin Matuska 	default:
438*7a7741afSMartin Matuska 		zfs_panic_recover("inode %llu has invalid mode: 0x%x\n",
439*7a7741afSMartin Matuska 		    (u_longlong_t)ip->i_ino, ip->i_mode);
440*7a7741afSMartin Matuska 
441*7a7741afSMartin Matuska 		/* Assume the inode is a file and attempt to continue */
442*7a7741afSMartin Matuska 		ip->i_mode = S_IFREG | 0644;
443*7a7741afSMartin Matuska 		ip->i_op = &zpl_inode_operations;
444*7a7741afSMartin Matuska 		ip->i_fop = &zpl_file_operations;
445*7a7741afSMartin Matuska 		ip->i_mapping->a_ops = &zpl_address_space_operations;
446*7a7741afSMartin Matuska 		break;
447*7a7741afSMartin Matuska 	}
448*7a7741afSMartin Matuska }
449*7a7741afSMartin Matuska 
450*7a7741afSMartin Matuska static void
451*7a7741afSMartin Matuska zfs_set_inode_flags(znode_t *zp, struct inode *ip)
452*7a7741afSMartin Matuska {
453*7a7741afSMartin Matuska 	/*
454*7a7741afSMartin Matuska 	 * Linux and Solaris have different sets of file attributes, so we
455*7a7741afSMartin Matuska 	 * restrict this conversion to the intersection of the two.
456*7a7741afSMartin Matuska 	 */
457*7a7741afSMartin Matuska 	unsigned int flags = 0;
458*7a7741afSMartin Matuska 	if (zp->z_pflags & ZFS_IMMUTABLE)
459*7a7741afSMartin Matuska 		flags |= S_IMMUTABLE;
460*7a7741afSMartin Matuska 	if (zp->z_pflags & ZFS_APPENDONLY)
461*7a7741afSMartin Matuska 		flags |= S_APPEND;
462*7a7741afSMartin Matuska 
463*7a7741afSMartin Matuska 	inode_set_flags(ip, flags, S_IMMUTABLE|S_APPEND);
464*7a7741afSMartin Matuska }
465*7a7741afSMartin Matuska 
466*7a7741afSMartin Matuska /*
467*7a7741afSMartin Matuska  * Update the embedded inode given the znode.
468*7a7741afSMartin Matuska  */
469*7a7741afSMartin Matuska void
470*7a7741afSMartin Matuska zfs_znode_update_vfs(znode_t *zp)
471*7a7741afSMartin Matuska {
472*7a7741afSMartin Matuska 	struct inode	*ip;
473*7a7741afSMartin Matuska 	uint32_t	blksize;
474*7a7741afSMartin Matuska 	u_longlong_t	i_blocks;
475*7a7741afSMartin Matuska 
476*7a7741afSMartin Matuska 	ASSERT(zp != NULL);
477*7a7741afSMartin Matuska 	ip = ZTOI(zp);
478*7a7741afSMartin Matuska 
479*7a7741afSMartin Matuska 	/* Skip .zfs control nodes which do not exist on disk. */
480*7a7741afSMartin Matuska 	if (zfsctl_is_node(ip))
481*7a7741afSMartin Matuska 		return;
482*7a7741afSMartin Matuska 
483*7a7741afSMartin Matuska 	dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize, &i_blocks);
484*7a7741afSMartin Matuska 
485*7a7741afSMartin Matuska 	spin_lock(&ip->i_lock);
486*7a7741afSMartin Matuska 	ip->i_mode = zp->z_mode;
487*7a7741afSMartin Matuska 	ip->i_blocks = i_blocks;
488*7a7741afSMartin Matuska 	i_size_write(ip, zp->z_size);
489*7a7741afSMartin Matuska 	spin_unlock(&ip->i_lock);
490*7a7741afSMartin Matuska }
491*7a7741afSMartin Matuska 
492*7a7741afSMartin Matuska 
493*7a7741afSMartin Matuska /*
494*7a7741afSMartin Matuska  * Construct a znode+inode and initialize.
495*7a7741afSMartin Matuska  *
 * This does not call dmu_set_user(); that is left up to the caller,
 * in case the caller does not want to return the znode.
499*7a7741afSMartin Matuska  */
500*7a7741afSMartin Matuska static znode_t *
501*7a7741afSMartin Matuska zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
502*7a7741afSMartin Matuska     dmu_object_type_t obj_type, sa_handle_t *hdl)
503*7a7741afSMartin Matuska {
504*7a7741afSMartin Matuska 	znode_t	*zp;
505*7a7741afSMartin Matuska 	struct inode *ip;
506*7a7741afSMartin Matuska 	uint64_t mode;
507*7a7741afSMartin Matuska 	uint64_t parent;
508*7a7741afSMartin Matuska 	uint64_t tmp_gen;
509*7a7741afSMartin Matuska 	uint64_t links;
510*7a7741afSMartin Matuska 	uint64_t z_uid, z_gid;
511*7a7741afSMartin Matuska 	uint64_t atime[2], mtime[2], ctime[2], btime[2];
512*7a7741afSMartin Matuska 	inode_timespec_t tmp_ts;
513*7a7741afSMartin Matuska 	uint64_t projid = ZFS_DEFAULT_PROJID;
514*7a7741afSMartin Matuska 	sa_bulk_attr_t bulk[12];
515*7a7741afSMartin Matuska 	int count = 0;
516*7a7741afSMartin Matuska 
517*7a7741afSMartin Matuska 	ASSERT(zfsvfs != NULL);
518*7a7741afSMartin Matuska 
519*7a7741afSMartin Matuska 	ip = new_inode(zfsvfs->z_sb);
520*7a7741afSMartin Matuska 	if (ip == NULL)
521*7a7741afSMartin Matuska 		return (NULL);
522*7a7741afSMartin Matuska 
523*7a7741afSMartin Matuska 	zp = ITOZ(ip);
524*7a7741afSMartin Matuska 	ASSERT(zp->z_dirlocks == NULL);
525*7a7741afSMartin Matuska 	ASSERT3P(zp->z_acl_cached, ==, NULL);
526*7a7741afSMartin Matuska 	ASSERT3P(zp->z_xattr_cached, ==, NULL);
527*7a7741afSMartin Matuska 	zp->z_unlinked = B_FALSE;
528*7a7741afSMartin Matuska 	zp->z_atime_dirty = B_FALSE;
529*7a7741afSMartin Matuska 	zp->z_is_ctldir = B_FALSE;
530*7a7741afSMartin Matuska 	zp->z_suspended = B_FALSE;
531*7a7741afSMartin Matuska 	zp->z_sa_hdl = NULL;
532*7a7741afSMartin Matuska 	zp->z_mapcnt = 0;
533*7a7741afSMartin Matuska 	zp->z_id = db->db_object;
534*7a7741afSMartin Matuska 	zp->z_blksz = blksz;
535*7a7741afSMartin Matuska 	zp->z_seq = 0x7A4653;
536*7a7741afSMartin Matuska 	zp->z_sync_cnt = 0;
537*7a7741afSMartin Matuska 	zp->z_sync_writes_cnt = 0;
538*7a7741afSMartin Matuska 	zp->z_async_writes_cnt = 0;
539*7a7741afSMartin Matuska 
540*7a7741afSMartin Matuska 	zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);
541*7a7741afSMartin Matuska 
542*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
543*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &tmp_gen, 8);
544*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
545*7a7741afSMartin Matuska 	    &zp->z_size, 8);
546*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);
547*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
548*7a7741afSMartin Matuska 	    &zp->z_pflags, 8);
549*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
550*7a7741afSMartin Matuska 	    &parent, 8);
551*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &z_uid, 8);
552*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &z_gid, 8);
553*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16);
554*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
555*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
556*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &btime, 16);
557*7a7741afSMartin Matuska 
558*7a7741afSMartin Matuska 	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || tmp_gen == 0 ||
559*7a7741afSMartin Matuska 	    (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
560*7a7741afSMartin Matuska 	    (zp->z_pflags & ZFS_PROJID) &&
561*7a7741afSMartin Matuska 	    sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs), &projid, 8) != 0)) {
562*7a7741afSMartin Matuska 		if (hdl == NULL)
563*7a7741afSMartin Matuska 			sa_handle_destroy(zp->z_sa_hdl);
564*7a7741afSMartin Matuska 		zp->z_sa_hdl = NULL;
565*7a7741afSMartin Matuska 		goto error;
566*7a7741afSMartin Matuska 	}
567*7a7741afSMartin Matuska 
568*7a7741afSMartin Matuska 	zp->z_projid = projid;
569*7a7741afSMartin Matuska 	zp->z_mode = ip->i_mode = mode;
570*7a7741afSMartin Matuska 	ip->i_generation = (uint32_t)tmp_gen;
571*7a7741afSMartin Matuska 	ip->i_blkbits = SPA_MINBLOCKSHIFT;
572*7a7741afSMartin Matuska 	set_nlink(ip, (uint32_t)links);
573*7a7741afSMartin Matuska 	zfs_uid_write(ip, z_uid);
574*7a7741afSMartin Matuska 	zfs_gid_write(ip, z_gid);
575*7a7741afSMartin Matuska 	zfs_set_inode_flags(zp, ip);
576*7a7741afSMartin Matuska 
577*7a7741afSMartin Matuska 	/* Cache the xattr parent id */
578*7a7741afSMartin Matuska 	if (zp->z_pflags & ZFS_XATTR)
579*7a7741afSMartin Matuska 		zp->z_xattr_parent = parent;
580*7a7741afSMartin Matuska 
581*7a7741afSMartin Matuska 	ZFS_TIME_DECODE(&tmp_ts, atime);
582*7a7741afSMartin Matuska 	zpl_inode_set_atime_to_ts(ip, tmp_ts);
583*7a7741afSMartin Matuska 	ZFS_TIME_DECODE(&tmp_ts, mtime);
584*7a7741afSMartin Matuska 	zpl_inode_set_mtime_to_ts(ip, tmp_ts);
585*7a7741afSMartin Matuska 	ZFS_TIME_DECODE(&tmp_ts, ctime);
586*7a7741afSMartin Matuska 	zpl_inode_set_ctime_to_ts(ip, tmp_ts);
587*7a7741afSMartin Matuska 	ZFS_TIME_DECODE(&zp->z_btime, btime);
588*7a7741afSMartin Matuska 
589*7a7741afSMartin Matuska 	ip->i_ino = zp->z_id;
590*7a7741afSMartin Matuska 	zfs_znode_update_vfs(zp);
591*7a7741afSMartin Matuska 	zfs_inode_set_ops(zfsvfs, ip);
592*7a7741afSMartin Matuska 
593*7a7741afSMartin Matuska 	/*
594*7a7741afSMartin Matuska 	 * The only way insert_inode_locked() can fail is if the ip->i_ino
595*7a7741afSMartin Matuska 	 * number is already hashed for this super block.  This can never
596*7a7741afSMartin Matuska 	 * happen because the inode numbers map 1:1 with the object numbers.
597*7a7741afSMartin Matuska 	 *
598*7a7741afSMartin Matuska 	 * Exceptions include rolling back a mounted file system, either
599*7a7741afSMartin Matuska 	 * from the zfs rollback or zfs recv command.
600*7a7741afSMartin Matuska 	 *
601*7a7741afSMartin Matuska 	 * Active inodes are unhashed during the rollback, but since zrele
602*7a7741afSMartin Matuska 	 * can happen asynchronously, we can't guarantee they've been
603*7a7741afSMartin Matuska 	 * unhashed.  This can cause hash collisions in unlinked drain
604*7a7741afSMartin Matuska 	 * processing so do not hash unlinked znodes.
605*7a7741afSMartin Matuska 	 */
606*7a7741afSMartin Matuska 	if (links > 0)
607*7a7741afSMartin Matuska 		VERIFY3S(insert_inode_locked(ip), ==, 0);
608*7a7741afSMartin Matuska 
609*7a7741afSMartin Matuska 	mutex_enter(&zfsvfs->z_znodes_lock);
610*7a7741afSMartin Matuska 	list_insert_tail(&zfsvfs->z_all_znodes, zp);
611*7a7741afSMartin Matuska 	mutex_exit(&zfsvfs->z_znodes_lock);
612*7a7741afSMartin Matuska 
613*7a7741afSMartin Matuska 	if (links > 0)
614*7a7741afSMartin Matuska 		unlock_new_inode(ip);
615*7a7741afSMartin Matuska 	return (zp);
616*7a7741afSMartin Matuska 
617*7a7741afSMartin Matuska error:
618*7a7741afSMartin Matuska 	iput(ip);
619*7a7741afSMartin Matuska 	return (NULL);
620*7a7741afSMartin Matuska }
621*7a7741afSMartin Matuska 
622*7a7741afSMartin Matuska /*
623*7a7741afSMartin Matuska  * Safely mark an inode dirty.  Inodes which are part of a read-only
624*7a7741afSMartin Matuska  * file system or snapshot may not be dirtied.
625*7a7741afSMartin Matuska  */
626*7a7741afSMartin Matuska void
627*7a7741afSMartin Matuska zfs_mark_inode_dirty(struct inode *ip)
628*7a7741afSMartin Matuska {
629*7a7741afSMartin Matuska 	zfsvfs_t *zfsvfs = ITOZSB(ip);
630*7a7741afSMartin Matuska 
631*7a7741afSMartin Matuska 	if (zfs_is_readonly(zfsvfs) || dmu_objset_is_snapshot(zfsvfs->z_os))
632*7a7741afSMartin Matuska 		return;
633*7a7741afSMartin Matuska 
634*7a7741afSMartin Matuska 	mark_inode_dirty(ip);
635*7a7741afSMartin Matuska }
636*7a7741afSMartin Matuska 
637*7a7741afSMartin Matuska static uint64_t empty_xattr;
638*7a7741afSMartin Matuska static uint64_t pad[4];
639*7a7741afSMartin Matuska static zfs_acl_phys_t acl_phys;
640*7a7741afSMartin Matuska /*
641*7a7741afSMartin Matuska  * Create a new DMU object to hold a zfs znode.
642*7a7741afSMartin Matuska  *
643*7a7741afSMartin Matuska  *	IN:	dzp	- parent directory for new znode
644*7a7741afSMartin Matuska  *		vap	- file attributes for new znode
645*7a7741afSMartin Matuska  *		tx	- dmu transaction id for zap operations
646*7a7741afSMartin Matuska  *		cr	- credentials of caller
647*7a7741afSMartin Matuska  *		flag	- flags:
648*7a7741afSMartin Matuska  *			  IS_ROOT_NODE	- new object will be root
649*7a7741afSMartin Matuska  *			  IS_TMPFILE	- new object is of O_TMPFILE
650*7a7741afSMartin Matuska  *			  IS_XATTR	- new object is an attribute
651*7a7741afSMartin Matuska  *		acl_ids	- ACL related attributes
652*7a7741afSMartin Matuska  *
653*7a7741afSMartin Matuska  *	OUT:	zpp	- allocated znode (set to dzp if IS_ROOT_NODE)
654*7a7741afSMartin Matuska  *
655*7a7741afSMartin Matuska  */
656*7a7741afSMartin Matuska void
657*7a7741afSMartin Matuska zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
658*7a7741afSMartin Matuska     uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
659*7a7741afSMartin Matuska {
660*7a7741afSMartin Matuska 	uint64_t	crtime[2], atime[2], mtime[2], ctime[2];
661*7a7741afSMartin Matuska 	uint64_t	mode, size, links, parent, pflags;
662*7a7741afSMartin Matuska 	uint64_t	projid = ZFS_DEFAULT_PROJID;
663*7a7741afSMartin Matuska 	uint64_t	rdev = 0;
664*7a7741afSMartin Matuska 	zfsvfs_t	*zfsvfs = ZTOZSB(dzp);
665*7a7741afSMartin Matuska 	dmu_buf_t	*db;
666*7a7741afSMartin Matuska 	inode_timespec_t now;
667*7a7741afSMartin Matuska 	uint64_t	gen, obj;
668*7a7741afSMartin Matuska 	int		bonuslen;
669*7a7741afSMartin Matuska 	int		dnodesize;
670*7a7741afSMartin Matuska 	sa_handle_t	*sa_hdl;
671*7a7741afSMartin Matuska 	dmu_object_type_t obj_type;
672*7a7741afSMartin Matuska 	sa_bulk_attr_t	*sa_attrs;
673*7a7741afSMartin Matuska 	int		cnt = 0;
674*7a7741afSMartin Matuska 	zfs_acl_locator_cb_t locate = { 0 };
675*7a7741afSMartin Matuska 	znode_hold_t	*zh;
676*7a7741afSMartin Matuska 
677*7a7741afSMartin Matuska 	if (zfsvfs->z_replay) {
678*7a7741afSMartin Matuska 		obj = vap->va_nodeid;
679*7a7741afSMartin Matuska 		now = vap->va_ctime;		/* see zfs_replay_create() */
680*7a7741afSMartin Matuska 		gen = vap->va_nblocks;		/* ditto */
681*7a7741afSMartin Matuska 		dnodesize = vap->va_fsid;	/* ditto */
682*7a7741afSMartin Matuska 	} else {
683*7a7741afSMartin Matuska 		obj = 0;
684*7a7741afSMartin Matuska 		gethrestime(&now);
685*7a7741afSMartin Matuska 		gen = dmu_tx_get_txg(tx);
686*7a7741afSMartin Matuska 		dnodesize = dmu_objset_dnodesize(zfsvfs->z_os);
687*7a7741afSMartin Matuska 	}
688*7a7741afSMartin Matuska 
689*7a7741afSMartin Matuska 	if (dnodesize == 0)
690*7a7741afSMartin Matuska 		dnodesize = DNODE_MIN_SIZE;
691*7a7741afSMartin Matuska 
692*7a7741afSMartin Matuska 	obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
693*7a7741afSMartin Matuska 
694*7a7741afSMartin Matuska 	bonuslen = (obj_type == DMU_OT_SA) ?
695*7a7741afSMartin Matuska 	    DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE;
696*7a7741afSMartin Matuska 
697*7a7741afSMartin Matuska 	/*
698*7a7741afSMartin Matuska 	 * Create a new DMU object.
699*7a7741afSMartin Matuska 	 */
700*7a7741afSMartin Matuska 	/*
701*7a7741afSMartin Matuska 	 * There's currently no mechanism for pre-reading the blocks that will
702*7a7741afSMartin Matuska 	 * be needed to allocate a new object, so we accept the small chance
703*7a7741afSMartin Matuska 	 * that there will be an i/o error and we will fail one of the
704*7a7741afSMartin Matuska 	 * assertions below.
705*7a7741afSMartin Matuska 	 */
706*7a7741afSMartin Matuska 	if (S_ISDIR(vap->va_mode)) {
707*7a7741afSMartin Matuska 		if (zfsvfs->z_replay) {
708*7a7741afSMartin Matuska 			VERIFY0(zap_create_claim_norm_dnsize(zfsvfs->z_os, obj,
709*7a7741afSMartin Matuska 			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
710*7a7741afSMartin Matuska 			    obj_type, bonuslen, dnodesize, tx));
711*7a7741afSMartin Matuska 		} else {
712*7a7741afSMartin Matuska 			obj = zap_create_norm_dnsize(zfsvfs->z_os,
713*7a7741afSMartin Matuska 			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
714*7a7741afSMartin Matuska 			    obj_type, bonuslen, dnodesize, tx);
715*7a7741afSMartin Matuska 		}
716*7a7741afSMartin Matuska 	} else {
717*7a7741afSMartin Matuska 		if (zfsvfs->z_replay) {
718*7a7741afSMartin Matuska 			VERIFY0(dmu_object_claim_dnsize(zfsvfs->z_os, obj,
719*7a7741afSMartin Matuska 			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
720*7a7741afSMartin Matuska 			    obj_type, bonuslen, dnodesize, tx));
721*7a7741afSMartin Matuska 		} else {
722*7a7741afSMartin Matuska 			obj = dmu_object_alloc_dnsize(zfsvfs->z_os,
723*7a7741afSMartin Matuska 			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
724*7a7741afSMartin Matuska 			    obj_type, bonuslen, dnodesize, tx);
725*7a7741afSMartin Matuska 		}
726*7a7741afSMartin Matuska 	}
727*7a7741afSMartin Matuska 
728*7a7741afSMartin Matuska 	zh = zfs_znode_hold_enter(zfsvfs, obj);
729*7a7741afSMartin Matuska 	VERIFY0(sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));
730*7a7741afSMartin Matuska 
731*7a7741afSMartin Matuska 	/*
732*7a7741afSMartin Matuska 	 * If this is the root, fix up the half-initialized parent pointer
733*7a7741afSMartin Matuska 	 * to reference the just-allocated physical data area.
734*7a7741afSMartin Matuska 	 */
735*7a7741afSMartin Matuska 	if (flag & IS_ROOT_NODE) {
736*7a7741afSMartin Matuska 		dzp->z_id = obj;
737*7a7741afSMartin Matuska 	}
738*7a7741afSMartin Matuska 
739*7a7741afSMartin Matuska 	/*
740*7a7741afSMartin Matuska 	 * If parent is an xattr, so am I.
741*7a7741afSMartin Matuska 	 */
742*7a7741afSMartin Matuska 	if (dzp->z_pflags & ZFS_XATTR) {
743*7a7741afSMartin Matuska 		flag |= IS_XATTR;
744*7a7741afSMartin Matuska 	}
745*7a7741afSMartin Matuska 
746*7a7741afSMartin Matuska 	if (zfsvfs->z_use_fuids)
747*7a7741afSMartin Matuska 		pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
748*7a7741afSMartin Matuska 	else
749*7a7741afSMartin Matuska 		pflags = 0;
750*7a7741afSMartin Matuska 
751*7a7741afSMartin Matuska 	if (S_ISDIR(vap->va_mode)) {
752*7a7741afSMartin Matuska 		size = 2;		/* contents ("." and "..") */
753*7a7741afSMartin Matuska 		links = 2;
754*7a7741afSMartin Matuska 	} else {
755*7a7741afSMartin Matuska 		size = 0;
756*7a7741afSMartin Matuska 		links = (flag & IS_TMPFILE) ? 0 : 1;
757*7a7741afSMartin Matuska 	}
758*7a7741afSMartin Matuska 
759*7a7741afSMartin Matuska 	if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))
760*7a7741afSMartin Matuska 		rdev = vap->va_rdev;
761*7a7741afSMartin Matuska 
762*7a7741afSMartin Matuska 	parent = dzp->z_id;
763*7a7741afSMartin Matuska 	mode = acl_ids->z_mode;
764*7a7741afSMartin Matuska 	if (flag & IS_XATTR)
765*7a7741afSMartin Matuska 		pflags |= ZFS_XATTR;
766*7a7741afSMartin Matuska 
767*7a7741afSMartin Matuska 	if (S_ISREG(vap->va_mode) || S_ISDIR(vap->va_mode)) {
768*7a7741afSMartin Matuska 		/*
769*7a7741afSMartin Matuska 		 * With ZFS_PROJID flag, we can easily know whether there is
770*7a7741afSMartin Matuska 		 * project ID stored on disk or not. See zfs_space_delta_cb().
771*7a7741afSMartin Matuska 		 */
772*7a7741afSMartin Matuska 		if (obj_type != DMU_OT_ZNODE &&
773*7a7741afSMartin Matuska 		    dmu_objset_projectquota_enabled(zfsvfs->z_os))
774*7a7741afSMartin Matuska 			pflags |= ZFS_PROJID;
775*7a7741afSMartin Matuska 
776*7a7741afSMartin Matuska 		/*
777*7a7741afSMartin Matuska 		 * Inherit project ID from parent if required.
778*7a7741afSMartin Matuska 		 */
779*7a7741afSMartin Matuska 		projid = zfs_inherit_projid(dzp);
780*7a7741afSMartin Matuska 		if (dzp->z_pflags & ZFS_PROJINHERIT)
781*7a7741afSMartin Matuska 			pflags |= ZFS_PROJINHERIT;
782*7a7741afSMartin Matuska 	}
783*7a7741afSMartin Matuska 
784*7a7741afSMartin Matuska 	/*
785*7a7741afSMartin Matuska 	 * No execs denied will be determined when zfs_mode_compute() is called.
786*7a7741afSMartin Matuska 	 */
787*7a7741afSMartin Matuska 	pflags |= acl_ids->z_aclp->z_hints &
788*7a7741afSMartin Matuska 	    (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
789*7a7741afSMartin Matuska 	    ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);
790*7a7741afSMartin Matuska 
791*7a7741afSMartin Matuska 	ZFS_TIME_ENCODE(&now, crtime);
792*7a7741afSMartin Matuska 	ZFS_TIME_ENCODE(&now, ctime);
793*7a7741afSMartin Matuska 
794*7a7741afSMartin Matuska 	if (vap->va_mask & ATTR_ATIME) {
795*7a7741afSMartin Matuska 		ZFS_TIME_ENCODE(&vap->va_atime, atime);
796*7a7741afSMartin Matuska 	} else {
797*7a7741afSMartin Matuska 		ZFS_TIME_ENCODE(&now, atime);
798*7a7741afSMartin Matuska 	}
799*7a7741afSMartin Matuska 
800*7a7741afSMartin Matuska 	if (vap->va_mask & ATTR_MTIME) {
801*7a7741afSMartin Matuska 		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
802*7a7741afSMartin Matuska 	} else {
803*7a7741afSMartin Matuska 		ZFS_TIME_ENCODE(&now, mtime);
804*7a7741afSMartin Matuska 	}
805*7a7741afSMartin Matuska 
806*7a7741afSMartin Matuska 	/* Now add in all of the "SA" attributes */
807*7a7741afSMartin Matuska 	VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
808*7a7741afSMartin Matuska 	    &sa_hdl));
809*7a7741afSMartin Matuska 
810*7a7741afSMartin Matuska 	/*
811*7a7741afSMartin Matuska 	 * Setup the array of attributes to be replaced/set on the new file
812*7a7741afSMartin Matuska 	 *
813*7a7741afSMartin Matuska 	 * order for  DMU_OT_ZNODE is critical since it needs to be constructed
814*7a7741afSMartin Matuska 	 * in the old znode_phys_t format.  Don't change this ordering
815*7a7741afSMartin Matuska 	 */
816*7a7741afSMartin Matuska 	sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);
817*7a7741afSMartin Matuska 
818*7a7741afSMartin Matuska 	if (obj_type == DMU_OT_ZNODE) {
819*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
820*7a7741afSMartin Matuska 		    NULL, &atime, 16);
821*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
822*7a7741afSMartin Matuska 		    NULL, &mtime, 16);
823*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
824*7a7741afSMartin Matuska 		    NULL, &ctime, 16);
825*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
826*7a7741afSMartin Matuska 		    NULL, &crtime, 16);
827*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
828*7a7741afSMartin Matuska 		    NULL, &gen, 8);
829*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
830*7a7741afSMartin Matuska 		    NULL, &mode, 8);
831*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
832*7a7741afSMartin Matuska 		    NULL, &size, 8);
833*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
834*7a7741afSMartin Matuska 		    NULL, &parent, 8);
835*7a7741afSMartin Matuska 	} else {
836*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
837*7a7741afSMartin Matuska 		    NULL, &mode, 8);
838*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
839*7a7741afSMartin Matuska 		    NULL, &size, 8);
840*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
841*7a7741afSMartin Matuska 		    NULL, &gen, 8);
842*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs),
843*7a7741afSMartin Matuska 		    NULL, &acl_ids->z_fuid, 8);
844*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs),
845*7a7741afSMartin Matuska 		    NULL, &acl_ids->z_fgid, 8);
846*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
847*7a7741afSMartin Matuska 		    NULL, &parent, 8);
848*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
849*7a7741afSMartin Matuska 		    NULL, &pflags, 8);
850*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
851*7a7741afSMartin Matuska 		    NULL, &atime, 16);
852*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
853*7a7741afSMartin Matuska 		    NULL, &mtime, 16);
854*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
855*7a7741afSMartin Matuska 		    NULL, &ctime, 16);
856*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
857*7a7741afSMartin Matuska 		    NULL, &crtime, 16);
858*7a7741afSMartin Matuska 	}
859*7a7741afSMartin Matuska 
860*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);
861*7a7741afSMartin Matuska 
862*7a7741afSMartin Matuska 	if (obj_type == DMU_OT_ZNODE) {
863*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL,
864*7a7741afSMartin Matuska 		    &empty_xattr, 8);
865*7a7741afSMartin Matuska 	} else if (dmu_objset_projectquota_enabled(zfsvfs->z_os) &&
866*7a7741afSMartin Matuska 	    pflags & ZFS_PROJID) {
867*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PROJID(zfsvfs),
868*7a7741afSMartin Matuska 		    NULL, &projid, 8);
869*7a7741afSMartin Matuska 	}
870*7a7741afSMartin Matuska 	if (obj_type == DMU_OT_ZNODE ||
871*7a7741afSMartin Matuska 	    (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) {
872*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs),
873*7a7741afSMartin Matuska 		    NULL, &rdev, 8);
874*7a7741afSMartin Matuska 	}
875*7a7741afSMartin Matuska 	if (obj_type == DMU_OT_ZNODE) {
876*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
877*7a7741afSMartin Matuska 		    NULL, &pflags, 8);
878*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
879*7a7741afSMartin Matuska 		    &acl_ids->z_fuid, 8);
880*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
881*7a7741afSMartin Matuska 		    &acl_ids->z_fgid, 8);
882*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad,
883*7a7741afSMartin Matuska 		    sizeof (uint64_t) * 4);
884*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
885*7a7741afSMartin Matuska 		    &acl_phys, sizeof (zfs_acl_phys_t));
886*7a7741afSMartin Matuska 	} else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
887*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
888*7a7741afSMartin Matuska 		    &acl_ids->z_aclp->z_acl_count, 8);
889*7a7741afSMartin Matuska 		locate.cb_aclp = acl_ids->z_aclp;
890*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs),
891*7a7741afSMartin Matuska 		    zfs_acl_data_locator, &locate,
892*7a7741afSMartin Matuska 		    acl_ids->z_aclp->z_acl_bytes);
893*7a7741afSMartin Matuska 		mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
894*7a7741afSMartin Matuska 		    acl_ids->z_fuid, acl_ids->z_fgid);
895*7a7741afSMartin Matuska 	}
896*7a7741afSMartin Matuska 
897*7a7741afSMartin Matuska 	VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);
898*7a7741afSMartin Matuska 
899*7a7741afSMartin Matuska 	if (!(flag & IS_ROOT_NODE)) {
900*7a7741afSMartin Matuska 		/*
901*7a7741afSMartin Matuska 		 * The call to zfs_znode_alloc() may fail if memory is low
902*7a7741afSMartin Matuska 		 * via the call path: alloc_inode() -> inode_init_always() ->
903*7a7741afSMartin Matuska 		 * security_inode_alloc() -> inode_alloc_security().  Since
904*7a7741afSMartin Matuska 		 * the existing code is written such that zfs_mknode() can
905*7a7741afSMartin Matuska 		 * not fail retry until sufficient memory has been reclaimed.
906*7a7741afSMartin Matuska 		 */
907*7a7741afSMartin Matuska 		do {
908*7a7741afSMartin Matuska 			*zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl);
909*7a7741afSMartin Matuska 		} while (*zpp == NULL);
910*7a7741afSMartin Matuska 
911*7a7741afSMartin Matuska 		VERIFY(*zpp != NULL);
912*7a7741afSMartin Matuska 		VERIFY(dzp != NULL);
913*7a7741afSMartin Matuska 	} else {
914*7a7741afSMartin Matuska 		/*
915*7a7741afSMartin Matuska 		 * If we are creating the root node, the "parent" we
916*7a7741afSMartin Matuska 		 * passed in is the znode for the root.
917*7a7741afSMartin Matuska 		 */
918*7a7741afSMartin Matuska 		*zpp = dzp;
919*7a7741afSMartin Matuska 
920*7a7741afSMartin Matuska 		(*zpp)->z_sa_hdl = sa_hdl;
921*7a7741afSMartin Matuska 	}
922*7a7741afSMartin Matuska 
923*7a7741afSMartin Matuska 	(*zpp)->z_pflags = pflags;
924*7a7741afSMartin Matuska 	(*zpp)->z_mode = ZTOI(*zpp)->i_mode = mode;
925*7a7741afSMartin Matuska 	(*zpp)->z_dnodesize = dnodesize;
926*7a7741afSMartin Matuska 	(*zpp)->z_projid = projid;
927*7a7741afSMartin Matuska 
928*7a7741afSMartin Matuska 	if (obj_type == DMU_OT_ZNODE ||
929*7a7741afSMartin Matuska 	    acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
930*7a7741afSMartin Matuska 		VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
931*7a7741afSMartin Matuska 	}
932*7a7741afSMartin Matuska 	kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
933*7a7741afSMartin Matuska 	zfs_znode_hold_exit(zfsvfs, zh);
934*7a7741afSMartin Matuska }
935*7a7741afSMartin Matuska 
936*7a7741afSMartin Matuska /*
937*7a7741afSMartin Matuska  * Update in-core attributes.  It is assumed the caller will be doing an
938*7a7741afSMartin Matuska  * sa_bulk_update to push the changes out.
939*7a7741afSMartin Matuska  */
940*7a7741afSMartin Matuska void
941*7a7741afSMartin Matuska zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
942*7a7741afSMartin Matuska {
943*7a7741afSMartin Matuska 	xoptattr_t *xoap;
944*7a7741afSMartin Matuska 	boolean_t update_inode = B_FALSE;
945*7a7741afSMartin Matuska 
946*7a7741afSMartin Matuska 	xoap = xva_getxoptattr(xvap);
947*7a7741afSMartin Matuska 	ASSERT(xoap);
948*7a7741afSMartin Matuska 
949*7a7741afSMartin Matuska 	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
950*7a7741afSMartin Matuska 		uint64_t times[2];
951*7a7741afSMartin Matuska 		ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
952*7a7741afSMartin Matuska 		(void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(ZTOZSB(zp)),
953*7a7741afSMartin Matuska 		    &times, sizeof (times), tx);
954*7a7741afSMartin Matuska 		XVA_SET_RTN(xvap, XAT_CREATETIME);
955*7a7741afSMartin Matuska 	}
956*7a7741afSMartin Matuska 	if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
957*7a7741afSMartin Matuska 		ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
958*7a7741afSMartin Matuska 		    zp->z_pflags, tx);
959*7a7741afSMartin Matuska 		XVA_SET_RTN(xvap, XAT_READONLY);
960*7a7741afSMartin Matuska 	}
961*7a7741afSMartin Matuska 	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
962*7a7741afSMartin Matuska 		ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
963*7a7741afSMartin Matuska 		    zp->z_pflags, tx);
964*7a7741afSMartin Matuska 		XVA_SET_RTN(xvap, XAT_HIDDEN);
965*7a7741afSMartin Matuska 	}
966*7a7741afSMartin Matuska 	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
967*7a7741afSMartin Matuska 		ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
968*7a7741afSMartin Matuska 		    zp->z_pflags, tx);
969*7a7741afSMartin Matuska 		XVA_SET_RTN(xvap, XAT_SYSTEM);
970*7a7741afSMartin Matuska 	}
971*7a7741afSMartin Matuska 	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
972*7a7741afSMartin Matuska 		ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
973*7a7741afSMartin Matuska 		    zp->z_pflags, tx);
974*7a7741afSMartin Matuska 		XVA_SET_RTN(xvap, XAT_ARCHIVE);
975*7a7741afSMartin Matuska 	}
976*7a7741afSMartin Matuska 	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
977*7a7741afSMartin Matuska 		ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
978*7a7741afSMartin Matuska 		    zp->z_pflags, tx);
979*7a7741afSMartin Matuska 		XVA_SET_RTN(xvap, XAT_IMMUTABLE);
980*7a7741afSMartin Matuska 
981*7a7741afSMartin Matuska 		update_inode = B_TRUE;
982*7a7741afSMartin Matuska 	}
983*7a7741afSMartin Matuska 	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
984*7a7741afSMartin Matuska 		ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
985*7a7741afSMartin Matuska 		    zp->z_pflags, tx);
986*7a7741afSMartin Matuska 		XVA_SET_RTN(xvap, XAT_NOUNLINK);
987*7a7741afSMartin Matuska 	}
988*7a7741afSMartin Matuska 	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
989*7a7741afSMartin Matuska 		ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
990*7a7741afSMartin Matuska 		    zp->z_pflags, tx);
991*7a7741afSMartin Matuska 		XVA_SET_RTN(xvap, XAT_APPENDONLY);
992*7a7741afSMartin Matuska 
993*7a7741afSMartin Matuska 		update_inode = B_TRUE;
994*7a7741afSMartin Matuska 	}
995*7a7741afSMartin Matuska 	if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
996*7a7741afSMartin Matuska 		ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
997*7a7741afSMartin Matuska 		    zp->z_pflags, tx);
998*7a7741afSMartin Matuska 		XVA_SET_RTN(xvap, XAT_NODUMP);
999*7a7741afSMartin Matuska 	}
1000*7a7741afSMartin Matuska 	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
1001*7a7741afSMartin Matuska 		ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
1002*7a7741afSMartin Matuska 		    zp->z_pflags, tx);
1003*7a7741afSMartin Matuska 		XVA_SET_RTN(xvap, XAT_OPAQUE);
1004*7a7741afSMartin Matuska 	}
1005*7a7741afSMartin Matuska 	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
1006*7a7741afSMartin Matuska 		ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
1007*7a7741afSMartin Matuska 		    xoap->xoa_av_quarantined, zp->z_pflags, tx);
1008*7a7741afSMartin Matuska 		XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
1009*7a7741afSMartin Matuska 	}
1010*7a7741afSMartin Matuska 	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
1011*7a7741afSMartin Matuska 		ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
1012*7a7741afSMartin Matuska 		    zp->z_pflags, tx);
1013*7a7741afSMartin Matuska 		XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
1014*7a7741afSMartin Matuska 	}
1015*7a7741afSMartin Matuska 	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
1016*7a7741afSMartin Matuska 		zfs_sa_set_scanstamp(zp, xvap, tx);
1017*7a7741afSMartin Matuska 		XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
1018*7a7741afSMartin Matuska 	}
1019*7a7741afSMartin Matuska 	if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
1020*7a7741afSMartin Matuska 		ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
1021*7a7741afSMartin Matuska 		    zp->z_pflags, tx);
1022*7a7741afSMartin Matuska 		XVA_SET_RTN(xvap, XAT_REPARSE);
1023*7a7741afSMartin Matuska 	}
1024*7a7741afSMartin Matuska 	if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
1025*7a7741afSMartin Matuska 		ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
1026*7a7741afSMartin Matuska 		    zp->z_pflags, tx);
1027*7a7741afSMartin Matuska 		XVA_SET_RTN(xvap, XAT_OFFLINE);
1028*7a7741afSMartin Matuska 	}
1029*7a7741afSMartin Matuska 	if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
1030*7a7741afSMartin Matuska 		ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
1031*7a7741afSMartin Matuska 		    zp->z_pflags, tx);
1032*7a7741afSMartin Matuska 		XVA_SET_RTN(xvap, XAT_SPARSE);
1033*7a7741afSMartin Matuska 	}
1034*7a7741afSMartin Matuska 	if (XVA_ISSET_REQ(xvap, XAT_PROJINHERIT)) {
1035*7a7741afSMartin Matuska 		ZFS_ATTR_SET(zp, ZFS_PROJINHERIT, xoap->xoa_projinherit,
1036*7a7741afSMartin Matuska 		    zp->z_pflags, tx);
1037*7a7741afSMartin Matuska 		XVA_SET_RTN(xvap, XAT_PROJINHERIT);
1038*7a7741afSMartin Matuska 	}
1039*7a7741afSMartin Matuska 
1040*7a7741afSMartin Matuska 	if (update_inode)
1041*7a7741afSMartin Matuska 		zfs_set_inode_flags(zp, ZTOI(zp));
1042*7a7741afSMartin Matuska }
1043*7a7741afSMartin Matuska 
1044*7a7741afSMartin Matuska int
1045*7a7741afSMartin Matuska zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
1046*7a7741afSMartin Matuska {
1047*7a7741afSMartin Matuska 	dmu_object_info_t doi;
1048*7a7741afSMartin Matuska 	dmu_buf_t	*db;
1049*7a7741afSMartin Matuska 	znode_t		*zp;
1050*7a7741afSMartin Matuska 	znode_hold_t	*zh;
1051*7a7741afSMartin Matuska 	int err;
1052*7a7741afSMartin Matuska 	sa_handle_t	*hdl;
1053*7a7741afSMartin Matuska 
1054*7a7741afSMartin Matuska 	*zpp = NULL;
1055*7a7741afSMartin Matuska 
1056*7a7741afSMartin Matuska again:
1057*7a7741afSMartin Matuska 	zh = zfs_znode_hold_enter(zfsvfs, obj_num);
1058*7a7741afSMartin Matuska 
1059*7a7741afSMartin Matuska 	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
1060*7a7741afSMartin Matuska 	if (err) {
1061*7a7741afSMartin Matuska 		zfs_znode_hold_exit(zfsvfs, zh);
1062*7a7741afSMartin Matuska 		return (err);
1063*7a7741afSMartin Matuska 	}
1064*7a7741afSMartin Matuska 
1065*7a7741afSMartin Matuska 	dmu_object_info_from_db(db, &doi);
1066*7a7741afSMartin Matuska 	if (doi.doi_bonus_type != DMU_OT_SA &&
1067*7a7741afSMartin Matuska 	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
1068*7a7741afSMartin Matuska 	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
1069*7a7741afSMartin Matuska 	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
1070*7a7741afSMartin Matuska 		sa_buf_rele(db, NULL);
1071*7a7741afSMartin Matuska 		zfs_znode_hold_exit(zfsvfs, zh);
1072*7a7741afSMartin Matuska 		return (SET_ERROR(EINVAL));
1073*7a7741afSMartin Matuska 	}
1074*7a7741afSMartin Matuska 
1075*7a7741afSMartin Matuska 	hdl = dmu_buf_get_user(db);
1076*7a7741afSMartin Matuska 	if (hdl != NULL) {
1077*7a7741afSMartin Matuska 		zp = sa_get_userdata(hdl);
1078*7a7741afSMartin Matuska 
1079*7a7741afSMartin Matuska 
1080*7a7741afSMartin Matuska 		/*
1081*7a7741afSMartin Matuska 		 * Since "SA" does immediate eviction we
1082*7a7741afSMartin Matuska 		 * should never find a sa handle that doesn't
1083*7a7741afSMartin Matuska 		 * know about the znode.
1084*7a7741afSMartin Matuska 		 */
1085*7a7741afSMartin Matuska 
1086*7a7741afSMartin Matuska 		ASSERT3P(zp, !=, NULL);
1087*7a7741afSMartin Matuska 
1088*7a7741afSMartin Matuska 		mutex_enter(&zp->z_lock);
1089*7a7741afSMartin Matuska 		ASSERT3U(zp->z_id, ==, obj_num);
1090*7a7741afSMartin Matuska 		/*
1091*7a7741afSMartin Matuska 		 * If zp->z_unlinked is set, the znode is already marked
1092*7a7741afSMartin Matuska 		 * for deletion and should not be discovered. Check this
1093*7a7741afSMartin Matuska 		 * after checking igrab() due to fsetxattr() & O_TMPFILE.
1094*7a7741afSMartin Matuska 		 *
1095*7a7741afSMartin Matuska 		 * If igrab() returns NULL the VFS has independently
1096*7a7741afSMartin Matuska 		 * determined the inode should be evicted and has
1097*7a7741afSMartin Matuska 		 * called iput_final() to start the eviction process.
1098*7a7741afSMartin Matuska 		 * The SA handle is still valid but because the VFS
1099*7a7741afSMartin Matuska 		 * requires that the eviction succeed we must drop
1100*7a7741afSMartin Matuska 		 * our locks and references to allow the eviction to
1101*7a7741afSMartin Matuska 		 * complete.  The zfs_zget() may then be retried.
1102*7a7741afSMartin Matuska 		 *
1103*7a7741afSMartin Matuska 		 * This unlikely case could be optimized by registering
1104*7a7741afSMartin Matuska 		 * a sops->drop_inode() callback.  The callback would
1105*7a7741afSMartin Matuska 		 * need to detect the active SA hold thereby informing
1106*7a7741afSMartin Matuska 		 * the VFS that this inode should not be evicted.
1107*7a7741afSMartin Matuska 		 */
1108*7a7741afSMartin Matuska 		if (igrab(ZTOI(zp)) == NULL) {
1109*7a7741afSMartin Matuska 			if (zp->z_unlinked)
1110*7a7741afSMartin Matuska 				err = SET_ERROR(ENOENT);
1111*7a7741afSMartin Matuska 			else
1112*7a7741afSMartin Matuska 				err = SET_ERROR(EAGAIN);
1113*7a7741afSMartin Matuska 		} else {
1114*7a7741afSMartin Matuska 			*zpp = zp;
1115*7a7741afSMartin Matuska 			err = 0;
1116*7a7741afSMartin Matuska 		}
1117*7a7741afSMartin Matuska 
1118*7a7741afSMartin Matuska 		mutex_exit(&zp->z_lock);
1119*7a7741afSMartin Matuska 		sa_buf_rele(db, NULL);
1120*7a7741afSMartin Matuska 		zfs_znode_hold_exit(zfsvfs, zh);
1121*7a7741afSMartin Matuska 
1122*7a7741afSMartin Matuska 		if (err == EAGAIN) {
1123*7a7741afSMartin Matuska 			/* inode might need this to finish evict */
1124*7a7741afSMartin Matuska 			cond_resched();
1125*7a7741afSMartin Matuska 			goto again;
1126*7a7741afSMartin Matuska 		}
1127*7a7741afSMartin Matuska 		return (err);
1128*7a7741afSMartin Matuska 	}
1129*7a7741afSMartin Matuska 
1130*7a7741afSMartin Matuska 	/*
1131*7a7741afSMartin Matuska 	 * Not found create new znode/vnode but only if file exists.
1132*7a7741afSMartin Matuska 	 *
1133*7a7741afSMartin Matuska 	 * There is a small window where zfs_vget() could
1134*7a7741afSMartin Matuska 	 * find this object while a file create is still in
1135*7a7741afSMartin Matuska 	 * progress.  This is checked for in zfs_znode_alloc()
1136*7a7741afSMartin Matuska 	 *
1137*7a7741afSMartin Matuska 	 * if zfs_znode_alloc() fails it will drop the hold on the
1138*7a7741afSMartin Matuska 	 * bonus buffer.
1139*7a7741afSMartin Matuska 	 */
1140*7a7741afSMartin Matuska 	zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size,
1141*7a7741afSMartin Matuska 	    doi.doi_bonus_type, NULL);
1142*7a7741afSMartin Matuska 	if (zp == NULL) {
1143*7a7741afSMartin Matuska 		err = SET_ERROR(ENOENT);
1144*7a7741afSMartin Matuska 	} else {
1145*7a7741afSMartin Matuska 		*zpp = zp;
1146*7a7741afSMartin Matuska 	}
1147*7a7741afSMartin Matuska 	zfs_znode_hold_exit(zfsvfs, zh);
1148*7a7741afSMartin Matuska 	return (err);
1149*7a7741afSMartin Matuska }
1150*7a7741afSMartin Matuska 
1151*7a7741afSMartin Matuska int
1152*7a7741afSMartin Matuska zfs_rezget(znode_t *zp)
1153*7a7741afSMartin Matuska {
1154*7a7741afSMartin Matuska 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
1155*7a7741afSMartin Matuska 	dmu_object_info_t doi;
1156*7a7741afSMartin Matuska 	dmu_buf_t *db;
1157*7a7741afSMartin Matuska 	uint64_t obj_num = zp->z_id;
1158*7a7741afSMartin Matuska 	uint64_t mode;
1159*7a7741afSMartin Matuska 	uint64_t links;
1160*7a7741afSMartin Matuska 	sa_bulk_attr_t bulk[11];
1161*7a7741afSMartin Matuska 	int err;
1162*7a7741afSMartin Matuska 	int count = 0;
1163*7a7741afSMartin Matuska 	uint64_t gen;
1164*7a7741afSMartin Matuska 	uint64_t z_uid, z_gid;
1165*7a7741afSMartin Matuska 	uint64_t atime[2], mtime[2], ctime[2], btime[2];
1166*7a7741afSMartin Matuska 	inode_timespec_t tmp_ts;
1167*7a7741afSMartin Matuska 	uint64_t projid = ZFS_DEFAULT_PROJID;
1168*7a7741afSMartin Matuska 	znode_hold_t *zh;
1169*7a7741afSMartin Matuska 
1170*7a7741afSMartin Matuska 	/*
1171*7a7741afSMartin Matuska 	 * skip ctldir, otherwise they will always get invalidated. This will
1172*7a7741afSMartin Matuska 	 * cause funny behaviour for the mounted snapdirs. Especially for
1173*7a7741afSMartin Matuska 	 * Linux >= 3.18, d_invalidate will detach the mountpoint and prevent
1174*7a7741afSMartin Matuska 	 * anyone automount it again as long as someone is still using the
1175*7a7741afSMartin Matuska 	 * detached mount.
1176*7a7741afSMartin Matuska 	 */
1177*7a7741afSMartin Matuska 	if (zp->z_is_ctldir)
1178*7a7741afSMartin Matuska 		return (0);
1179*7a7741afSMartin Matuska 
1180*7a7741afSMartin Matuska 	zh = zfs_znode_hold_enter(zfsvfs, obj_num);
1181*7a7741afSMartin Matuska 
1182*7a7741afSMartin Matuska 	mutex_enter(&zp->z_acl_lock);
1183*7a7741afSMartin Matuska 	if (zp->z_acl_cached) {
1184*7a7741afSMartin Matuska 		zfs_acl_free(zp->z_acl_cached);
1185*7a7741afSMartin Matuska 		zp->z_acl_cached = NULL;
1186*7a7741afSMartin Matuska 	}
1187*7a7741afSMartin Matuska 	mutex_exit(&zp->z_acl_lock);
1188*7a7741afSMartin Matuska 
1189*7a7741afSMartin Matuska 	rw_enter(&zp->z_xattr_lock, RW_WRITER);
1190*7a7741afSMartin Matuska 	if (zp->z_xattr_cached) {
1191*7a7741afSMartin Matuska 		nvlist_free(zp->z_xattr_cached);
1192*7a7741afSMartin Matuska 		zp->z_xattr_cached = NULL;
1193*7a7741afSMartin Matuska 	}
1194*7a7741afSMartin Matuska 	rw_exit(&zp->z_xattr_lock);
1195*7a7741afSMartin Matuska 
1196*7a7741afSMartin Matuska 	ASSERT(zp->z_sa_hdl == NULL);
1197*7a7741afSMartin Matuska 	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
1198*7a7741afSMartin Matuska 	if (err) {
1199*7a7741afSMartin Matuska 		zfs_znode_hold_exit(zfsvfs, zh);
1200*7a7741afSMartin Matuska 		return (err);
1201*7a7741afSMartin Matuska 	}
1202*7a7741afSMartin Matuska 
1203*7a7741afSMartin Matuska 	dmu_object_info_from_db(db, &doi);
1204*7a7741afSMartin Matuska 	if (doi.doi_bonus_type != DMU_OT_SA &&
1205*7a7741afSMartin Matuska 	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
1206*7a7741afSMartin Matuska 	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
1207*7a7741afSMartin Matuska 	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
1208*7a7741afSMartin Matuska 		sa_buf_rele(db, NULL);
1209*7a7741afSMartin Matuska 		zfs_znode_hold_exit(zfsvfs, zh);
1210*7a7741afSMartin Matuska 		return (SET_ERROR(EINVAL));
1211*7a7741afSMartin Matuska 	}
1212*7a7741afSMartin Matuska 
1213*7a7741afSMartin Matuska 	zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL);
1214*7a7741afSMartin Matuska 
1215*7a7741afSMartin Matuska 	/* reload cached values */
1216*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL,
1217*7a7741afSMartin Matuska 	    &gen, sizeof (gen));
1218*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
1219*7a7741afSMartin Matuska 	    &zp->z_size, sizeof (zp->z_size));
1220*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
1221*7a7741afSMartin Matuska 	    &links, sizeof (links));
1222*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
1223*7a7741afSMartin Matuska 	    &zp->z_pflags, sizeof (zp->z_pflags));
1224*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
1225*7a7741afSMartin Matuska 	    &z_uid, sizeof (z_uid));
1226*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
1227*7a7741afSMartin Matuska 	    &z_gid, sizeof (z_gid));
1228*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
1229*7a7741afSMartin Matuska 	    &mode, sizeof (mode));
1230*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
1231*7a7741afSMartin Matuska 	    &atime, 16);
1232*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
1233*7a7741afSMartin Matuska 	    &mtime, 16);
1234*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
1235*7a7741afSMartin Matuska 	    &ctime, 16);
1236*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &btime, 16);
1237*7a7741afSMartin Matuska 
1238*7a7741afSMartin Matuska 	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
1239*7a7741afSMartin Matuska 		zfs_znode_dmu_fini(zp);
1240*7a7741afSMartin Matuska 		zfs_znode_hold_exit(zfsvfs, zh);
1241*7a7741afSMartin Matuska 		return (SET_ERROR(EIO));
1242*7a7741afSMartin Matuska 	}
1243*7a7741afSMartin Matuska 
1244*7a7741afSMartin Matuska 	if (dmu_objset_projectquota_enabled(zfsvfs->z_os)) {
1245*7a7741afSMartin Matuska 		err = sa_lookup(zp->z_sa_hdl, SA_ZPL_PROJID(zfsvfs),
1246*7a7741afSMartin Matuska 		    &projid, 8);
1247*7a7741afSMartin Matuska 		if (err != 0 && err != ENOENT) {
1248*7a7741afSMartin Matuska 			zfs_znode_dmu_fini(zp);
1249*7a7741afSMartin Matuska 			zfs_znode_hold_exit(zfsvfs, zh);
1250*7a7741afSMartin Matuska 			return (SET_ERROR(err));
1251*7a7741afSMartin Matuska 		}
1252*7a7741afSMartin Matuska 	}
1253*7a7741afSMartin Matuska 
1254*7a7741afSMartin Matuska 	zp->z_projid = projid;
1255*7a7741afSMartin Matuska 	zp->z_mode = ZTOI(zp)->i_mode = mode;
1256*7a7741afSMartin Matuska 	zfs_uid_write(ZTOI(zp), z_uid);
1257*7a7741afSMartin Matuska 	zfs_gid_write(ZTOI(zp), z_gid);
1258*7a7741afSMartin Matuska 
1259*7a7741afSMartin Matuska 	ZFS_TIME_DECODE(&tmp_ts, atime);
1260*7a7741afSMartin Matuska 	zpl_inode_set_atime_to_ts(ZTOI(zp), tmp_ts);
1261*7a7741afSMartin Matuska 	ZFS_TIME_DECODE(&tmp_ts, mtime);
1262*7a7741afSMartin Matuska 	zpl_inode_set_mtime_to_ts(ZTOI(zp), tmp_ts);
1263*7a7741afSMartin Matuska 	ZFS_TIME_DECODE(&tmp_ts, ctime);
1264*7a7741afSMartin Matuska 	zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ts);
1265*7a7741afSMartin Matuska 	ZFS_TIME_DECODE(&zp->z_btime, btime);
1266*7a7741afSMartin Matuska 
1267*7a7741afSMartin Matuska 	if ((uint32_t)gen != ZTOI(zp)->i_generation) {
1268*7a7741afSMartin Matuska 		zfs_znode_dmu_fini(zp);
1269*7a7741afSMartin Matuska 		zfs_znode_hold_exit(zfsvfs, zh);
1270*7a7741afSMartin Matuska 		return (SET_ERROR(EIO));
1271*7a7741afSMartin Matuska 	}
1272*7a7741afSMartin Matuska 
1273*7a7741afSMartin Matuska 	set_nlink(ZTOI(zp), (uint32_t)links);
1274*7a7741afSMartin Matuska 	zfs_set_inode_flags(zp, ZTOI(zp));
1275*7a7741afSMartin Matuska 
1276*7a7741afSMartin Matuska 	zp->z_blksz = doi.doi_data_block_size;
1277*7a7741afSMartin Matuska 	zp->z_atime_dirty = B_FALSE;
1278*7a7741afSMartin Matuska 	zfs_znode_update_vfs(zp);
1279*7a7741afSMartin Matuska 
1280*7a7741afSMartin Matuska 	/*
1281*7a7741afSMartin Matuska 	 * If the file has zero links, then it has been unlinked on the send
1282*7a7741afSMartin Matuska 	 * side and it must be in the received unlinked set.
1283*7a7741afSMartin Matuska 	 * We call zfs_znode_dmu_fini() now to prevent any accesses to the
1284*7a7741afSMartin Matuska 	 * stale data and to prevent automatic removal of the file in
1285*7a7741afSMartin Matuska 	 * zfs_zinactive().  The file will be removed either when it is removed
1286*7a7741afSMartin Matuska 	 * on the send side and the next incremental stream is received or
1287*7a7741afSMartin Matuska 	 * when the unlinked set gets processed.
1288*7a7741afSMartin Matuska 	 */
1289*7a7741afSMartin Matuska 	zp->z_unlinked = (ZTOI(zp)->i_nlink == 0);
1290*7a7741afSMartin Matuska 	if (zp->z_unlinked)
1291*7a7741afSMartin Matuska 		zfs_znode_dmu_fini(zp);
1292*7a7741afSMartin Matuska 
1293*7a7741afSMartin Matuska 	zfs_znode_hold_exit(zfsvfs, zh);
1294*7a7741afSMartin Matuska 
1295*7a7741afSMartin Matuska 	return (0);
1296*7a7741afSMartin Matuska }
1297*7a7741afSMartin Matuska 
1298*7a7741afSMartin Matuska void
1299*7a7741afSMartin Matuska zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
1300*7a7741afSMartin Matuska {
1301*7a7741afSMartin Matuska 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
1302*7a7741afSMartin Matuska 	objset_t *os = zfsvfs->z_os;
1303*7a7741afSMartin Matuska 	uint64_t obj = zp->z_id;
1304*7a7741afSMartin Matuska 	uint64_t acl_obj = zfs_external_acl(zp);
1305*7a7741afSMartin Matuska 	znode_hold_t *zh;
1306*7a7741afSMartin Matuska 
1307*7a7741afSMartin Matuska 	zh = zfs_znode_hold_enter(zfsvfs, obj);
1308*7a7741afSMartin Matuska 	if (acl_obj) {
1309*7a7741afSMartin Matuska 		VERIFY(!zp->z_is_sa);
1310*7a7741afSMartin Matuska 		VERIFY(0 == dmu_object_free(os, acl_obj, tx));
1311*7a7741afSMartin Matuska 	}
1312*7a7741afSMartin Matuska 	VERIFY(0 == dmu_object_free(os, obj, tx));
1313*7a7741afSMartin Matuska 	zfs_znode_dmu_fini(zp);
1314*7a7741afSMartin Matuska 	zfs_znode_hold_exit(zfsvfs, zh);
1315*7a7741afSMartin Matuska }
1316*7a7741afSMartin Matuska 
1317*7a7741afSMartin Matuska void
1318*7a7741afSMartin Matuska zfs_zinactive(znode_t *zp)
1319*7a7741afSMartin Matuska {
1320*7a7741afSMartin Matuska 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
1321*7a7741afSMartin Matuska 	uint64_t z_id = zp->z_id;
1322*7a7741afSMartin Matuska 	znode_hold_t *zh;
1323*7a7741afSMartin Matuska 
1324*7a7741afSMartin Matuska 	ASSERT(zp->z_sa_hdl);
1325*7a7741afSMartin Matuska 
1326*7a7741afSMartin Matuska 	/*
1327*7a7741afSMartin Matuska 	 * Don't allow a zfs_zget() while were trying to release this znode.
1328*7a7741afSMartin Matuska 	 */
1329*7a7741afSMartin Matuska 	zh = zfs_znode_hold_enter(zfsvfs, z_id);
1330*7a7741afSMartin Matuska 
1331*7a7741afSMartin Matuska 	mutex_enter(&zp->z_lock);
1332*7a7741afSMartin Matuska 
1333*7a7741afSMartin Matuska 	/*
1334*7a7741afSMartin Matuska 	 * If this was the last reference to a file with no links, remove
1335*7a7741afSMartin Matuska 	 * the file from the file system unless the file system is mounted
1336*7a7741afSMartin Matuska 	 * read-only.  That can happen, for example, if the file system was
1337*7a7741afSMartin Matuska 	 * originally read-write, the file was opened, then unlinked and
1338*7a7741afSMartin Matuska 	 * the file system was made read-only before the file was finally
1339*7a7741afSMartin Matuska 	 * closed.  The file will remain in the unlinked set.
1340*7a7741afSMartin Matuska 	 */
1341*7a7741afSMartin Matuska 	if (zp->z_unlinked) {
1342*7a7741afSMartin Matuska 		ASSERT(!zfsvfs->z_issnap);
1343*7a7741afSMartin Matuska 		if (!zfs_is_readonly(zfsvfs) && !zfs_unlink_suspend_progress) {
1344*7a7741afSMartin Matuska 			mutex_exit(&zp->z_lock);
1345*7a7741afSMartin Matuska 			zfs_znode_hold_exit(zfsvfs, zh);
1346*7a7741afSMartin Matuska 			zfs_rmnode(zp);
1347*7a7741afSMartin Matuska 			return;
1348*7a7741afSMartin Matuska 		}
1349*7a7741afSMartin Matuska 	}
1350*7a7741afSMartin Matuska 
1351*7a7741afSMartin Matuska 	mutex_exit(&zp->z_lock);
1352*7a7741afSMartin Matuska 	zfs_znode_dmu_fini(zp);
1353*7a7741afSMartin Matuska 
1354*7a7741afSMartin Matuska 	zfs_znode_hold_exit(zfsvfs, zh);
1355*7a7741afSMartin Matuska }
1356*7a7741afSMartin Matuska 
1357*7a7741afSMartin Matuska /*
1358*7a7741afSMartin Matuska  * Determine whether the znode's atime must be updated.  The logic mostly
1359*7a7741afSMartin Matuska  * duplicates the Linux kernel's relatime_need_update() functionality.
1360*7a7741afSMartin Matuska  * This function is only called if the underlying filesystem actually has
1361*7a7741afSMartin Matuska  * atime updates enabled.
1362*7a7741afSMartin Matuska  */
1363*7a7741afSMartin Matuska boolean_t
1364*7a7741afSMartin Matuska zfs_relatime_need_update(const struct inode *ip)
1365*7a7741afSMartin Matuska {
1366*7a7741afSMartin Matuska 	inode_timespec_t now, tmp_atime, tmp_ts;
1367*7a7741afSMartin Matuska 
1368*7a7741afSMartin Matuska 	gethrestime(&now);
1369*7a7741afSMartin Matuska 	tmp_atime = zpl_inode_get_atime(ip);
1370*7a7741afSMartin Matuska 	/*
1371*7a7741afSMartin Matuska 	 * In relatime mode, only update the atime if the previous atime
1372*7a7741afSMartin Matuska 	 * is earlier than either the ctime or mtime or if at least a day
1373*7a7741afSMartin Matuska 	 * has passed since the last update of atime.
1374*7a7741afSMartin Matuska 	 */
1375*7a7741afSMartin Matuska 	tmp_ts = zpl_inode_get_mtime(ip);
1376*7a7741afSMartin Matuska 	if (timespec64_compare(&tmp_ts, &tmp_atime) >= 0)
1377*7a7741afSMartin Matuska 		return (B_TRUE);
1378*7a7741afSMartin Matuska 
1379*7a7741afSMartin Matuska 	tmp_ts = zpl_inode_get_ctime(ip);
1380*7a7741afSMartin Matuska 	if (timespec64_compare(&tmp_ts, &tmp_atime) >= 0)
1381*7a7741afSMartin Matuska 		return (B_TRUE);
1382*7a7741afSMartin Matuska 
1383*7a7741afSMartin Matuska 	if ((hrtime_t)now.tv_sec - (hrtime_t)tmp_atime.tv_sec >= 24*60*60)
1384*7a7741afSMartin Matuska 		return (B_TRUE);
1385*7a7741afSMartin Matuska 
1386*7a7741afSMartin Matuska 	return (B_FALSE);
1387*7a7741afSMartin Matuska }
1388*7a7741afSMartin Matuska 
1389*7a7741afSMartin Matuska /*
1390*7a7741afSMartin Matuska  * Prepare to update znode time stamps.
1391*7a7741afSMartin Matuska  *
1392*7a7741afSMartin Matuska  *	IN:	zp	- znode requiring timestamp update
1393*7a7741afSMartin Matuska  *		flag	- ATTR_MTIME, ATTR_CTIME flags
1394*7a7741afSMartin Matuska  *
1395*7a7741afSMartin Matuska  *	OUT:	zp	- z_seq
1396*7a7741afSMartin Matuska  *		mtime	- new mtime
1397*7a7741afSMartin Matuska  *		ctime	- new ctime
1398*7a7741afSMartin Matuska  *
1399*7a7741afSMartin Matuska  *	Note: We don't update atime here, because we rely on Linux VFS to do
1400*7a7741afSMartin Matuska  *	atime updating.
1401*7a7741afSMartin Matuska  */
1402*7a7741afSMartin Matuska void
1403*7a7741afSMartin Matuska zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
1404*7a7741afSMartin Matuska     uint64_t ctime[2])
1405*7a7741afSMartin Matuska {
1406*7a7741afSMartin Matuska 	inode_timespec_t now, tmp_ts;
1407*7a7741afSMartin Matuska 
1408*7a7741afSMartin Matuska 	gethrestime(&now);
1409*7a7741afSMartin Matuska 
1410*7a7741afSMartin Matuska 	zp->z_seq++;
1411*7a7741afSMartin Matuska 
1412*7a7741afSMartin Matuska 	if (flag & ATTR_MTIME) {
1413*7a7741afSMartin Matuska 		ZFS_TIME_ENCODE(&now, mtime);
1414*7a7741afSMartin Matuska 		ZFS_TIME_DECODE(&tmp_ts, mtime);
1415*7a7741afSMartin Matuska 		zpl_inode_set_mtime_to_ts(ZTOI(zp), tmp_ts);
1416*7a7741afSMartin Matuska 		if (ZTOZSB(zp)->z_use_fuids) {
1417*7a7741afSMartin Matuska 			zp->z_pflags |= (ZFS_ARCHIVE |
1418*7a7741afSMartin Matuska 			    ZFS_AV_MODIFIED);
1419*7a7741afSMartin Matuska 		}
1420*7a7741afSMartin Matuska 	}
1421*7a7741afSMartin Matuska 
1422*7a7741afSMartin Matuska 	if (flag & ATTR_CTIME) {
1423*7a7741afSMartin Matuska 		ZFS_TIME_ENCODE(&now, ctime);
1424*7a7741afSMartin Matuska 		ZFS_TIME_DECODE(&tmp_ts, ctime);
1425*7a7741afSMartin Matuska 		zpl_inode_set_ctime_to_ts(ZTOI(zp), tmp_ts);
1426*7a7741afSMartin Matuska 		if (ZTOZSB(zp)->z_use_fuids)
1427*7a7741afSMartin Matuska 			zp->z_pflags |= ZFS_ARCHIVE;
1428*7a7741afSMartin Matuska 	}
1429*7a7741afSMartin Matuska }
1430*7a7741afSMartin Matuska 
1431*7a7741afSMartin Matuska /*
1432*7a7741afSMartin Matuska  * Grow the block size for a file.
1433*7a7741afSMartin Matuska  *
1434*7a7741afSMartin Matuska  *	IN:	zp	- znode of file to free data in.
1435*7a7741afSMartin Matuska  *		size	- requested block size
1436*7a7741afSMartin Matuska  *		tx	- open transaction.
1437*7a7741afSMartin Matuska  *
1438*7a7741afSMartin Matuska  * NOTE: this function assumes that the znode is write locked.
1439*7a7741afSMartin Matuska  */
1440*7a7741afSMartin Matuska void
1441*7a7741afSMartin Matuska zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
1442*7a7741afSMartin Matuska {
1443*7a7741afSMartin Matuska 	int		error;
1444*7a7741afSMartin Matuska 	u_longlong_t	dummy;
1445*7a7741afSMartin Matuska 
1446*7a7741afSMartin Matuska 	if (size <= zp->z_blksz)
1447*7a7741afSMartin Matuska 		return;
1448*7a7741afSMartin Matuska 	/*
1449*7a7741afSMartin Matuska 	 * If the file size is already greater than the current blocksize,
1450*7a7741afSMartin Matuska 	 * we will not grow.  If there is more than one block in a file,
1451*7a7741afSMartin Matuska 	 * the blocksize cannot change.
1452*7a7741afSMartin Matuska 	 */
1453*7a7741afSMartin Matuska 	if (zp->z_blksz && zp->z_size > zp->z_blksz)
1454*7a7741afSMartin Matuska 		return;
1455*7a7741afSMartin Matuska 
1456*7a7741afSMartin Matuska 	error = dmu_object_set_blocksize(ZTOZSB(zp)->z_os, zp->z_id,
1457*7a7741afSMartin Matuska 	    size, 0, tx);
1458*7a7741afSMartin Matuska 
1459*7a7741afSMartin Matuska 	if (error == ENOTSUP)
1460*7a7741afSMartin Matuska 		return;
1461*7a7741afSMartin Matuska 	ASSERT0(error);
1462*7a7741afSMartin Matuska 
1463*7a7741afSMartin Matuska 	/* What blocksize did we actually get? */
1464*7a7741afSMartin Matuska 	dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
1465*7a7741afSMartin Matuska }
1466*7a7741afSMartin Matuska 
1467*7a7741afSMartin Matuska /*
1468*7a7741afSMartin Matuska  * Increase the file length
1469*7a7741afSMartin Matuska  *
1470*7a7741afSMartin Matuska  *	IN:	zp	- znode of file to free data in.
1471*7a7741afSMartin Matuska  *		end	- new end-of-file
1472*7a7741afSMartin Matuska  *
1473*7a7741afSMartin Matuska  *	RETURN:	0 on success, error code on failure
1474*7a7741afSMartin Matuska  */
1475*7a7741afSMartin Matuska static int
1476*7a7741afSMartin Matuska zfs_extend(znode_t *zp, uint64_t end)
1477*7a7741afSMartin Matuska {
1478*7a7741afSMartin Matuska 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
1479*7a7741afSMartin Matuska 	dmu_tx_t *tx;
1480*7a7741afSMartin Matuska 	zfs_locked_range_t *lr;
1481*7a7741afSMartin Matuska 	uint64_t newblksz;
1482*7a7741afSMartin Matuska 	int error;
1483*7a7741afSMartin Matuska 
1484*7a7741afSMartin Matuska 	/*
1485*7a7741afSMartin Matuska 	 * We will change zp_size, lock the whole file.
1486*7a7741afSMartin Matuska 	 */
1487*7a7741afSMartin Matuska 	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);
1488*7a7741afSMartin Matuska 
1489*7a7741afSMartin Matuska 	/*
1490*7a7741afSMartin Matuska 	 * Nothing to do if file already at desired length.
1491*7a7741afSMartin Matuska 	 */
1492*7a7741afSMartin Matuska 	if (end <= zp->z_size) {
1493*7a7741afSMartin Matuska 		zfs_rangelock_exit(lr);
1494*7a7741afSMartin Matuska 		return (0);
1495*7a7741afSMartin Matuska 	}
1496*7a7741afSMartin Matuska 	tx = dmu_tx_create(zfsvfs->z_os);
1497*7a7741afSMartin Matuska 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1498*7a7741afSMartin Matuska 	zfs_sa_upgrade_txholds(tx, zp);
1499*7a7741afSMartin Matuska 	if (end > zp->z_blksz &&
1500*7a7741afSMartin Matuska 	    (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
1501*7a7741afSMartin Matuska 		/*
1502*7a7741afSMartin Matuska 		 * We are growing the file past the current block size.
1503*7a7741afSMartin Matuska 		 */
1504*7a7741afSMartin Matuska 		if (zp->z_blksz > ZTOZSB(zp)->z_max_blksz) {
1505*7a7741afSMartin Matuska 			/*
1506*7a7741afSMartin Matuska 			 * File's blocksize is already larger than the
1507*7a7741afSMartin Matuska 			 * "recordsize" property.  Only let it grow to
1508*7a7741afSMartin Matuska 			 * the next power of 2.
1509*7a7741afSMartin Matuska 			 */
1510*7a7741afSMartin Matuska 			ASSERT(!ISP2(zp->z_blksz));
1511*7a7741afSMartin Matuska 			newblksz = MIN(end, 1 << highbit64(zp->z_blksz));
1512*7a7741afSMartin Matuska 		} else {
1513*7a7741afSMartin Matuska 			newblksz = MIN(end, ZTOZSB(zp)->z_max_blksz);
1514*7a7741afSMartin Matuska 		}
1515*7a7741afSMartin Matuska 		dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
1516*7a7741afSMartin Matuska 	} else {
1517*7a7741afSMartin Matuska 		newblksz = 0;
1518*7a7741afSMartin Matuska 	}
1519*7a7741afSMartin Matuska 
1520*7a7741afSMartin Matuska 	error = dmu_tx_assign(tx, TXG_WAIT);
1521*7a7741afSMartin Matuska 	if (error) {
1522*7a7741afSMartin Matuska 		dmu_tx_abort(tx);
1523*7a7741afSMartin Matuska 		zfs_rangelock_exit(lr);
1524*7a7741afSMartin Matuska 		return (error);
1525*7a7741afSMartin Matuska 	}
1526*7a7741afSMartin Matuska 
1527*7a7741afSMartin Matuska 	if (newblksz)
1528*7a7741afSMartin Matuska 		zfs_grow_blocksize(zp, newblksz, tx);
1529*7a7741afSMartin Matuska 
1530*7a7741afSMartin Matuska 	zp->z_size = end;
1531*7a7741afSMartin Matuska 
1532*7a7741afSMartin Matuska 	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(ZTOZSB(zp)),
1533*7a7741afSMartin Matuska 	    &zp->z_size, sizeof (zp->z_size), tx));
1534*7a7741afSMartin Matuska 
1535*7a7741afSMartin Matuska 	zfs_rangelock_exit(lr);
1536*7a7741afSMartin Matuska 
1537*7a7741afSMartin Matuska 	dmu_tx_commit(tx);
1538*7a7741afSMartin Matuska 
1539*7a7741afSMartin Matuska 	return (0);
1540*7a7741afSMartin Matuska }
1541*7a7741afSMartin Matuska 
1542*7a7741afSMartin Matuska /*
1543*7a7741afSMartin Matuska  * zfs_zero_partial_page - Modeled after update_pages() but
1544*7a7741afSMartin Matuska  * with different arguments and semantics for use by zfs_freesp().
1545*7a7741afSMartin Matuska  *
1546*7a7741afSMartin Matuska  * Zeroes a piece of a single page cache entry for zp at offset
1547*7a7741afSMartin Matuska  * start and length len.
1548*7a7741afSMartin Matuska  *
1549*7a7741afSMartin Matuska  * Caller must acquire a range lock on the file for the region
1550*7a7741afSMartin Matuska  * being zeroed in order that the ARC and page cache stay in sync.
1551*7a7741afSMartin Matuska  */
1552*7a7741afSMartin Matuska static void
1553*7a7741afSMartin Matuska zfs_zero_partial_page(znode_t *zp, uint64_t start, uint64_t len)
1554*7a7741afSMartin Matuska {
1555*7a7741afSMartin Matuska 	struct address_space *mp = ZTOI(zp)->i_mapping;
1556*7a7741afSMartin Matuska 	struct page *pp;
1557*7a7741afSMartin Matuska 	int64_t	off;
1558*7a7741afSMartin Matuska 	void *pb;
1559*7a7741afSMartin Matuska 
1560*7a7741afSMartin Matuska 	ASSERT((start & PAGE_MASK) == ((start + len - 1) & PAGE_MASK));
1561*7a7741afSMartin Matuska 
1562*7a7741afSMartin Matuska 	off = start & (PAGE_SIZE - 1);
1563*7a7741afSMartin Matuska 	start &= PAGE_MASK;
1564*7a7741afSMartin Matuska 
1565*7a7741afSMartin Matuska 	pp = find_lock_page(mp, start >> PAGE_SHIFT);
1566*7a7741afSMartin Matuska 	if (pp) {
1567*7a7741afSMartin Matuska 		if (mapping_writably_mapped(mp))
1568*7a7741afSMartin Matuska 			flush_dcache_page(pp);
1569*7a7741afSMartin Matuska 
1570*7a7741afSMartin Matuska 		pb = kmap(pp);
1571*7a7741afSMartin Matuska 		memset(pb + off, 0, len);
1572*7a7741afSMartin Matuska 		kunmap(pp);
1573*7a7741afSMartin Matuska 
1574*7a7741afSMartin Matuska 		if (mapping_writably_mapped(mp))
1575*7a7741afSMartin Matuska 			flush_dcache_page(pp);
1576*7a7741afSMartin Matuska 
1577*7a7741afSMartin Matuska 		mark_page_accessed(pp);
1578*7a7741afSMartin Matuska 		SetPageUptodate(pp);
1579*7a7741afSMartin Matuska 		ClearPageError(pp);
1580*7a7741afSMartin Matuska 		unlock_page(pp);
1581*7a7741afSMartin Matuska 		put_page(pp);
1582*7a7741afSMartin Matuska 	}
1583*7a7741afSMartin Matuska }
1584*7a7741afSMartin Matuska 
1585*7a7741afSMartin Matuska /*
1586*7a7741afSMartin Matuska  * Free space in a file.
1587*7a7741afSMartin Matuska  *
1588*7a7741afSMartin Matuska  *	IN:	zp	- znode of file to free data in.
1589*7a7741afSMartin Matuska  *		off	- start of section to free.
1590*7a7741afSMartin Matuska  *		len	- length of section to free.
1591*7a7741afSMartin Matuska  *
1592*7a7741afSMartin Matuska  *	RETURN:	0 on success, error code on failure
1593*7a7741afSMartin Matuska  */
1594*7a7741afSMartin Matuska static int
1595*7a7741afSMartin Matuska zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
1596*7a7741afSMartin Matuska {
1597*7a7741afSMartin Matuska 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
1598*7a7741afSMartin Matuska 	zfs_locked_range_t *lr;
1599*7a7741afSMartin Matuska 	int error;
1600*7a7741afSMartin Matuska 
1601*7a7741afSMartin Matuska 	/*
1602*7a7741afSMartin Matuska 	 * Lock the range being freed.
1603*7a7741afSMartin Matuska 	 */
1604*7a7741afSMartin Matuska 	lr = zfs_rangelock_enter(&zp->z_rangelock, off, len, RL_WRITER);
1605*7a7741afSMartin Matuska 
1606*7a7741afSMartin Matuska 	/*
1607*7a7741afSMartin Matuska 	 * Nothing to do if file already at desired length.
1608*7a7741afSMartin Matuska 	 */
1609*7a7741afSMartin Matuska 	if (off >= zp->z_size) {
1610*7a7741afSMartin Matuska 		zfs_rangelock_exit(lr);
1611*7a7741afSMartin Matuska 		return (0);
1612*7a7741afSMartin Matuska 	}
1613*7a7741afSMartin Matuska 
1614*7a7741afSMartin Matuska 	if (off + len > zp->z_size)
1615*7a7741afSMartin Matuska 		len = zp->z_size - off;
1616*7a7741afSMartin Matuska 
1617*7a7741afSMartin Matuska 	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);
1618*7a7741afSMartin Matuska 
1619*7a7741afSMartin Matuska 	/*
1620*7a7741afSMartin Matuska 	 * Zero partial page cache entries.  This must be done under a
1621*7a7741afSMartin Matuska 	 * range lock in order to keep the ARC and page cache in sync.
1622*7a7741afSMartin Matuska 	 */
1623*7a7741afSMartin Matuska 	if (zn_has_cached_data(zp, off, off + len - 1)) {
1624*7a7741afSMartin Matuska 		loff_t first_page, last_page, page_len;
1625*7a7741afSMartin Matuska 		loff_t first_page_offset, last_page_offset;
1626*7a7741afSMartin Matuska 
1627*7a7741afSMartin Matuska 		/* first possible full page in hole */
1628*7a7741afSMartin Matuska 		first_page = (off + PAGE_SIZE - 1) >> PAGE_SHIFT;
1629*7a7741afSMartin Matuska 		/* last page of hole */
1630*7a7741afSMartin Matuska 		last_page = (off + len) >> PAGE_SHIFT;
1631*7a7741afSMartin Matuska 
1632*7a7741afSMartin Matuska 		/* offset of first_page */
1633*7a7741afSMartin Matuska 		first_page_offset = first_page << PAGE_SHIFT;
1634*7a7741afSMartin Matuska 		/* offset of last_page */
1635*7a7741afSMartin Matuska 		last_page_offset = last_page << PAGE_SHIFT;
1636*7a7741afSMartin Matuska 
1637*7a7741afSMartin Matuska 		/* truncate whole pages */
1638*7a7741afSMartin Matuska 		if (last_page_offset > first_page_offset) {
1639*7a7741afSMartin Matuska 			truncate_inode_pages_range(ZTOI(zp)->i_mapping,
1640*7a7741afSMartin Matuska 			    first_page_offset, last_page_offset - 1);
1641*7a7741afSMartin Matuska 		}
1642*7a7741afSMartin Matuska 
1643*7a7741afSMartin Matuska 		/* truncate sub-page ranges */
1644*7a7741afSMartin Matuska 		if (first_page > last_page) {
1645*7a7741afSMartin Matuska 			/* entire punched area within a single page */
1646*7a7741afSMartin Matuska 			zfs_zero_partial_page(zp, off, len);
1647*7a7741afSMartin Matuska 		} else {
1648*7a7741afSMartin Matuska 			/* beginning of punched area at the end of a page */
1649*7a7741afSMartin Matuska 			page_len  = first_page_offset - off;
1650*7a7741afSMartin Matuska 			if (page_len > 0)
1651*7a7741afSMartin Matuska 				zfs_zero_partial_page(zp, off, page_len);
1652*7a7741afSMartin Matuska 
1653*7a7741afSMartin Matuska 			/* end of punched area at the beginning of a page */
1654*7a7741afSMartin Matuska 			page_len = off + len - last_page_offset;
1655*7a7741afSMartin Matuska 			if (page_len > 0)
1656*7a7741afSMartin Matuska 				zfs_zero_partial_page(zp, last_page_offset,
1657*7a7741afSMartin Matuska 				    page_len);
1658*7a7741afSMartin Matuska 		}
1659*7a7741afSMartin Matuska 	}
1660*7a7741afSMartin Matuska 	zfs_rangelock_exit(lr);
1661*7a7741afSMartin Matuska 
1662*7a7741afSMartin Matuska 	return (error);
1663*7a7741afSMartin Matuska }
1664*7a7741afSMartin Matuska 
1665*7a7741afSMartin Matuska /*
1666*7a7741afSMartin Matuska  * Truncate a file
1667*7a7741afSMartin Matuska  *
1668*7a7741afSMartin Matuska  *	IN:	zp	- znode of file to free data in.
1669*7a7741afSMartin Matuska  *		end	- new end-of-file.
1670*7a7741afSMartin Matuska  *
1671*7a7741afSMartin Matuska  *	RETURN:	0 on success, error code on failure
1672*7a7741afSMartin Matuska  */
1673*7a7741afSMartin Matuska static int
1674*7a7741afSMartin Matuska zfs_trunc(znode_t *zp, uint64_t end)
1675*7a7741afSMartin Matuska {
1676*7a7741afSMartin Matuska 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
1677*7a7741afSMartin Matuska 	dmu_tx_t *tx;
1678*7a7741afSMartin Matuska 	zfs_locked_range_t *lr;
1679*7a7741afSMartin Matuska 	int error;
1680*7a7741afSMartin Matuska 	sa_bulk_attr_t bulk[2];
1681*7a7741afSMartin Matuska 	int count = 0;
1682*7a7741afSMartin Matuska 
1683*7a7741afSMartin Matuska 	/*
1684*7a7741afSMartin Matuska 	 * We will change zp_size, lock the whole file.
1685*7a7741afSMartin Matuska 	 */
1686*7a7741afSMartin Matuska 	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_WRITER);
1687*7a7741afSMartin Matuska 
1688*7a7741afSMartin Matuska 	/*
1689*7a7741afSMartin Matuska 	 * Nothing to do if file already at desired length.
1690*7a7741afSMartin Matuska 	 */
1691*7a7741afSMartin Matuska 	if (end >= zp->z_size) {
1692*7a7741afSMartin Matuska 		zfs_rangelock_exit(lr);
1693*7a7741afSMartin Matuska 		return (0);
1694*7a7741afSMartin Matuska 	}
1695*7a7741afSMartin Matuska 
1696*7a7741afSMartin Matuska 	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end,
1697*7a7741afSMartin Matuska 	    DMU_OBJECT_END);
1698*7a7741afSMartin Matuska 	if (error) {
1699*7a7741afSMartin Matuska 		zfs_rangelock_exit(lr);
1700*7a7741afSMartin Matuska 		return (error);
1701*7a7741afSMartin Matuska 	}
1702*7a7741afSMartin Matuska 	tx = dmu_tx_create(zfsvfs->z_os);
1703*7a7741afSMartin Matuska 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1704*7a7741afSMartin Matuska 	zfs_sa_upgrade_txholds(tx, zp);
1705*7a7741afSMartin Matuska 	dmu_tx_mark_netfree(tx);
1706*7a7741afSMartin Matuska 	error = dmu_tx_assign(tx, TXG_WAIT);
1707*7a7741afSMartin Matuska 	if (error) {
1708*7a7741afSMartin Matuska 		dmu_tx_abort(tx);
1709*7a7741afSMartin Matuska 		zfs_rangelock_exit(lr);
1710*7a7741afSMartin Matuska 		return (error);
1711*7a7741afSMartin Matuska 	}
1712*7a7741afSMartin Matuska 
1713*7a7741afSMartin Matuska 	zp->z_size = end;
1714*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
1715*7a7741afSMartin Matuska 	    NULL, &zp->z_size, sizeof (zp->z_size));
1716*7a7741afSMartin Matuska 
1717*7a7741afSMartin Matuska 	if (end == 0) {
1718*7a7741afSMartin Matuska 		zp->z_pflags &= ~ZFS_SPARSE;
1719*7a7741afSMartin Matuska 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
1720*7a7741afSMartin Matuska 		    NULL, &zp->z_pflags, 8);
1721*7a7741afSMartin Matuska 	}
1722*7a7741afSMartin Matuska 	VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0);
1723*7a7741afSMartin Matuska 
1724*7a7741afSMartin Matuska 	dmu_tx_commit(tx);
1725*7a7741afSMartin Matuska 	zfs_rangelock_exit(lr);
1726*7a7741afSMartin Matuska 
1727*7a7741afSMartin Matuska 	return (0);
1728*7a7741afSMartin Matuska }
1729*7a7741afSMartin Matuska 
1730*7a7741afSMartin Matuska /*
1731*7a7741afSMartin Matuska  * Free space in a file
1732*7a7741afSMartin Matuska  *
1733*7a7741afSMartin Matuska  *	IN:	zp	- znode of file to free data in.
1734*7a7741afSMartin Matuska  *		off	- start of range
1735*7a7741afSMartin Matuska  *		len	- length of range (0 => to EOF)
1736*7a7741afSMartin Matuska  *		flag	- current file open mode flags.
1737*7a7741afSMartin Matuska  *		log	- TRUE if this action should be logged
1738*7a7741afSMartin Matuska  *
1739*7a7741afSMartin Matuska  *	RETURN:	0 on success, error code on failure
1740*7a7741afSMartin Matuska  */
1741*7a7741afSMartin Matuska int
1742*7a7741afSMartin Matuska zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
1743*7a7741afSMartin Matuska {
1744*7a7741afSMartin Matuska 	dmu_tx_t *tx;
1745*7a7741afSMartin Matuska 	zfsvfs_t *zfsvfs = ZTOZSB(zp);
1746*7a7741afSMartin Matuska 	zilog_t *zilog = zfsvfs->z_log;
1747*7a7741afSMartin Matuska 	uint64_t mode;
1748*7a7741afSMartin Matuska 	uint64_t mtime[2], ctime[2];
1749*7a7741afSMartin Matuska 	sa_bulk_attr_t bulk[3];
1750*7a7741afSMartin Matuska 	int count = 0;
1751*7a7741afSMartin Matuska 	int error;
1752*7a7741afSMartin Matuska 
1753*7a7741afSMartin Matuska 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode,
1754*7a7741afSMartin Matuska 	    sizeof (mode))) != 0)
1755*7a7741afSMartin Matuska 		return (error);
1756*7a7741afSMartin Matuska 
1757*7a7741afSMartin Matuska 	if (off > zp->z_size) {
1758*7a7741afSMartin Matuska 		error =  zfs_extend(zp, off+len);
1759*7a7741afSMartin Matuska 		if (error == 0 && log)
1760*7a7741afSMartin Matuska 			goto log;
1761*7a7741afSMartin Matuska 		goto out;
1762*7a7741afSMartin Matuska 	}
1763*7a7741afSMartin Matuska 
1764*7a7741afSMartin Matuska 	if (len == 0) {
1765*7a7741afSMartin Matuska 		error = zfs_trunc(zp, off);
1766*7a7741afSMartin Matuska 	} else {
1767*7a7741afSMartin Matuska 		if ((error = zfs_free_range(zp, off, len)) == 0 &&
1768*7a7741afSMartin Matuska 		    off + len > zp->z_size)
1769*7a7741afSMartin Matuska 			error = zfs_extend(zp, off+len);
1770*7a7741afSMartin Matuska 	}
1771*7a7741afSMartin Matuska 	if (error || !log)
1772*7a7741afSMartin Matuska 		goto out;
1773*7a7741afSMartin Matuska log:
1774*7a7741afSMartin Matuska 	tx = dmu_tx_create(zfsvfs->z_os);
1775*7a7741afSMartin Matuska 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
1776*7a7741afSMartin Matuska 	zfs_sa_upgrade_txholds(tx, zp);
1777*7a7741afSMartin Matuska 	error = dmu_tx_assign(tx, TXG_WAIT);
1778*7a7741afSMartin Matuska 	if (error) {
1779*7a7741afSMartin Matuska 		dmu_tx_abort(tx);
1780*7a7741afSMartin Matuska 		goto out;
1781*7a7741afSMartin Matuska 	}
1782*7a7741afSMartin Matuska 
1783*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16);
1784*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16);
1785*7a7741afSMartin Matuska 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
1786*7a7741afSMartin Matuska 	    NULL, &zp->z_pflags, 8);
1787*7a7741afSMartin Matuska 	zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
1788*7a7741afSMartin Matuska 	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
1789*7a7741afSMartin Matuska 	ASSERT(error == 0);
1790*7a7741afSMartin Matuska 
1791*7a7741afSMartin Matuska 	zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
1792*7a7741afSMartin Matuska 
1793*7a7741afSMartin Matuska 	dmu_tx_commit(tx);
1794*7a7741afSMartin Matuska 
1795*7a7741afSMartin Matuska 	zfs_znode_update_vfs(zp);
1796*7a7741afSMartin Matuska 	error = 0;
1797*7a7741afSMartin Matuska 
1798*7a7741afSMartin Matuska out:
1799*7a7741afSMartin Matuska 	/*
1800*7a7741afSMartin Matuska 	 * Truncate the page cache - for file truncate operations, use
1801*7a7741afSMartin Matuska 	 * the purpose-built API for truncations.  For punching operations,
1802*7a7741afSMartin Matuska 	 * the truncation is handled under a range lock in zfs_free_range.
1803*7a7741afSMartin Matuska 	 */
1804*7a7741afSMartin Matuska 	if (len == 0)
1805*7a7741afSMartin Matuska 		truncate_setsize(ZTOI(zp), off);
1806*7a7741afSMartin Matuska 	return (error);
1807*7a7741afSMartin Matuska }
1808*7a7741afSMartin Matuska 
1809*7a7741afSMartin Matuska void
1810*7a7741afSMartin Matuska zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
1811*7a7741afSMartin Matuska {
1812*7a7741afSMartin Matuska 	struct super_block *sb;
1813*7a7741afSMartin Matuska 	zfsvfs_t	*zfsvfs;
1814*7a7741afSMartin Matuska 	uint64_t	moid, obj, sa_obj, version;
1815*7a7741afSMartin Matuska 	uint64_t	sense = ZFS_CASE_SENSITIVE;
1816*7a7741afSMartin Matuska 	uint64_t	norm = 0;
1817*7a7741afSMartin Matuska 	nvpair_t	*elem;
1818*7a7741afSMartin Matuska 	int		size;
1819*7a7741afSMartin Matuska 	int		error;
1820*7a7741afSMartin Matuska 	int		i;
1821*7a7741afSMartin Matuska 	znode_t		*rootzp = NULL;
1822*7a7741afSMartin Matuska 	vattr_t		vattr;
1823*7a7741afSMartin Matuska 	znode_t		*zp;
1824*7a7741afSMartin Matuska 	zfs_acl_ids_t	acl_ids;
1825*7a7741afSMartin Matuska 
1826*7a7741afSMartin Matuska 	/*
1827*7a7741afSMartin Matuska 	 * First attempt to create master node.
1828*7a7741afSMartin Matuska 	 */
1829*7a7741afSMartin Matuska 	/*
1830*7a7741afSMartin Matuska 	 * In an empty objset, there are no blocks to read and thus
1831*7a7741afSMartin Matuska 	 * there can be no i/o errors (which we assert below).
1832*7a7741afSMartin Matuska 	 */
1833*7a7741afSMartin Matuska 	moid = MASTER_NODE_OBJ;
1834*7a7741afSMartin Matuska 	error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
1835*7a7741afSMartin Matuska 	    DMU_OT_NONE, 0, tx);
1836*7a7741afSMartin Matuska 	ASSERT(error == 0);
1837*7a7741afSMartin Matuska 
1838*7a7741afSMartin Matuska 	/*
1839*7a7741afSMartin Matuska 	 * Set starting attributes.
1840*7a7741afSMartin Matuska 	 */
1841*7a7741afSMartin Matuska 	version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
1842*7a7741afSMartin Matuska 	elem = NULL;
1843*7a7741afSMartin Matuska 	while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
1844*7a7741afSMartin Matuska 		/* For the moment we expect all zpl props to be uint64_ts */
1845*7a7741afSMartin Matuska 		uint64_t val;
1846*7a7741afSMartin Matuska 		const char *name;
1847*7a7741afSMartin Matuska 
1848*7a7741afSMartin Matuska 		ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
1849*7a7741afSMartin Matuska 		VERIFY(nvpair_value_uint64(elem, &val) == 0);
1850*7a7741afSMartin Matuska 		name = nvpair_name(elem);
1851*7a7741afSMartin Matuska 		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
1852*7a7741afSMartin Matuska 			if (val < version)
1853*7a7741afSMartin Matuska 				version = val;
1854*7a7741afSMartin Matuska 		} else {
1855*7a7741afSMartin Matuska 			error = zap_update(os, moid, name, 8, 1, &val, tx);
1856*7a7741afSMartin Matuska 		}
1857*7a7741afSMartin Matuska 		ASSERT(error == 0);
1858*7a7741afSMartin Matuska 		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
1859*7a7741afSMartin Matuska 			norm = val;
1860*7a7741afSMartin Matuska 		else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
1861*7a7741afSMartin Matuska 			sense = val;
1862*7a7741afSMartin Matuska 	}
1863*7a7741afSMartin Matuska 	ASSERT(version != 0);
1864*7a7741afSMartin Matuska 	error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
1865*7a7741afSMartin Matuska 	ASSERT(error == 0);
1866*7a7741afSMartin Matuska 
1867*7a7741afSMartin Matuska 	/*
1868*7a7741afSMartin Matuska 	 * Create zap object used for SA attribute registration
1869*7a7741afSMartin Matuska 	 */
1870*7a7741afSMartin Matuska 
1871*7a7741afSMartin Matuska 	if (version >= ZPL_VERSION_SA) {
1872*7a7741afSMartin Matuska 		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
1873*7a7741afSMartin Matuska 		    DMU_OT_NONE, 0, tx);
1874*7a7741afSMartin Matuska 		error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
1875*7a7741afSMartin Matuska 		ASSERT(error == 0);
1876*7a7741afSMartin Matuska 	} else {
1877*7a7741afSMartin Matuska 		sa_obj = 0;
1878*7a7741afSMartin Matuska 	}
1879*7a7741afSMartin Matuska 	/*
1880*7a7741afSMartin Matuska 	 * Create a delete queue.
1881*7a7741afSMartin Matuska 	 */
1882*7a7741afSMartin Matuska 	obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
1883*7a7741afSMartin Matuska 
1884*7a7741afSMartin Matuska 	error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
1885*7a7741afSMartin Matuska 	ASSERT(error == 0);
1886*7a7741afSMartin Matuska 
1887*7a7741afSMartin Matuska 	/*
1888*7a7741afSMartin Matuska 	 * Create root znode.  Create minimal znode/inode/zfsvfs/sb
1889*7a7741afSMartin Matuska 	 * to allow zfs_mknode to work.
1890*7a7741afSMartin Matuska 	 */
1891*7a7741afSMartin Matuska 	vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID;
1892*7a7741afSMartin Matuska 	vattr.va_mode = S_IFDIR|0755;
1893*7a7741afSMartin Matuska 	vattr.va_uid = crgetuid(cr);
1894*7a7741afSMartin Matuska 	vattr.va_gid = crgetgid(cr);
1895*7a7741afSMartin Matuska 
1896*7a7741afSMartin Matuska 	rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
1897*7a7741afSMartin Matuska 	rootzp->z_unlinked = B_FALSE;
1898*7a7741afSMartin Matuska 	rootzp->z_atime_dirty = B_FALSE;
1899*7a7741afSMartin Matuska 	rootzp->z_is_sa = USE_SA(version, os);
1900*7a7741afSMartin Matuska 	rootzp->z_pflags = 0;
1901*7a7741afSMartin Matuska 
1902*7a7741afSMartin Matuska 	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
1903*7a7741afSMartin Matuska 	zfsvfs->z_os = os;
1904*7a7741afSMartin Matuska 	zfsvfs->z_parent = zfsvfs;
1905*7a7741afSMartin Matuska 	zfsvfs->z_version = version;
1906*7a7741afSMartin Matuska 	zfsvfs->z_use_fuids = USE_FUIDS(version, os);
1907*7a7741afSMartin Matuska 	zfsvfs->z_use_sa = USE_SA(version, os);
1908*7a7741afSMartin Matuska 	zfsvfs->z_norm = norm;
1909*7a7741afSMartin Matuska 
1910*7a7741afSMartin Matuska 	sb = kmem_zalloc(sizeof (struct super_block), KM_SLEEP);
1911*7a7741afSMartin Matuska 	sb->s_fs_info = zfsvfs;
1912*7a7741afSMartin Matuska 
1913*7a7741afSMartin Matuska 	ZTOI(rootzp)->i_sb = sb;
1914*7a7741afSMartin Matuska 
1915*7a7741afSMartin Matuska 	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
1916*7a7741afSMartin Matuska 	    &zfsvfs->z_attr_table);
1917*7a7741afSMartin Matuska 
1918*7a7741afSMartin Matuska 	ASSERT(error == 0);
1919*7a7741afSMartin Matuska 
1920*7a7741afSMartin Matuska 	/*
1921*7a7741afSMartin Matuska 	 * Fold case on file systems that are always or sometimes case
1922*7a7741afSMartin Matuska 	 * insensitive.
1923*7a7741afSMartin Matuska 	 */
1924*7a7741afSMartin Matuska 	if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
1925*7a7741afSMartin Matuska 		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
1926*7a7741afSMartin Matuska 
1927*7a7741afSMartin Matuska 	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
1928*7a7741afSMartin Matuska 	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
1929*7a7741afSMartin Matuska 	    offsetof(znode_t, z_link_node));
1930*7a7741afSMartin Matuska 
1931*7a7741afSMartin Matuska 	size = MIN(1 << (highbit64(zfs_object_mutex_size)-1), ZFS_OBJ_MTX_MAX);
1932*7a7741afSMartin Matuska 	zfsvfs->z_hold_size = size;
1933*7a7741afSMartin Matuska 	zfsvfs->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size,
1934*7a7741afSMartin Matuska 	    KM_SLEEP);
1935*7a7741afSMartin Matuska 	zfsvfs->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP);
1936*7a7741afSMartin Matuska 	for (i = 0; i != size; i++) {
1937*7a7741afSMartin Matuska 		avl_create(&zfsvfs->z_hold_trees[i], zfs_znode_hold_compare,
1938*7a7741afSMartin Matuska 		    sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node));
1939*7a7741afSMartin Matuska 		mutex_init(&zfsvfs->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL);
1940*7a7741afSMartin Matuska 	}
1941*7a7741afSMartin Matuska 
1942*7a7741afSMartin Matuska 	VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
1943*7a7741afSMartin Matuska 	    cr, NULL, &acl_ids, zfs_init_idmap));
1944*7a7741afSMartin Matuska 	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
1945*7a7741afSMartin Matuska 	ASSERT3P(zp, ==, rootzp);
1946*7a7741afSMartin Matuska 	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
1947*7a7741afSMartin Matuska 	ASSERT(error == 0);
1948*7a7741afSMartin Matuska 	zfs_acl_ids_free(&acl_ids);
1949*7a7741afSMartin Matuska 
1950*7a7741afSMartin Matuska 	atomic_set(&ZTOI(rootzp)->i_count, 0);
1951*7a7741afSMartin Matuska 	sa_handle_destroy(rootzp->z_sa_hdl);
1952*7a7741afSMartin Matuska 	kmem_cache_free(znode_cache, rootzp);
1953*7a7741afSMartin Matuska 
1954*7a7741afSMartin Matuska 	for (i = 0; i != size; i++) {
1955*7a7741afSMartin Matuska 		avl_destroy(&zfsvfs->z_hold_trees[i]);
1956*7a7741afSMartin Matuska 		mutex_destroy(&zfsvfs->z_hold_locks[i]);
1957*7a7741afSMartin Matuska 	}
1958*7a7741afSMartin Matuska 
1959*7a7741afSMartin Matuska 	mutex_destroy(&zfsvfs->z_znodes_lock);
1960*7a7741afSMartin Matuska 
1961*7a7741afSMartin Matuska 	vmem_free(zfsvfs->z_hold_trees, sizeof (avl_tree_t) * size);
1962*7a7741afSMartin Matuska 	vmem_free(zfsvfs->z_hold_locks, sizeof (kmutex_t) * size);
1963*7a7741afSMartin Matuska 	kmem_free(sb, sizeof (struct super_block));
1964*7a7741afSMartin Matuska 	kmem_free(zfsvfs, sizeof (zfsvfs_t));
1965*7a7741afSMartin Matuska }
1966*7a7741afSMartin Matuska 
1967*7a7741afSMartin Matuska EXPORT_SYMBOL(zfs_create_fs);
1968*7a7741afSMartin Matuska EXPORT_SYMBOL(zfs_obj_to_path);
1969*7a7741afSMartin Matuska 
1970*7a7741afSMartin Matuska module_param(zfs_object_mutex_size, uint, 0644);
1971*7a7741afSMartin Matuska MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array");
1972*7a7741afSMartin Matuska module_param(zfs_unlink_suspend_progress, int, 0644);
1973*7a7741afSMartin Matuska MODULE_PARM_DESC(zfs_unlink_suspend_progress, "Set to prevent async unlinks "
1974*7a7741afSMartin Matuska "(debug - leaks space into the unlinked set)");
1975