xref: /onnv-gate/usr/src/uts/common/fs/zfs/zfs_znode.c (revision 13144:9747db92c2e8)
1789Sahrens /*
2789Sahrens  * CDDL HEADER START
3789Sahrens  *
4789Sahrens  * The contents of this file are subject to the terms of the
51544Seschrock  * Common Development and Distribution License (the "License").
61544Seschrock  * You may not use this file except in compliance with the License.
7789Sahrens  *
8789Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9789Sahrens  * or http://www.opensolaris.org/os/licensing.
10789Sahrens  * See the License for the specific language governing permissions
11789Sahrens  * and limitations under the License.
12789Sahrens  *
13789Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14789Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15789Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16789Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17789Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18789Sahrens  *
19789Sahrens  * CDDL HEADER END
20789Sahrens  */
21789Sahrens /*
2212050SMark.Shellenbaum@Sun.COM  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23789Sahrens  */
24789Sahrens 
254144Speteh /* Portions Copyright 2007 Jeremy Teo */
264144Speteh 
273444Sek110237 #ifdef _KERNEL
28789Sahrens #include <sys/types.h>
29789Sahrens #include <sys/param.h>
30789Sahrens #include <sys/time.h>
31789Sahrens #include <sys/systm.h>
32789Sahrens #include <sys/sysmacros.h>
33789Sahrens #include <sys/resource.h>
34789Sahrens #include <sys/mntent.h>
351816Smarks #include <sys/mkdev.h>
365498Stimh #include <sys/u8_textprep.h>
376492Stimh #include <sys/dsl_dataset.h>
38789Sahrens #include <sys/vfs.h>
393898Srsb #include <sys/vfs_opreg.h>
40789Sahrens #include <sys/vnode.h>
41789Sahrens #include <sys/file.h>
42789Sahrens #include <sys/kmem.h>
43789Sahrens #include <sys/errno.h>
44789Sahrens #include <sys/unistd.h>
45789Sahrens #include <sys/mode.h>
46789Sahrens #include <sys/atomic.h>
47789Sahrens #include <vm/pvn.h>
48789Sahrens #include "fs/fs_subr.h"
49789Sahrens #include <sys/zfs_dir.h>
50789Sahrens #include <sys/zfs_acl.h>
51789Sahrens #include <sys/zfs_ioctl.h>
521669Sperrin #include <sys/zfs_rlock.h>
535331Samw #include <sys/zfs_fuid.h>
5411935SMark.Shellenbaum@Sun.COM #include <sys/dnode.h>
553444Sek110237 #include <sys/fs/zfs.h>
565331Samw #include <sys/kidmap.h>
573444Sek110237 #endif /* _KERNEL */
583444Sek110237 
593444Sek110237 #include <sys/dmu.h>
603444Sek110237 #include <sys/refcount.h>
613444Sek110237 #include <sys/stat.h>
62789Sahrens #include <sys/zap.h>
633444Sek110237 #include <sys/zfs_znode.h>
6411935SMark.Shellenbaum@Sun.COM #include <sys/sa.h>
6511935SMark.Shellenbaum@Sun.COM #include <sys/zfs_sa.h>
6613043STim.Haley@Sun.COM #include <sys/zfs_stat.h>
67789Sahrens 
685498Stimh #include "zfs_prop.h"
6911935SMark.Shellenbaum@Sun.COM #include "zfs_comutil.h"
705498Stimh 
/*
 * Statistics gathering for znodes is controlled by ZNODE_STATS.  DEBUG
 * builds define it automatically; other builds may define it by hand.
 */
#ifdef	DEBUG
#define	ZNODE_STATS
#endif	/* DEBUG */

/*
 * ZNODE_STAT_ADD(stat) post-increments the named counter when statistics
 * are enabled and expands to nothing when they are not.
 */
#ifndef	ZNODE_STATS
#define	ZNODE_STAT_ADD(stat)			/* nothing */
#else
#define	ZNODE_STAT_ADD(stat)			((stat)++)
#endif	/* ZNODE_STATS */
846712Stomee 
/*
 * Functions needed for userland (i.e. libzpool) are not put under
 * #ifdef _KERNEL; the rest of the functions have dependencies
 * (such as VFS logic) that will not compile easily in userland.
 */
903444Sek110237 #ifdef _KERNEL
919788STom.Erickson@Sun.COM /*
929788STom.Erickson@Sun.COM  * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to
939788STom.Erickson@Sun.COM  * be freed before it can be safely accessed.
949788STom.Erickson@Sun.COM  */
959788STom.Erickson@Sun.COM krwlock_t zfsvfs_lock;
969788STom.Erickson@Sun.COM 
976712Stomee static kmem_cache_t *znode_cache = NULL;
98789Sahrens 
99789Sahrens /*ARGSUSED*/
100789Sahrens static void
znode_evict_error(dmu_buf_t * dbuf,void * user_ptr)1015642Smaybee znode_evict_error(dmu_buf_t *dbuf, void *user_ptr)
102789Sahrens {
1035642Smaybee 	/*
1045642Smaybee 	 * We should never drop all dbuf refs without first clearing
1055642Smaybee 	 * the eviction callback.
1065642Smaybee 	 */
1075642Smaybee 	panic("evicting znode %p\n", user_ptr);
108789Sahrens }
109789Sahrens 
110789Sahrens /*ARGSUSED*/
111789Sahrens static int
zfs_znode_cache_constructor(void * buf,void * arg,int kmflags)1126712Stomee zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
113789Sahrens {
114789Sahrens 	znode_t *zp = buf;
115789Sahrens 
1166712Stomee 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
1176712Stomee 
1186712Stomee 	zp->z_vnode = vn_alloc(kmflags);
1196712Stomee 	if (zp->z_vnode == NULL) {
1206712Stomee 		return (-1);
1216712Stomee 	}
1226712Stomee 	ZTOV(zp)->v_data = zp;
1236712Stomee 
1246712Stomee 	list_link_init(&zp->z_link_node);
1256712Stomee 
126789Sahrens 	mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
1271669Sperrin 	rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
1283897Smaybee 	rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL);
129789Sahrens 	mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
1301669Sperrin 
1311669Sperrin 	mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL);
1321669Sperrin 	avl_create(&zp->z_range_avl, zfs_range_compare,
1331669Sperrin 	    sizeof (rl_t), offsetof(rl_t, r_node));
1341669Sperrin 
1356712Stomee 	zp->z_dirlocks = NULL;
1369981STim.Haley@Sun.COM 	zp->z_acl_cached = NULL;
13712684STom.Erickson@Sun.COM 	zp->z_moved = 0;
138789Sahrens 	return (0);
139789Sahrens }
140789Sahrens 
141789Sahrens /*ARGSUSED*/
142789Sahrens static void
zfs_znode_cache_destructor(void * buf,void * arg)1436712Stomee zfs_znode_cache_destructor(void *buf, void *arg)
144789Sahrens {
145789Sahrens 	znode_t *zp = buf;
146789Sahrens 
1476712Stomee 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
1486712Stomee 	ASSERT(ZTOV(zp)->v_data == zp);
1496712Stomee 	vn_free(ZTOV(zp));
1506712Stomee 	ASSERT(!list_link_active(&zp->z_link_node));
151789Sahrens 	mutex_destroy(&zp->z_lock);
1521669Sperrin 	rw_destroy(&zp->z_parent_lock);
1533897Smaybee 	rw_destroy(&zp->z_name_lock);
154789Sahrens 	mutex_destroy(&zp->z_acl_lock);
1551669Sperrin 	avl_destroy(&zp->z_range_avl);
1564831Sgw25295 	mutex_destroy(&zp->z_range_lock);
157789Sahrens 
1586712Stomee 	ASSERT(zp->z_dirlocks == NULL);
15910143STim.Haley@Sun.COM 	ASSERT(zp->z_acl_cached == NULL);
1606712Stomee }
1616712Stomee 
#ifdef	ZNODE_STATS
/*
 * Counters recording why zfs_znode_move() declined to move a znode.
 * They are bumped via ZNODE_STAT_ADD() and exist only with ZNODE_STATS.
 */
static struct {
	/* z_zfsvfs was not a valid pointer on entry */
	uint64_t zms_zfsvfs_invalid;
	/* z_zfsvfs changed before zfsvfs_lock was acquired */
	uint64_t zms_zfsvfs_recheck1;
	/* the file system was found to be unmounted */
	uint64_t zms_zfsvfs_unmounted;
	/* z_zfsvfs changed before z_znodes_lock was acquired */
	uint64_t zms_zfsvfs_recheck2;
	/* the object hold could not be taken without blocking */
	uint64_t zms_obj_held;
	/* the vnode's v_lock could not be taken without blocking */
	uint64_t zms_vnode_locked;
	/* the vnode was not held solely by the DNLC */
	uint64_t zms_not_only_dnlc;
} znode_move_stats;
#endif	/* ZNODE_STATS */
1736712Stomee 
1746712Stomee static void
zfs_znode_move_impl(znode_t * ozp,znode_t * nzp)1756712Stomee zfs_znode_move_impl(znode_t *ozp, znode_t *nzp)
1766712Stomee {
1776712Stomee 	vnode_t *vp;
1786712Stomee 
1796712Stomee 	/* Copy fields. */
1806712Stomee 	nzp->z_zfsvfs = ozp->z_zfsvfs;
1816712Stomee 
1826712Stomee 	/* Swap vnodes. */
1836712Stomee 	vp = nzp->z_vnode;
1846712Stomee 	nzp->z_vnode = ozp->z_vnode;
1856712Stomee 	ozp->z_vnode = vp; /* let destructor free the overwritten vnode */
1866712Stomee 	ZTOV(ozp)->v_data = ozp;
1876712Stomee 	ZTOV(nzp)->v_data = nzp;
1886712Stomee 
1896712Stomee 	nzp->z_id = ozp->z_id;
1906712Stomee 	ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */
1916712Stomee 	ASSERT(avl_numnodes(&ozp->z_range_avl) == 0);
1926712Stomee 	nzp->z_unlinked = ozp->z_unlinked;
1936712Stomee 	nzp->z_atime_dirty = ozp->z_atime_dirty;
1946712Stomee 	nzp->z_zn_prefetch = ozp->z_zn_prefetch;
1956712Stomee 	nzp->z_blksz = ozp->z_blksz;
1966712Stomee 	nzp->z_seq = ozp->z_seq;
1976712Stomee 	nzp->z_mapcnt = ozp->z_mapcnt;
1986712Stomee 	nzp->z_gen = ozp->z_gen;
1996712Stomee 	nzp->z_sync_cnt = ozp->z_sync_cnt;
20011935SMark.Shellenbaum@Sun.COM 	nzp->z_is_sa = ozp->z_is_sa;
20111935SMark.Shellenbaum@Sun.COM 	nzp->z_sa_hdl = ozp->z_sa_hdl;
20211935SMark.Shellenbaum@Sun.COM 	bcopy(ozp->z_atime, nzp->z_atime, sizeof (uint64_t) * 2);
20311935SMark.Shellenbaum@Sun.COM 	nzp->z_links = ozp->z_links;
20411935SMark.Shellenbaum@Sun.COM 	nzp->z_size = ozp->z_size;
20511935SMark.Shellenbaum@Sun.COM 	nzp->z_pflags = ozp->z_pflags;
20611935SMark.Shellenbaum@Sun.COM 	nzp->z_uid = ozp->z_uid;
20711935SMark.Shellenbaum@Sun.COM 	nzp->z_gid = ozp->z_gid;
20811935SMark.Shellenbaum@Sun.COM 	nzp->z_mode = ozp->z_mode;
20910250SMark.Shellenbaum@Sun.COM 
21010250SMark.Shellenbaum@Sun.COM 	/*
21110269SMark.Shellenbaum@Sun.COM 	 * Since this is just an idle znode and kmem is already dealing with
21210269SMark.Shellenbaum@Sun.COM 	 * memory pressure, release any cached ACL.
21310250SMark.Shellenbaum@Sun.COM 	 */
21410250SMark.Shellenbaum@Sun.COM 	if (ozp->z_acl_cached) {
21510250SMark.Shellenbaum@Sun.COM 		zfs_acl_free(ozp->z_acl_cached);
21610250SMark.Shellenbaum@Sun.COM 		ozp->z_acl_cached = NULL;
21710250SMark.Shellenbaum@Sun.COM 	}
2186712Stomee 
21911935SMark.Shellenbaum@Sun.COM 	sa_set_userp(nzp->z_sa_hdl, nzp);
2206712Stomee 
2216712Stomee 	/*
2226712Stomee 	 * Invalidate the original znode by clearing fields that provide a
2236712Stomee 	 * pointer back to the znode. Set the low bit of the vfs pointer to
2246712Stomee 	 * ensure that zfs_znode_move() recognizes the znode as invalid in any
2256712Stomee 	 * subsequent callback.
2266712Stomee 	 */
22711935SMark.Shellenbaum@Sun.COM 	ozp->z_sa_hdl = NULL;
2286712Stomee 	POINTER_INVALIDATE(&ozp->z_zfsvfs);
22912684STom.Erickson@Sun.COM 
23012684STom.Erickson@Sun.COM 	/*
23112684STom.Erickson@Sun.COM 	 * Mark the znode.
23212684STom.Erickson@Sun.COM 	 */
23312684STom.Erickson@Sun.COM 	nzp->z_moved = 1;
23412684STom.Erickson@Sun.COM 	ozp->z_moved = (uint8_t)-1;
2356712Stomee }
2366712Stomee 
2376712Stomee /*ARGSUSED*/
2386712Stomee static kmem_cbrc_t
zfs_znode_move(void * buf,void * newbuf,size_t size,void * arg)2396712Stomee zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
2406712Stomee {
2416712Stomee 	znode_t *ozp = buf, *nzp = newbuf;
2426712Stomee 	zfsvfs_t *zfsvfs;
2436712Stomee 	vnode_t *vp;
2446712Stomee 
2456712Stomee 	/*
2466712Stomee 	 * The znode is on the file system's list of known znodes if the vfs
2476712Stomee 	 * pointer is valid. We set the low bit of the vfs pointer when freeing
2486712Stomee 	 * the znode to invalidate it, and the memory patterns written by kmem
2496712Stomee 	 * (baddcafe and deadbeef) set at least one of the two low bits. A newly
2506712Stomee 	 * created znode sets the vfs pointer last of all to indicate that the
2516712Stomee 	 * znode is known and in a valid state to be moved by this function.
2526712Stomee 	 */
2536712Stomee 	zfsvfs = ozp->z_zfsvfs;
2546712Stomee 	if (!POINTER_IS_VALID(zfsvfs)) {
2556712Stomee 		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid);
2566712Stomee 		return (KMEM_CBRC_DONT_KNOW);
2576712Stomee 	}
2586712Stomee 
2596712Stomee 	/*
2609788STom.Erickson@Sun.COM 	 * Close a small window in which it's possible that the filesystem could
2619788STom.Erickson@Sun.COM 	 * be unmounted and freed, and zfsvfs, though valid in the previous
2629788STom.Erickson@Sun.COM 	 * statement, could point to unrelated memory by the time we try to
2639788STom.Erickson@Sun.COM 	 * prevent the filesystem from being unmounted.
2649788STom.Erickson@Sun.COM 	 */
2659788STom.Erickson@Sun.COM 	rw_enter(&zfsvfs_lock, RW_WRITER);
2669788STom.Erickson@Sun.COM 	if (zfsvfs != ozp->z_zfsvfs) {
2679788STom.Erickson@Sun.COM 		rw_exit(&zfsvfs_lock);
2689788STom.Erickson@Sun.COM 		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1);
2699788STom.Erickson@Sun.COM 		return (KMEM_CBRC_DONT_KNOW);
2709788STom.Erickson@Sun.COM 	}
2719788STom.Erickson@Sun.COM 
2729788STom.Erickson@Sun.COM 	/*
2739788STom.Erickson@Sun.COM 	 * If the znode is still valid, then so is the file system. We know that
2749788STom.Erickson@Sun.COM 	 * no valid file system can be freed while we hold zfsvfs_lock, so we
2759788STom.Erickson@Sun.COM 	 * can safely ensure that the filesystem is not and will not be
2769788STom.Erickson@Sun.COM 	 * unmounted. The next statement is equivalent to ZFS_ENTER().
2776712Stomee 	 */
2789396SMatthew.Ahrens@Sun.COM 	rrw_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
2799396SMatthew.Ahrens@Sun.COM 	if (zfsvfs->z_unmounted) {
2809396SMatthew.Ahrens@Sun.COM 		ZFS_EXIT(zfsvfs);
2819788STom.Erickson@Sun.COM 		rw_exit(&zfsvfs_lock);
2826712Stomee 		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted);
2836712Stomee 		return (KMEM_CBRC_DONT_KNOW);
2846712Stomee 	}
2859788STom.Erickson@Sun.COM 	rw_exit(&zfsvfs_lock);
2866712Stomee 
2876712Stomee 	mutex_enter(&zfsvfs->z_znodes_lock);
2886712Stomee 	/*
2896712Stomee 	 * Recheck the vfs pointer in case the znode was removed just before
2906712Stomee 	 * acquiring the lock.
2916712Stomee 	 */
2926712Stomee 	if (zfsvfs != ozp->z_zfsvfs) {
2936712Stomee 		mutex_exit(&zfsvfs->z_znodes_lock);
2946712Stomee 		ZFS_EXIT(zfsvfs);
2959788STom.Erickson@Sun.COM 		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2);
2966712Stomee 		return (KMEM_CBRC_DONT_KNOW);
2976712Stomee 	}
2986712Stomee 
2996712Stomee 	/*
3006712Stomee 	 * At this point we know that as long as we hold z_znodes_lock, the
3016712Stomee 	 * znode cannot be freed and fields within the znode can be safely
3027579STom.Erickson@Sun.COM 	 * accessed. Now, prevent a race with zfs_zget().
3036712Stomee 	 */
3047579STom.Erickson@Sun.COM 	if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) {
3057579STom.Erickson@Sun.COM 		mutex_exit(&zfsvfs->z_znodes_lock);
3067579STom.Erickson@Sun.COM 		ZFS_EXIT(zfsvfs);
3077579STom.Erickson@Sun.COM 		ZNODE_STAT_ADD(znode_move_stats.zms_obj_held);
3087579STom.Erickson@Sun.COM 		return (KMEM_CBRC_LATER);
3097579STom.Erickson@Sun.COM 	}
3107579STom.Erickson@Sun.COM 
3116712Stomee 	vp = ZTOV(ozp);
3126712Stomee 	if (mutex_tryenter(&vp->v_lock) == 0) {
3137579STom.Erickson@Sun.COM 		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
3146712Stomee 		mutex_exit(&zfsvfs->z_znodes_lock);
3156712Stomee 		ZFS_EXIT(zfsvfs);
3166712Stomee 		ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked);
3176712Stomee 		return (KMEM_CBRC_LATER);
3186712Stomee 	}
3197579STom.Erickson@Sun.COM 
3206712Stomee 	/* Only move znodes that are referenced _only_ by the DNLC. */
3216712Stomee 	if (vp->v_count != 1 || !vn_in_dnlc(vp)) {
3226712Stomee 		mutex_exit(&vp->v_lock);
3237579STom.Erickson@Sun.COM 		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
3246712Stomee 		mutex_exit(&zfsvfs->z_znodes_lock);
3256712Stomee 		ZFS_EXIT(zfsvfs);
3267579STom.Erickson@Sun.COM 		ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc);
3276712Stomee 		return (KMEM_CBRC_LATER);
3286712Stomee 	}
3296712Stomee 
3306712Stomee 	/*
3316712Stomee 	 * The znode is known and in a valid state to move. We're holding the
3326712Stomee 	 * locks needed to execute the critical section.
3336712Stomee 	 */
3346712Stomee 	zfs_znode_move_impl(ozp, nzp);
3356712Stomee 	mutex_exit(&vp->v_lock);
3367579STom.Erickson@Sun.COM 	ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
3376712Stomee 
3386712Stomee 	list_link_replace(&ozp->z_link_node, &nzp->z_link_node);
3396712Stomee 	mutex_exit(&zfsvfs->z_znodes_lock);
3406712Stomee 	ZFS_EXIT(zfsvfs);
3416712Stomee 
3426712Stomee 	return (KMEM_CBRC_YES);
343789Sahrens }
344789Sahrens 
345789Sahrens void
zfs_znode_init(void)346789Sahrens zfs_znode_init(void)
347789Sahrens {
348789Sahrens 	/*
349789Sahrens 	 * Initialize zcache
350789Sahrens 	 */
3519788STom.Erickson@Sun.COM 	rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL);
352789Sahrens 	ASSERT(znode_cache == NULL);
353789Sahrens 	znode_cache = kmem_cache_create("zfs_znode_cache",
354789Sahrens 	    sizeof (znode_t), 0, zfs_znode_cache_constructor,
355789Sahrens 	    zfs_znode_cache_destructor, NULL, NULL, NULL, 0);
3566712Stomee 	kmem_cache_set_move(znode_cache, zfs_znode_move);
357789Sahrens }
358789Sahrens 
359789Sahrens void
zfs_znode_fini(void)360789Sahrens zfs_znode_fini(void)
361789Sahrens {
362789Sahrens 	/*
363789Sahrens 	 * Cleanup vfs & vnode ops
364789Sahrens 	 */
365789Sahrens 	zfs_remove_op_tables();
366789Sahrens 
367789Sahrens 	/*
368789Sahrens 	 * Cleanup zcache
369789Sahrens 	 */
370789Sahrens 	if (znode_cache)
371789Sahrens 		kmem_cache_destroy(znode_cache);
372789Sahrens 	znode_cache = NULL;
3739788STom.Erickson@Sun.COM 	rw_destroy(&zfsvfs_lock);
374789Sahrens }
375789Sahrens 
/*
 * Vnode operation vectors, built by zfs_create_op_tables() and released
 * by zfs_remove_op_tables().  zfs_znode_alloc() picks one per vnode type.
 */
struct vnodeops *zfs_dvnodeops;		/* directories */
struct vnodeops *zfs_fvnodeops;		/* files and device/fifo/sock/door */
struct vnodeops *zfs_symvnodeops;	/* symbolic links */
struct vnodeops *zfs_xdvnodeops;	/* extended attribute directories */
struct vnodeops *zfs_evnodeops;		/* any other vnode type */
struct vnodeops *zfs_sharevnodeops;	/* files under the shares directory */
382789Sahrens 
383789Sahrens void
zfs_remove_op_tables()384789Sahrens zfs_remove_op_tables()
385789Sahrens {
386789Sahrens 	/*
387789Sahrens 	 * Remove vfs ops
388789Sahrens 	 */
389789Sahrens 	ASSERT(zfsfstype);
390789Sahrens 	(void) vfs_freevfsops_by_type(zfsfstype);
391789Sahrens 	zfsfstype = 0;
392789Sahrens 
393789Sahrens 	/*
394789Sahrens 	 * Remove vnode ops
395789Sahrens 	 */
396789Sahrens 	if (zfs_dvnodeops)
397789Sahrens 		vn_freevnodeops(zfs_dvnodeops);
398789Sahrens 	if (zfs_fvnodeops)
399789Sahrens 		vn_freevnodeops(zfs_fvnodeops);
400789Sahrens 	if (zfs_symvnodeops)
401789Sahrens 		vn_freevnodeops(zfs_symvnodeops);
402789Sahrens 	if (zfs_xdvnodeops)
403789Sahrens 		vn_freevnodeops(zfs_xdvnodeops);
404789Sahrens 	if (zfs_evnodeops)
405789Sahrens 		vn_freevnodeops(zfs_evnodeops);
4068845Samw@Sun.COM 	if (zfs_sharevnodeops)
4078845Samw@Sun.COM 		vn_freevnodeops(zfs_sharevnodeops);
408789Sahrens 
409789Sahrens 	zfs_dvnodeops = NULL;
410789Sahrens 	zfs_fvnodeops = NULL;
411789Sahrens 	zfs_symvnodeops = NULL;
412789Sahrens 	zfs_xdvnodeops = NULL;
413789Sahrens 	zfs_evnodeops = NULL;
4148845Samw@Sun.COM 	zfs_sharevnodeops = NULL;
415789Sahrens }
416789Sahrens 
417789Sahrens extern const fs_operation_def_t zfs_dvnodeops_template[];
418789Sahrens extern const fs_operation_def_t zfs_fvnodeops_template[];
419789Sahrens extern const fs_operation_def_t zfs_xdvnodeops_template[];
420789Sahrens extern const fs_operation_def_t zfs_symvnodeops_template[];
421789Sahrens extern const fs_operation_def_t zfs_evnodeops_template[];
4228845Samw@Sun.COM extern const fs_operation_def_t zfs_sharevnodeops_template[];
423789Sahrens 
424789Sahrens int
zfs_create_op_tables()425789Sahrens zfs_create_op_tables()
426789Sahrens {
427789Sahrens 	int error;
428789Sahrens 
429789Sahrens 	/*
430789Sahrens 	 * zfs_dvnodeops can be set if mod_remove() calls mod_installfs()
431789Sahrens 	 * due to a failure to remove the the 2nd modlinkage (zfs_modldrv).
432789Sahrens 	 * In this case we just return as the ops vectors are already set up.
433789Sahrens 	 */
434789Sahrens 	if (zfs_dvnodeops)
435789Sahrens 		return (0);
436789Sahrens 
437789Sahrens 	error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template,
438789Sahrens 	    &zfs_dvnodeops);
439789Sahrens 	if (error)
440789Sahrens 		return (error);
441789Sahrens 
442789Sahrens 	error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template,
443789Sahrens 	    &zfs_fvnodeops);
444789Sahrens 	if (error)
445789Sahrens 		return (error);
446789Sahrens 
447789Sahrens 	error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template,
448789Sahrens 	    &zfs_symvnodeops);
449789Sahrens 	if (error)
450789Sahrens 		return (error);
451789Sahrens 
452789Sahrens 	error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template,
453789Sahrens 	    &zfs_xdvnodeops);
454789Sahrens 	if (error)
455789Sahrens 		return (error);
456789Sahrens 
457789Sahrens 	error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template,
458789Sahrens 	    &zfs_evnodeops);
4598845Samw@Sun.COM 	if (error)
4608845Samw@Sun.COM 		return (error);
4618845Samw@Sun.COM 
4628845Samw@Sun.COM 	error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template,
4638845Samw@Sun.COM 	    &zfs_sharevnodeops);
4648845Samw@Sun.COM 
4658845Samw@Sun.COM 	return (error);
4668845Samw@Sun.COM }
4678845Samw@Sun.COM 
4689030SMark.Shellenbaum@Sun.COM int
zfs_create_share_dir(zfsvfs_t * zfsvfs,dmu_tx_t * tx)4698845Samw@Sun.COM zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
4708845Samw@Sun.COM {
4719179SMark.Shellenbaum@Sun.COM 	zfs_acl_ids_t acl_ids;
4728845Samw@Sun.COM 	vattr_t vattr;
4738845Samw@Sun.COM 	znode_t *sharezp;
4748845Samw@Sun.COM 	vnode_t *vp;
4758845Samw@Sun.COM 	znode_t *zp;
4768845Samw@Sun.COM 	int error;
4778845Samw@Sun.COM 
4788845Samw@Sun.COM 	vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
4798845Samw@Sun.COM 	vattr.va_type = VDIR;
4808845Samw@Sun.COM 	vattr.va_mode = S_IFDIR|0555;
4818845Samw@Sun.COM 	vattr.va_uid = crgetuid(kcred);
4828845Samw@Sun.COM 	vattr.va_gid = crgetgid(kcred);
4838845Samw@Sun.COM 
4848845Samw@Sun.COM 	sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
48512684STom.Erickson@Sun.COM 	ASSERT(!POINTER_IS_VALID(sharezp->z_zfsvfs));
48612684STom.Erickson@Sun.COM 	sharezp->z_moved = 0;
4878845Samw@Sun.COM 	sharezp->z_unlinked = 0;
4888845Samw@Sun.COM 	sharezp->z_atime_dirty = 0;
4898845Samw@Sun.COM 	sharezp->z_zfsvfs = zfsvfs;
49011935SMark.Shellenbaum@Sun.COM 	sharezp->z_is_sa = zfsvfs->z_use_sa;
4918845Samw@Sun.COM 
4928845Samw@Sun.COM 	vp = ZTOV(sharezp);
4938845Samw@Sun.COM 	vn_reinit(vp);
4948845Samw@Sun.COM 	vp->v_type = VDIR;
4958845Samw@Sun.COM 
4969179SMark.Shellenbaum@Sun.COM 	VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
4979179SMark.Shellenbaum@Sun.COM 	    kcred, NULL, &acl_ids));
49811935SMark.Shellenbaum@Sun.COM 	zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids);
4998845Samw@Sun.COM 	ASSERT3P(zp, ==, sharezp);
5008845Samw@Sun.COM 	ASSERT(!vn_in_dnlc(ZTOV(sharezp))); /* not valid to move */
5018845Samw@Sun.COM 	POINTER_INVALIDATE(&sharezp->z_zfsvfs);
5028845Samw@Sun.COM 	error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
5038845Samw@Sun.COM 	    ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx);
5048845Samw@Sun.COM 	zfsvfs->z_shares_dir = sharezp->z_id;
5058845Samw@Sun.COM 
5069179SMark.Shellenbaum@Sun.COM 	zfs_acl_ids_free(&acl_ids);
5078845Samw@Sun.COM 	ZTOV(sharezp)->v_count = 0;
50811935SMark.Shellenbaum@Sun.COM 	sa_handle_destroy(sharezp->z_sa_hdl);
5098845Samw@Sun.COM 	kmem_cache_free(znode_cache, sharezp);
510789Sahrens 
511789Sahrens 	return (error);
512789Sahrens }
513789Sahrens 
/*
 * The 64-bit device number layout constants are needed in both 32-bit
 * and 64-bit environments; provide any that are not already defined.
 */
#ifndef	NBITSMINOR64
#define	NBITSMINOR64	32
#endif	/* NBITSMINOR64 */

#ifndef	MAXMAJ64
#define	MAXMAJ64	0xffffffffUL
#endif	/* MAXMAJ64 */

#ifndef	MAXMIN64
#define	MAXMIN64	0xffffffffUL
#endif	/* MAXMIN64 */
5271816Smarks 
/*
 * ZFS-private counterpart of expldev().
 *
 * The standard expldev() expands a dev32_t into a long dev_t in LP64.
 * This routine does that expansion in ILP32: the major number moves
 * above NBITSMINOR64 bits and the minor number stays in the low bits.
 * In LP64 the dev_t is returned unchanged.
 */
static uint64_t
zfs_expldev(dev_t dev)
{
#ifdef _LP64
	return (dev);
#else
	major_t maj = ((major_t)dev >> NBITSMINOR32) & MAXMAJ32;
	minor_t mnr = (minor_t)dev & MAXMIN32;

	return (((uint64_t)maj << NBITSMINOR64) | mnr);
#endif
}
5471816Smarks 
/*
 * ZFS-private counterpart of cmpldev().
 *
 * The standard cmpldev() compresses a long dev_t to a dev32_t in LP64.
 * This routine does that compaction in ILP32, returning NODEV32 when
 * the major or minor number does not fit the 32-bit layout.  In LP64
 * the value is returned unchanged.
 */
dev_t
zfs_cmpldev(uint64_t dev)
{
#ifdef _LP64
	return (dev);
#else
	major_t maj = (major_t)(dev >> NBITSMINOR64) & MAXMAJ64;
	minor_t mnr = (minor_t)dev & MAXMIN64;

	if (maj > MAXMAJ32 || mnr > MAXMIN32)
		return (NODEV32);

	return (((dev32_t)maj << NBITSMINOR32) | mnr);
#endif
}
5701816Smarks 
5715446Sahrens static void
zfs_znode_sa_init(zfsvfs_t * zfsvfs,znode_t * zp,dmu_buf_t * db,dmu_object_type_t obj_type,sa_handle_t * sa_hdl)57211935SMark.Shellenbaum@Sun.COM zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
57311935SMark.Shellenbaum@Sun.COM     dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
5745446Sahrens {
5756712Stomee 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs));
5766712Stomee 	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id)));
5775446Sahrens 
5785446Sahrens 	mutex_enter(&zp->z_lock);
5795446Sahrens 
58011935SMark.Shellenbaum@Sun.COM 	ASSERT(zp->z_sa_hdl == NULL);
58110269SMark.Shellenbaum@Sun.COM 	ASSERT(zp->z_acl_cached == NULL);
58211935SMark.Shellenbaum@Sun.COM 	if (sa_hdl == NULL) {
58311935SMark.Shellenbaum@Sun.COM 		VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp,
58411935SMark.Shellenbaum@Sun.COM 		    SA_HDL_SHARED, &zp->z_sa_hdl));
58511935SMark.Shellenbaum@Sun.COM 	} else {
58611935SMark.Shellenbaum@Sun.COM 		zp->z_sa_hdl = sa_hdl;
58711935SMark.Shellenbaum@Sun.COM 		sa_set_userp(sa_hdl, zp);
58811935SMark.Shellenbaum@Sun.COM 	}
5895446Sahrens 
59011935SMark.Shellenbaum@Sun.COM 	zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE;
5915446Sahrens 
5925446Sahrens 	/*
5935446Sahrens 	 * Slap on VROOT if we are the root znode
5945446Sahrens 	 */
5955446Sahrens 	if (zp->z_id == zfsvfs->z_root)
5965446Sahrens 		ZTOV(zp)->v_flag |= VROOT;
5975446Sahrens 
5985446Sahrens 	mutex_exit(&zp->z_lock);
5995446Sahrens 	vn_exists(ZTOV(zp));
6005446Sahrens }
6015446Sahrens 
6025642Smaybee void
zfs_znode_dmu_fini(znode_t * zp)6035446Sahrens zfs_znode_dmu_fini(znode_t *zp)
6045446Sahrens {
6056712Stomee 	ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) ||
6066712Stomee 	    zp->z_unlinked ||
6075642Smaybee 	    RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock));
60811935SMark.Shellenbaum@Sun.COM 
60911935SMark.Shellenbaum@Sun.COM 	sa_handle_destroy(zp->z_sa_hdl);
61011935SMark.Shellenbaum@Sun.COM 	zp->z_sa_hdl = NULL;
6115446Sahrens }
6125446Sahrens 
6131816Smarks /*
614789Sahrens  * Construct a new znode/vnode and intialize.
615789Sahrens  *
616789Sahrens  * This does not do a call to dmu_set_user() that is
617789Sahrens  * up to the caller to do, in case you don't want to
618789Sahrens  * return the znode
619789Sahrens  */
6201544Seschrock static znode_t *
zfs_znode_alloc(zfsvfs_t * zfsvfs,dmu_buf_t * db,int blksz,dmu_object_type_t obj_type,sa_handle_t * hdl)62111935SMark.Shellenbaum@Sun.COM zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
62211935SMark.Shellenbaum@Sun.COM     dmu_object_type_t obj_type, sa_handle_t *hdl)
623789Sahrens {
624789Sahrens 	znode_t	*zp;
625789Sahrens 	vnode_t *vp;
62611935SMark.Shellenbaum@Sun.COM 	uint64_t mode;
62711935SMark.Shellenbaum@Sun.COM 	uint64_t parent;
62811935SMark.Shellenbaum@Sun.COM 	sa_bulk_attr_t bulk[9];
62911935SMark.Shellenbaum@Sun.COM 	int count = 0;
630789Sahrens 
631789Sahrens 	zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
632789Sahrens 
633789Sahrens 	ASSERT(zp->z_dirlocks == NULL);
6346712Stomee 	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
63512684STom.Erickson@Sun.COM 	zp->z_moved = 0;
636789Sahrens 
6376712Stomee 	/*
6386712Stomee 	 * Defer setting z_zfsvfs until the znode is ready to be a candidate for
6396712Stomee 	 * the zfs_znode_move() callback.
6406712Stomee 	 */
64111935SMark.Shellenbaum@Sun.COM 	zp->z_sa_hdl = NULL;
6423461Sahrens 	zp->z_unlinked = 0;
643789Sahrens 	zp->z_atime_dirty = 0;
644789Sahrens 	zp->z_mapcnt = 0;
6455446Sahrens 	zp->z_id = db->db_object;
646789Sahrens 	zp->z_blksz = blksz;
647789Sahrens 	zp->z_seq = 0x7A4653;
6483063Sperrin 	zp->z_sync_cnt = 0;
6495446Sahrens 
6505446Sahrens 	vp = ZTOV(zp);
6515446Sahrens 	vn_reinit(vp);
6525446Sahrens 
65311935SMark.Shellenbaum@Sun.COM 	zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);
6545446Sahrens 
65511935SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
65611935SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &zp->z_gen, 8);
65711935SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
65811935SMark.Shellenbaum@Sun.COM 	    &zp->z_size, 8);
65911935SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
66011935SMark.Shellenbaum@Sun.COM 	    &zp->z_links, 8);
66111935SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
66211935SMark.Shellenbaum@Sun.COM 	    &zp->z_pflags, 8);
66311935SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
66411935SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
66511935SMark.Shellenbaum@Sun.COM 	    &zp->z_atime, 16);
66611935SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
66713069SMark.Shellenbaum@Oracle.COM 	    &zp->z_uid, 8);
66811935SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
66913069SMark.Shellenbaum@Oracle.COM 	    &zp->z_gid, 8);
670789Sahrens 
67111935SMark.Shellenbaum@Sun.COM 	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0) {
67211935SMark.Shellenbaum@Sun.COM 		if (hdl == NULL)
67311935SMark.Shellenbaum@Sun.COM 			sa_handle_destroy(zp->z_sa_hdl);
67411935SMark.Shellenbaum@Sun.COM 		kmem_cache_free(znode_cache, zp);
67511935SMark.Shellenbaum@Sun.COM 		return (NULL);
67611935SMark.Shellenbaum@Sun.COM 	}
67711935SMark.Shellenbaum@Sun.COM 
67811935SMark.Shellenbaum@Sun.COM 	zp->z_mode = mode;
679789Sahrens 	vp->v_vfsp = zfsvfs->z_parent->z_vfs;
68011935SMark.Shellenbaum@Sun.COM 
68111935SMark.Shellenbaum@Sun.COM 	vp->v_type = IFTOVT((mode_t)mode);
682789Sahrens 
683789Sahrens 	switch (vp->v_type) {
684789Sahrens 	case VDIR:
68511935SMark.Shellenbaum@Sun.COM 		if (zp->z_pflags & ZFS_XATTR) {
686789Sahrens 			vn_setops(vp, zfs_xdvnodeops);
687789Sahrens 			vp->v_flag |= V_XATTRDIR;
6885446Sahrens 		} else {
689789Sahrens 			vn_setops(vp, zfs_dvnodeops);
6905446Sahrens 		}
691869Sperrin 		zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
692789Sahrens 		break;
693789Sahrens 	case VBLK:
694789Sahrens 	case VCHR:
69511935SMark.Shellenbaum@Sun.COM 		{
69611935SMark.Shellenbaum@Sun.COM 			uint64_t rdev;
69711935SMark.Shellenbaum@Sun.COM 			VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zfsvfs),
69811935SMark.Shellenbaum@Sun.COM 			    &rdev, sizeof (rdev)) == 0);
69911935SMark.Shellenbaum@Sun.COM 
70011935SMark.Shellenbaum@Sun.COM 			vp->v_rdev = zfs_cmpldev(rdev);
70111935SMark.Shellenbaum@Sun.COM 		}
702789Sahrens 		/*FALLTHROUGH*/
703789Sahrens 	case VFIFO:
704789Sahrens 	case VSOCK:
705789Sahrens 	case VDOOR:
706789Sahrens 		vn_setops(vp, zfs_fvnodeops);
707789Sahrens 		break;
708789Sahrens 	case VREG:
709789Sahrens 		vp->v_flag |= VMODSORT;
71011935SMark.Shellenbaum@Sun.COM 		if (parent == zfsvfs->z_shares_dir) {
71113069SMark.Shellenbaum@Oracle.COM 			ASSERT(zp->z_uid == 0 && zp->z_gid == 0);
7128845Samw@Sun.COM 			vn_setops(vp, zfs_sharevnodeops);
71311935SMark.Shellenbaum@Sun.COM 		} else {
7148845Samw@Sun.COM 			vn_setops(vp, zfs_fvnodeops);
71511935SMark.Shellenbaum@Sun.COM 		}
716789Sahrens 		break;
717789Sahrens 	case VLNK:
718789Sahrens 		vn_setops(vp, zfs_symvnodeops);
719789Sahrens 		break;
720789Sahrens 	default:
721789Sahrens 		vn_setops(vp, zfs_evnodeops);
722789Sahrens 		break;
723789Sahrens 	}
724789Sahrens 
7256712Stomee 	mutex_enter(&zfsvfs->z_znodes_lock);
7266712Stomee 	list_insert_tail(&zfsvfs->z_all_znodes, zp);
7276712Stomee 	membar_producer();
7286712Stomee 	/*
7296712Stomee 	 * Everything else must be valid before assigning z_zfsvfs makes the
7306712Stomee 	 * znode eligible for zfs_znode_move().
7316712Stomee 	 */
7326712Stomee 	zp->z_zfsvfs = zfsvfs;
7336712Stomee 	mutex_exit(&zfsvfs->z_znodes_lock);
7346712Stomee 
7355642Smaybee 	VFS_HOLD(zfsvfs->z_vfs);
736789Sahrens 	return (zp);
737789Sahrens }
738789Sahrens 
73911935SMark.Shellenbaum@Sun.COM static uint64_t empty_xattr;
74011935SMark.Shellenbaum@Sun.COM static uint64_t pad[4];
74111935SMark.Shellenbaum@Sun.COM static zfs_acl_phys_t acl_phys;
742789Sahrens /*
743789Sahrens  * Create a new DMU object to hold a zfs znode.
744789Sahrens  *
745789Sahrens  *	IN:	dzp	- parent directory for new znode
746789Sahrens  *		vap	- file attributes for new znode
747789Sahrens  *		tx	- dmu transaction id for zap operations
748789Sahrens  *		cr	- credentials of caller
749789Sahrens  *		flag	- flags:
750789Sahrens  *			  IS_ROOT_NODE	- new object will be root
751789Sahrens  *			  IS_XATTR	- new object is an attribute
7525331Samw  *		bonuslen - length of bonus buffer
7535331Samw  *		setaclp  - File/Dir initial ACL
7545331Samw  *		fuidp	 - Tracks fuid allocation.
755789Sahrens  *
7565446Sahrens  *	OUT:	zpp	- allocated znode
757789Sahrens  *
758789Sahrens  */
759789Sahrens void
zfs_mknode(znode_t * dzp,vattr_t * vap,dmu_tx_t * tx,cred_t * cr,uint_t flag,znode_t ** zpp,zfs_acl_ids_t * acl_ids)7605446Sahrens zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
76111935SMark.Shellenbaum@Sun.COM     uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
762789Sahrens {
76311935SMark.Shellenbaum@Sun.COM 	uint64_t	crtime[2], atime[2], mtime[2], ctime[2];
76411935SMark.Shellenbaum@Sun.COM 	uint64_t	mode, size, links, parent, pflags;
76512684STom.Erickson@Sun.COM 	uint64_t	dzp_pflags = 0;
76611935SMark.Shellenbaum@Sun.COM 	uint64_t	rdev = 0;
76711935SMark.Shellenbaum@Sun.COM 	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
7685446Sahrens 	dmu_buf_t	*db;
769789Sahrens 	timestruc_t	now;
7705446Sahrens 	uint64_t	gen, obj;
771789Sahrens 	int		err;
77211935SMark.Shellenbaum@Sun.COM 	int		bonuslen;
77311935SMark.Shellenbaum@Sun.COM 	sa_handle_t	*sa_hdl;
77411935SMark.Shellenbaum@Sun.COM 	dmu_object_type_t obj_type;
77511935SMark.Shellenbaum@Sun.COM 	sa_bulk_attr_t	sa_attrs[ZPL_END];
77611935SMark.Shellenbaum@Sun.COM 	int		cnt = 0;
77711935SMark.Shellenbaum@Sun.COM 	zfs_acl_locator_cb_t locate = { 0 };
778789Sahrens 
779789Sahrens 	ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
780789Sahrens 
7818227SNeil.Perrin@Sun.COM 	if (zfsvfs->z_replay) {
7825446Sahrens 		obj = vap->va_nodeid;
783789Sahrens 		now = vap->va_ctime;		/* see zfs_replay_create() */
784789Sahrens 		gen = vap->va_nblocks;		/* ditto */
785789Sahrens 	} else {
7865446Sahrens 		obj = 0;
787789Sahrens 		gethrestime(&now);
788789Sahrens 		gen = dmu_tx_get_txg(tx);
789789Sahrens 	}
790789Sahrens 
79111935SMark.Shellenbaum@Sun.COM 	obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
79211935SMark.Shellenbaum@Sun.COM 	bonuslen = (obj_type == DMU_OT_SA) ?
79311935SMark.Shellenbaum@Sun.COM 	    DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE;
79411935SMark.Shellenbaum@Sun.COM 
795789Sahrens 	/*
796789Sahrens 	 * Create a new DMU object.
797789Sahrens 	 */
7981544Seschrock 	/*
7991544Seschrock 	 * There's currently no mechanism for pre-reading the blocks that will
80012684STom.Erickson@Sun.COM 	 * be needed to allocate a new object, so we accept the small chance
8011544Seschrock 	 * that there will be an i/o error and we will fail one of the
8021544Seschrock 	 * assertions below.
8031544Seschrock 	 */
804789Sahrens 	if (vap->va_type == VDIR) {
80511249SMark.Shellenbaum@Sun.COM 		if (zfsvfs->z_replay) {
8065446Sahrens 			err = zap_create_claim_norm(zfsvfs->z_os, obj,
8075331Samw 			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
80811935SMark.Shellenbaum@Sun.COM 			    obj_type, bonuslen, tx);
809789Sahrens 			ASSERT3U(err, ==, 0);
810789Sahrens 		} else {
8115446Sahrens 			obj = zap_create_norm(zfsvfs->z_os,
8125331Samw 			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
81311935SMark.Shellenbaum@Sun.COM 			    obj_type, bonuslen, tx);
814789Sahrens 		}
815789Sahrens 	} else {
81611249SMark.Shellenbaum@Sun.COM 		if (zfsvfs->z_replay) {
8175446Sahrens 			err = dmu_object_claim(zfsvfs->z_os, obj,
818789Sahrens 			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
81911935SMark.Shellenbaum@Sun.COM 			    obj_type, bonuslen, tx);
820789Sahrens 			ASSERT3U(err, ==, 0);
821789Sahrens 		} else {
8225446Sahrens 			obj = dmu_object_alloc(zfsvfs->z_os,
823789Sahrens 			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
82411935SMark.Shellenbaum@Sun.COM 			    obj_type, bonuslen, tx);
825789Sahrens 		}
826789Sahrens 	}
82710938SMark.Shellenbaum@Sun.COM 
82810938SMark.Shellenbaum@Sun.COM 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
82911935SMark.Shellenbaum@Sun.COM 	VERIFY(0 == sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));
830789Sahrens 
831789Sahrens 	/*
832789Sahrens 	 * If this is the root, fix up the half-initialized parent pointer
833789Sahrens 	 * to reference the just-allocated physical data area.
834789Sahrens 	 */
835789Sahrens 	if (flag & IS_ROOT_NODE) {
8365446Sahrens 		dzp->z_id = obj;
83711935SMark.Shellenbaum@Sun.COM 	} else {
83811935SMark.Shellenbaum@Sun.COM 		dzp_pflags = dzp->z_pflags;
839789Sahrens 	}
840789Sahrens 
841789Sahrens 	/*
842789Sahrens 	 * If parent is an xattr, so am I.
843789Sahrens 	 */
84411935SMark.Shellenbaum@Sun.COM 	if (dzp_pflags & ZFS_XATTR) {
845789Sahrens 		flag |= IS_XATTR;
846789Sahrens 	}
847789Sahrens 
8485331Samw 	if (zfsvfs->z_use_fuids)
84911935SMark.Shellenbaum@Sun.COM 		pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
85011935SMark.Shellenbaum@Sun.COM 	else
85111935SMark.Shellenbaum@Sun.COM 		pflags = 0;
8525331Samw 
853789Sahrens 	if (vap->va_type == VDIR) {
85411935SMark.Shellenbaum@Sun.COM 		size = 2;		/* contents ("." and "..") */
85511935SMark.Shellenbaum@Sun.COM 		links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
85611935SMark.Shellenbaum@Sun.COM 	} else {
85711935SMark.Shellenbaum@Sun.COM 		size = links = 0;
85811935SMark.Shellenbaum@Sun.COM 	}
85911935SMark.Shellenbaum@Sun.COM 
86011935SMark.Shellenbaum@Sun.COM 	if (vap->va_type == VBLK || vap->va_type == VCHR) {
86111935SMark.Shellenbaum@Sun.COM 		rdev = zfs_expldev(vap->va_rdev);
862789Sahrens 	}
863789Sahrens 
86411935SMark.Shellenbaum@Sun.COM 	parent = dzp->z_id;
86511935SMark.Shellenbaum@Sun.COM 	mode = acl_ids->z_mode;
866789Sahrens 	if (flag & IS_XATTR)
86711935SMark.Shellenbaum@Sun.COM 		pflags |= ZFS_XATTR;
868789Sahrens 
86911935SMark.Shellenbaum@Sun.COM 	/*
87011935SMark.Shellenbaum@Sun.COM 	 * No execs denied will be deterimed when zfs_mode_compute() is called.
87111935SMark.Shellenbaum@Sun.COM 	 */
87211935SMark.Shellenbaum@Sun.COM 	pflags |= acl_ids->z_aclp->z_hints &
87311935SMark.Shellenbaum@Sun.COM 	    (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
87411935SMark.Shellenbaum@Sun.COM 	    ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);
875789Sahrens 
87611935SMark.Shellenbaum@Sun.COM 	ZFS_TIME_ENCODE(&now, crtime);
87711935SMark.Shellenbaum@Sun.COM 	ZFS_TIME_ENCODE(&now, ctime);
878789Sahrens 
879789Sahrens 	if (vap->va_mask & AT_ATIME) {
88011935SMark.Shellenbaum@Sun.COM 		ZFS_TIME_ENCODE(&vap->va_atime, atime);
881789Sahrens 	} else {
88211935SMark.Shellenbaum@Sun.COM 		ZFS_TIME_ENCODE(&now, atime);
883789Sahrens 	}
884789Sahrens 
885789Sahrens 	if (vap->va_mask & AT_MTIME) {
88611935SMark.Shellenbaum@Sun.COM 		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
887789Sahrens 	} else {
88811935SMark.Shellenbaum@Sun.COM 		ZFS_TIME_ENCODE(&now, mtime);
889789Sahrens 	}
89011935SMark.Shellenbaum@Sun.COM 
89111935SMark.Shellenbaum@Sun.COM 	/* Now add in all of the "SA" attributes */
89211935SMark.Shellenbaum@Sun.COM 	VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
89311935SMark.Shellenbaum@Sun.COM 	    &sa_hdl));
89411935SMark.Shellenbaum@Sun.COM 
89511935SMark.Shellenbaum@Sun.COM 	/*
89611935SMark.Shellenbaum@Sun.COM 	 * Setup the array of attributes to be replaced/set on the new file
89711935SMark.Shellenbaum@Sun.COM 	 *
89811935SMark.Shellenbaum@Sun.COM 	 * order for  DMU_OT_ZNODE is critical since it needs to be constructed
89911935SMark.Shellenbaum@Sun.COM 	 * in the old znode_phys_t format.  Don't change this ordering
90011935SMark.Shellenbaum@Sun.COM 	 */
90111935SMark.Shellenbaum@Sun.COM 
90211935SMark.Shellenbaum@Sun.COM 	if (obj_type == DMU_OT_ZNODE) {
90311935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
90411935SMark.Shellenbaum@Sun.COM 		    NULL, &atime, 16);
90511935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
90611935SMark.Shellenbaum@Sun.COM 		    NULL, &mtime, 16);
90711935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
90811935SMark.Shellenbaum@Sun.COM 		    NULL, &ctime, 16);
90911935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
91011935SMark.Shellenbaum@Sun.COM 		    NULL, &crtime, 16);
91111935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
91211935SMark.Shellenbaum@Sun.COM 		    NULL, &gen, 8);
91311935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
91411935SMark.Shellenbaum@Sun.COM 		    NULL, &mode, 8);
91511935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
91611935SMark.Shellenbaum@Sun.COM 		    NULL, &size, 8);
91711935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
91811935SMark.Shellenbaum@Sun.COM 		    NULL, &parent, 8);
91911935SMark.Shellenbaum@Sun.COM 	} else {
92011935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
92111935SMark.Shellenbaum@Sun.COM 		    NULL, &mode, 8);
92211935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
92311935SMark.Shellenbaum@Sun.COM 		    NULL, &size, 8);
92411935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
92511935SMark.Shellenbaum@Sun.COM 		    NULL, &gen, 8);
92611935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
92711935SMark.Shellenbaum@Sun.COM 		    &acl_ids->z_fuid, 8);
92811935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
92911935SMark.Shellenbaum@Sun.COM 		    &acl_ids->z_fgid, 8);
93011935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
93111935SMark.Shellenbaum@Sun.COM 		    NULL, &parent, 8);
93211935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
93311935SMark.Shellenbaum@Sun.COM 		    NULL, &pflags, 8);
93411935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
93511935SMark.Shellenbaum@Sun.COM 		    NULL, &atime, 16);
93611935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
93711935SMark.Shellenbaum@Sun.COM 		    NULL, &mtime, 16);
93811935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
93911935SMark.Shellenbaum@Sun.COM 		    NULL, &ctime, 16);
94011935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
94111935SMark.Shellenbaum@Sun.COM 		    NULL, &crtime, 16);
94211935SMark.Shellenbaum@Sun.COM 	}
94311935SMark.Shellenbaum@Sun.COM 
94411935SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);
94511935SMark.Shellenbaum@Sun.COM 
94611935SMark.Shellenbaum@Sun.COM 	if (obj_type == DMU_OT_ZNODE) {
94711935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL,
94811935SMark.Shellenbaum@Sun.COM 		    &empty_xattr, 8);
94911935SMark.Shellenbaum@Sun.COM 	}
95011935SMark.Shellenbaum@Sun.COM 	if (obj_type == DMU_OT_ZNODE ||
95111935SMark.Shellenbaum@Sun.COM 	    (vap->va_type == VBLK || vap->va_type == VCHR)) {
95211935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs),
95311935SMark.Shellenbaum@Sun.COM 		    NULL, &rdev, 8);
95411935SMark.Shellenbaum@Sun.COM 
95511935SMark.Shellenbaum@Sun.COM 	}
95611935SMark.Shellenbaum@Sun.COM 	if (obj_type == DMU_OT_ZNODE) {
95711935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
95811935SMark.Shellenbaum@Sun.COM 		    NULL, &pflags, 8);
95911935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
96011935SMark.Shellenbaum@Sun.COM 		    &acl_ids->z_fuid, 8);
96111935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
96211935SMark.Shellenbaum@Sun.COM 		    &acl_ids->z_fgid, 8);
96311935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad,
96411935SMark.Shellenbaum@Sun.COM 		    sizeof (uint64_t) * 4);
96511935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
96611935SMark.Shellenbaum@Sun.COM 		    &acl_phys, sizeof (zfs_acl_phys_t));
96711935SMark.Shellenbaum@Sun.COM 	} else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
96811935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
96911935SMark.Shellenbaum@Sun.COM 		    &acl_ids->z_aclp->z_acl_count, 8);
97011935SMark.Shellenbaum@Sun.COM 		locate.cb_aclp = acl_ids->z_aclp;
97111935SMark.Shellenbaum@Sun.COM 		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs),
97211935SMark.Shellenbaum@Sun.COM 		    zfs_acl_data_locator, &locate,
97311935SMark.Shellenbaum@Sun.COM 		    acl_ids->z_aclp->z_acl_bytes);
97412164SMark.Shellenbaum@Sun.COM 		mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
97512164SMark.Shellenbaum@Sun.COM 		    acl_ids->z_fuid, acl_ids->z_fgid);
97611935SMark.Shellenbaum@Sun.COM 	}
97711935SMark.Shellenbaum@Sun.COM 
97811935SMark.Shellenbaum@Sun.COM 	VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);
97911935SMark.Shellenbaum@Sun.COM 
9805642Smaybee 	if (!(flag & IS_ROOT_NODE)) {
98111935SMark.Shellenbaum@Sun.COM 		*zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl);
98211935SMark.Shellenbaum@Sun.COM 		ASSERT(*zpp != NULL);
9835642Smaybee 	} else {
9845642Smaybee 		/*
9855642Smaybee 		 * If we are creating the root node, the "parent" we
9865642Smaybee 		 * passed in is the znode for the root.
9875642Smaybee 		 */
9885642Smaybee 		*zpp = dzp;
98911935SMark.Shellenbaum@Sun.COM 
99011935SMark.Shellenbaum@Sun.COM 		(*zpp)->z_sa_hdl = sa_hdl;
9915642Smaybee 	}
99211935SMark.Shellenbaum@Sun.COM 
99312050SMark.Shellenbaum@Sun.COM 	(*zpp)->z_pflags = pflags;
99412050SMark.Shellenbaum@Sun.COM 	(*zpp)->z_mode = mode;
99512050SMark.Shellenbaum@Sun.COM 
9969179SMark.Shellenbaum@Sun.COM 	if (vap->va_mask & AT_XVATTR)
99711935SMark.Shellenbaum@Sun.COM 		zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx);
99811935SMark.Shellenbaum@Sun.COM 
99911935SMark.Shellenbaum@Sun.COM 	if (obj_type == DMU_OT_ZNODE ||
100011935SMark.Shellenbaum@Sun.COM 	    acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
100111935SMark.Shellenbaum@Sun.COM 		err = zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx);
100211935SMark.Shellenbaum@Sun.COM 		ASSERT3P(err, ==, 0);
100311935SMark.Shellenbaum@Sun.COM 	}
100410938SMark.Shellenbaum@Sun.COM 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
1005789Sahrens }
1006789Sahrens 
100711935SMark.Shellenbaum@Sun.COM /*
100811935SMark.Shellenbaum@Sun.COM  * zfs_xvattr_set only updates the in-core attributes
100911935SMark.Shellenbaum@Sun.COM  * it is assumed the caller will be doing an sa_bulk_update
101011935SMark.Shellenbaum@Sun.COM  * to push the changes out
101111935SMark.Shellenbaum@Sun.COM  */
10125331Samw void
zfs_xvattr_set(znode_t * zp,xvattr_t * xvap,dmu_tx_t * tx)101311935SMark.Shellenbaum@Sun.COM zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
10145331Samw {
10155331Samw 	xoptattr_t *xoap;
10165331Samw 
10175331Samw 	xoap = xva_getxoptattr(xvap);
10185331Samw 	ASSERT(xoap);
10195331Samw 
10205331Samw 	if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
102111935SMark.Shellenbaum@Sun.COM 		uint64_t times[2];
102211935SMark.Shellenbaum@Sun.COM 		ZFS_TIME_ENCODE(&xoap->xoa_createtime, times);
102311935SMark.Shellenbaum@Sun.COM 		(void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs),
102411935SMark.Shellenbaum@Sun.COM 		    &times, sizeof (times), tx);
10255331Samw 		XVA_SET_RTN(xvap, XAT_CREATETIME);
10265331Samw 	}
10275331Samw 	if (XVA_ISSET_REQ(xvap, XAT_READONLY)) {
102811935SMark.Shellenbaum@Sun.COM 		ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly,
102911935SMark.Shellenbaum@Sun.COM 		    zp->z_pflags, tx);
10305331Samw 		XVA_SET_RTN(xvap, XAT_READONLY);
10315331Samw 	}
10325331Samw 	if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) {
103311935SMark.Shellenbaum@Sun.COM 		ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden,
103411935SMark.Shellenbaum@Sun.COM 		    zp->z_pflags, tx);
10355331Samw 		XVA_SET_RTN(xvap, XAT_HIDDEN);
10365331Samw 	}
10375331Samw 	if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) {
103811935SMark.Shellenbaum@Sun.COM 		ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system,
103911935SMark.Shellenbaum@Sun.COM 		    zp->z_pflags, tx);
10405331Samw 		XVA_SET_RTN(xvap, XAT_SYSTEM);
10415331Samw 	}
10425331Samw 	if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
104311935SMark.Shellenbaum@Sun.COM 		ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive,
104411935SMark.Shellenbaum@Sun.COM 		    zp->z_pflags, tx);
10455331Samw 		XVA_SET_RTN(xvap, XAT_ARCHIVE);
10465331Samw 	}
10475331Samw 	if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) {
104811935SMark.Shellenbaum@Sun.COM 		ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
104911935SMark.Shellenbaum@Sun.COM 		    zp->z_pflags, tx);
10505331Samw 		XVA_SET_RTN(xvap, XAT_IMMUTABLE);
10515331Samw 	}
10525331Samw 	if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
105311935SMark.Shellenbaum@Sun.COM 		ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
105411935SMark.Shellenbaum@Sun.COM 		    zp->z_pflags, tx);
10555331Samw 		XVA_SET_RTN(xvap, XAT_NOUNLINK);
10565331Samw 	}
10575331Samw 	if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) {
105811935SMark.Shellenbaum@Sun.COM 		ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
105911935SMark.Shellenbaum@Sun.COM 		    zp->z_pflags, tx);
10605331Samw 		XVA_SET_RTN(xvap, XAT_APPENDONLY);
10615331Samw 	}
10625331Samw 	if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
106311935SMark.Shellenbaum@Sun.COM 		ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
106411935SMark.Shellenbaum@Sun.COM 		    zp->z_pflags, tx);
10655331Samw 		XVA_SET_RTN(xvap, XAT_NODUMP);
10665331Samw 	}
10675331Samw 	if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) {
106811935SMark.Shellenbaum@Sun.COM 		ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque,
106911935SMark.Shellenbaum@Sun.COM 		    zp->z_pflags, tx);
10705331Samw 		XVA_SET_RTN(xvap, XAT_OPAQUE);
10715331Samw 	}
10725331Samw 	if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) {
10735331Samw 		ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED,
107411935SMark.Shellenbaum@Sun.COM 		    xoap->xoa_av_quarantined, zp->z_pflags, tx);
10755331Samw 		XVA_SET_RTN(xvap, XAT_AV_QUARANTINED);
10765331Samw 	}
10775331Samw 	if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) {
107811935SMark.Shellenbaum@Sun.COM 		ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified,
107911935SMark.Shellenbaum@Sun.COM 		    zp->z_pflags, tx);
10805331Samw 		XVA_SET_RTN(xvap, XAT_AV_MODIFIED);
10815331Samw 	}
10825331Samw 	if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) {
108311935SMark.Shellenbaum@Sun.COM 		zfs_sa_set_scanstamp(zp, xvap, tx);
10845331Samw 		XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP);
10855331Samw 	}
108610793Sdai.ngo@sun.com 	if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) {
108711935SMark.Shellenbaum@Sun.COM 		ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse,
108811935SMark.Shellenbaum@Sun.COM 		    zp->z_pflags, tx);
108910793Sdai.ngo@sun.com 		XVA_SET_RTN(xvap, XAT_REPARSE);
109010793Sdai.ngo@sun.com 	}
109113082SJoyce.McIntosh@Sun.COM 	if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) {
109213082SJoyce.McIntosh@Sun.COM 		ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline,
109313082SJoyce.McIntosh@Sun.COM 		    zp->z_pflags, tx);
109413082SJoyce.McIntosh@Sun.COM 		XVA_SET_RTN(xvap, XAT_OFFLINE);
109513082SJoyce.McIntosh@Sun.COM 	}
109613082SJoyce.McIntosh@Sun.COM 	if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) {
109713082SJoyce.McIntosh@Sun.COM 		ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse,
109813082SJoyce.McIntosh@Sun.COM 		    zp->z_pflags, tx);
109913082SJoyce.McIntosh@Sun.COM 		XVA_SET_RTN(xvap, XAT_SPARSE);
110013082SJoyce.McIntosh@Sun.COM 	}
11015331Samw }
11025331Samw 
1103789Sahrens int
zfs_zget(zfsvfs_t * zfsvfs,uint64_t obj_num,znode_t ** zpp)1104789Sahrens zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
1105789Sahrens {
1106789Sahrens 	dmu_object_info_t doi;
1107789Sahrens 	dmu_buf_t	*db;
1108789Sahrens 	znode_t		*zp;
11091544Seschrock 	int err;
111011935SMark.Shellenbaum@Sun.COM 	sa_handle_t	*hdl;
1111789Sahrens 
1112789Sahrens 	*zpp = NULL;
1113789Sahrens 
1114789Sahrens 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
1115789Sahrens 
111611935SMark.Shellenbaum@Sun.COM 	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
11171544Seschrock 	if (err) {
1118789Sahrens 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
11191544Seschrock 		return (err);
1120789Sahrens 	}
1121789Sahrens 
1122789Sahrens 	dmu_object_info_from_db(db, &doi);
112311935SMark.Shellenbaum@Sun.COM 	if (doi.doi_bonus_type != DMU_OT_SA &&
112411935SMark.Shellenbaum@Sun.COM 	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
112511935SMark.Shellenbaum@Sun.COM 	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
112611935SMark.Shellenbaum@Sun.COM 	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
112711935SMark.Shellenbaum@Sun.COM 		sa_buf_rele(db, NULL);
1128789Sahrens 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
1129789Sahrens 		return (EINVAL);
1130789Sahrens 	}
1131789Sahrens 
113211935SMark.Shellenbaum@Sun.COM 	hdl = dmu_buf_get_user(db);
113311935SMark.Shellenbaum@Sun.COM 	if (hdl != NULL) {
113411935SMark.Shellenbaum@Sun.COM 		zp  = sa_get_userdata(hdl);
113511935SMark.Shellenbaum@Sun.COM 
1136789Sahrens 
11375446Sahrens 		/*
113811935SMark.Shellenbaum@Sun.COM 		 * Since "SA" does immediate eviction we
113911935SMark.Shellenbaum@Sun.COM 		 * should never find a sa handle that doesn't
114011935SMark.Shellenbaum@Sun.COM 		 * know about the znode.
11415446Sahrens 		 */
114211935SMark.Shellenbaum@Sun.COM 
114311935SMark.Shellenbaum@Sun.COM 		ASSERT3P(zp, !=, NULL);
114411935SMark.Shellenbaum@Sun.COM 
114511935SMark.Shellenbaum@Sun.COM 		mutex_enter(&zp->z_lock);
1146789Sahrens 		ASSERT3U(zp->z_id, ==, obj_num);
11473461Sahrens 		if (zp->z_unlinked) {
11485446Sahrens 			err = ENOENT;
1149789Sahrens 		} else {
11505446Sahrens 			VN_HOLD(ZTOV(zp));
11515446Sahrens 			*zpp = zp;
11525446Sahrens 			err = 0;
1153789Sahrens 		}
115411935SMark.Shellenbaum@Sun.COM 		sa_buf_rele(db, NULL);
1155789Sahrens 		mutex_exit(&zp->z_lock);
11561544Seschrock 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
11575446Sahrens 		return (err);
1158789Sahrens 	}
1159789Sahrens 
1160789Sahrens 	/*
1161789Sahrens 	 * Not found create new znode/vnode
116210938SMark.Shellenbaum@Sun.COM 	 * but only if file exists.
116310938SMark.Shellenbaum@Sun.COM 	 *
116410938SMark.Shellenbaum@Sun.COM 	 * There is a small window where zfs_vget() could
116510938SMark.Shellenbaum@Sun.COM 	 * find this object while a file create is still in
116611935SMark.Shellenbaum@Sun.COM 	 * progress.  This is checked for in zfs_znode_alloc()
116711935SMark.Shellenbaum@Sun.COM 	 *
116811935SMark.Shellenbaum@Sun.COM 	 * if zfs_znode_alloc() fails it will drop the hold on the
116911935SMark.Shellenbaum@Sun.COM 	 * bonus buffer.
1170789Sahrens 	 */
117111935SMark.Shellenbaum@Sun.COM 	zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size,
117211935SMark.Shellenbaum@Sun.COM 	    doi.doi_bonus_type, NULL);
117311935SMark.Shellenbaum@Sun.COM 	if (zp == NULL) {
117411935SMark.Shellenbaum@Sun.COM 		err = ENOENT;
117511935SMark.Shellenbaum@Sun.COM 	} else {
117610938SMark.Shellenbaum@Sun.COM 		*zpp = zp;
117710938SMark.Shellenbaum@Sun.COM 	}
11781544Seschrock 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
117910938SMark.Shellenbaum@Sun.COM 	return (err);
1180789Sahrens }
1181789Sahrens 
11825326Sek110237 int
zfs_rezget(znode_t * zp)11835326Sek110237 zfs_rezget(znode_t *zp)
11845326Sek110237 {
11855326Sek110237 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
11865326Sek110237 	dmu_object_info_t doi;
11875326Sek110237 	dmu_buf_t *db;
11885326Sek110237 	uint64_t obj_num = zp->z_id;
118911935SMark.Shellenbaum@Sun.COM 	uint64_t mode;
119011935SMark.Shellenbaum@Sun.COM 	sa_bulk_attr_t bulk[8];
11915326Sek110237 	int err;
119211935SMark.Shellenbaum@Sun.COM 	int count = 0;
119311935SMark.Shellenbaum@Sun.COM 	uint64_t gen;
11945326Sek110237 
11955326Sek110237 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num);
11965326Sek110237 
119711935SMark.Shellenbaum@Sun.COM 	mutex_enter(&zp->z_acl_lock);
119811935SMark.Shellenbaum@Sun.COM 	if (zp->z_acl_cached) {
119911935SMark.Shellenbaum@Sun.COM 		zfs_acl_free(zp->z_acl_cached);
120011935SMark.Shellenbaum@Sun.COM 		zp->z_acl_cached = NULL;
120111935SMark.Shellenbaum@Sun.COM 	}
120211935SMark.Shellenbaum@Sun.COM 
120311935SMark.Shellenbaum@Sun.COM 	mutex_exit(&zp->z_acl_lock);
120411935SMark.Shellenbaum@Sun.COM 	ASSERT(zp->z_sa_hdl == NULL);
120511935SMark.Shellenbaum@Sun.COM 	err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
12065326Sek110237 	if (err) {
12075326Sek110237 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
12085326Sek110237 		return (err);
12095326Sek110237 	}
12105326Sek110237 
12115326Sek110237 	dmu_object_info_from_db(db, &doi);
121211935SMark.Shellenbaum@Sun.COM 	if (doi.doi_bonus_type != DMU_OT_SA &&
121311935SMark.Shellenbaum@Sun.COM 	    (doi.doi_bonus_type != DMU_OT_ZNODE ||
121411935SMark.Shellenbaum@Sun.COM 	    (doi.doi_bonus_type == DMU_OT_ZNODE &&
121511935SMark.Shellenbaum@Sun.COM 	    doi.doi_bonus_size < sizeof (znode_phys_t)))) {
121611935SMark.Shellenbaum@Sun.COM 		sa_buf_rele(db, NULL);
12175326Sek110237 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
12185326Sek110237 		return (EINVAL);
12195326Sek110237 	}
12205326Sek110237 
122111935SMark.Shellenbaum@Sun.COM 	zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL);
122211935SMark.Shellenbaum@Sun.COM 
122311935SMark.Shellenbaum@Sun.COM 	/* reload cached values */
122411935SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL,
122511935SMark.Shellenbaum@Sun.COM 	    &gen, sizeof (gen));
122611935SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
122711935SMark.Shellenbaum@Sun.COM 	    &zp->z_size, sizeof (zp->z_size));
122811935SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
122911935SMark.Shellenbaum@Sun.COM 	    &zp->z_links, sizeof (zp->z_links));
123011935SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
123111935SMark.Shellenbaum@Sun.COM 	    &zp->z_pflags, sizeof (zp->z_pflags));
123211935SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
123311935SMark.Shellenbaum@Sun.COM 	    &zp->z_atime, sizeof (zp->z_atime));
123411935SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
123513069SMark.Shellenbaum@Oracle.COM 	    &zp->z_uid, sizeof (zp->z_uid));
123611935SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
123713069SMark.Shellenbaum@Oracle.COM 	    &zp->z_gid, sizeof (zp->z_gid));
123811935SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
123911935SMark.Shellenbaum@Sun.COM 	    &mode, sizeof (mode));
124011935SMark.Shellenbaum@Sun.COM 
124111935SMark.Shellenbaum@Sun.COM 	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
124211935SMark.Shellenbaum@Sun.COM 		zfs_znode_dmu_fini(zp);
12435326Sek110237 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
12445326Sek110237 		return (EIO);
12455326Sek110237 	}
12465326Sek110237 
124712620SMark.Shellenbaum@Oracle.COM 	zp->z_mode = mode;
124812620SMark.Shellenbaum@Oracle.COM 
124911935SMark.Shellenbaum@Sun.COM 	if (gen != zp->z_gen) {
125011935SMark.Shellenbaum@Sun.COM 		zfs_znode_dmu_fini(zp);
125111935SMark.Shellenbaum@Sun.COM 		ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
125211935SMark.Shellenbaum@Sun.COM 		return (EIO);
125310269SMark.Shellenbaum@Sun.COM 	}
125410269SMark.Shellenbaum@Sun.COM 
125511935SMark.Shellenbaum@Sun.COM 	zp->z_unlinked = (zp->z_links == 0);
12565844Sek110237 	zp->z_blksz = doi.doi_data_block_size;
12575326Sek110237 
12585326Sek110237 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num);
12595326Sek110237 
12605326Sek110237 	return (0);
12615326Sek110237 }
12625326Sek110237 
1263789Sahrens void
zfs_znode_delete(znode_t * zp,dmu_tx_t * tx)1264789Sahrens zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
1265789Sahrens {
1266789Sahrens 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
12676992Smaybee 	objset_t *os = zfsvfs->z_os;
12685446Sahrens 	uint64_t obj = zp->z_id;
126912620SMark.Shellenbaum@Oracle.COM 	uint64_t acl_obj = zfs_external_acl(zp);
1270789Sahrens 
12715446Sahrens 	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
127212620SMark.Shellenbaum@Oracle.COM 	if (acl_obj) {
127312620SMark.Shellenbaum@Oracle.COM 		VERIFY(!zp->z_is_sa);
12746992Smaybee 		VERIFY(0 == dmu_object_free(os, acl_obj, tx));
127512620SMark.Shellenbaum@Oracle.COM 	}
12766992Smaybee 	VERIFY(0 == dmu_object_free(os, obj, tx));
12775446Sahrens 	zfs_znode_dmu_fini(zp);
12785446Sahrens 	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
12795642Smaybee 	zfs_znode_free(zp);
1280789Sahrens }
1281789Sahrens 
1282789Sahrens void
zfs_zinactive(znode_t * zp)1283789Sahrens zfs_zinactive(znode_t *zp)
1284789Sahrens {
1285789Sahrens 	vnode_t	*vp = ZTOV(zp);
1286789Sahrens 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1287789Sahrens 	uint64_t z_id = zp->z_id;
1288789Sahrens 
128911935SMark.Shellenbaum@Sun.COM 	ASSERT(zp->z_sa_hdl);
1290789Sahrens 
1291789Sahrens 	/*
1292789Sahrens 	 * Don't allow a zfs_zget() while were trying to release this znode
1293789Sahrens 	 */
1294789Sahrens 	ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id);
1295789Sahrens 
1296789Sahrens 	mutex_enter(&zp->z_lock);
1297789Sahrens 	mutex_enter(&vp->v_lock);
1298789Sahrens 	vp->v_count--;
1299789Sahrens 	if (vp->v_count > 0 || vn_has_cached_data(vp)) {
1300789Sahrens 		/*
1301789Sahrens 		 * If the hold count is greater than zero, somebody has
1302789Sahrens 		 * obtained a new reference on this znode while we were
1303789Sahrens 		 * processing it here, so we are done.  If we still have
1304789Sahrens 		 * mapped pages then we are also done, since we don't
1305789Sahrens 		 * want to inactivate the znode until the pages get pushed.
1306789Sahrens 		 *
1307789Sahrens 		 * XXX - if vn_has_cached_data(vp) is true, but count == 0,
1308789Sahrens 		 * this seems like it would leave the znode hanging with
1309789Sahrens 		 * no chance to go inactive...
1310789Sahrens 		 */
1311789Sahrens 		mutex_exit(&vp->v_lock);
1312789Sahrens 		mutex_exit(&zp->z_lock);
1313789Sahrens 		ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
1314789Sahrens 		return;
1315789Sahrens 	}
1316789Sahrens 	mutex_exit(&vp->v_lock);
1317789Sahrens 
1318789Sahrens 	/*
1319789Sahrens 	 * If this was the last reference to a file with no links,
1320789Sahrens 	 * remove the file from the file system.
1321789Sahrens 	 */
13223461Sahrens 	if (zp->z_unlinked) {
1323789Sahrens 		mutex_exit(&zp->z_lock);
1324789Sahrens 		ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
13253461Sahrens 		zfs_rmnode(zp);
1326789Sahrens 		return;
1327789Sahrens 	}
132811935SMark.Shellenbaum@Sun.COM 
1329789Sahrens 	mutex_exit(&zp->z_lock);
13305446Sahrens 	zfs_znode_dmu_fini(zp);
1331789Sahrens 	ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id);
13325642Smaybee 	zfs_znode_free(zp);
1333789Sahrens }
1334789Sahrens 
1335789Sahrens void
zfs_znode_free(znode_t * zp)1336789Sahrens zfs_znode_free(znode_t *zp)
1337789Sahrens {
1338789Sahrens 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
1339789Sahrens 
13405642Smaybee 	vn_invalid(ZTOV(zp));
13415642Smaybee 
13426712Stomee 	ASSERT(ZTOV(zp)->v_count == 0);
13436712Stomee 
1344789Sahrens 	mutex_enter(&zfsvfs->z_znodes_lock);
13456712Stomee 	POINTER_INVALIDATE(&zp->z_zfsvfs);
1346789Sahrens 	list_remove(&zfsvfs->z_all_znodes, zp);
1347789Sahrens 	mutex_exit(&zfsvfs->z_znodes_lock);
1348789Sahrens 
13499981STim.Haley@Sun.COM 	if (zp->z_acl_cached) {
13509981STim.Haley@Sun.COM 		zfs_acl_free(zp->z_acl_cached);
13519981STim.Haley@Sun.COM 		zp->z_acl_cached = NULL;
13529981STim.Haley@Sun.COM 	}
13539981STim.Haley@Sun.COM 
1354789Sahrens 	kmem_cache_free(znode_cache, zp);
13555642Smaybee 
13565642Smaybee 	VFS_RELE(zfsvfs->z_vfs);
1357789Sahrens }
1358789Sahrens 
1359789Sahrens void
zfs_tstamp_update_setup(znode_t * zp,uint_t flag,uint64_t mtime[2],uint64_t ctime[2],boolean_t have_tx)136011935SMark.Shellenbaum@Sun.COM zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
136111935SMark.Shellenbaum@Sun.COM     uint64_t ctime[2], boolean_t have_tx)
1362789Sahrens {
1363789Sahrens 	timestruc_t	now;
1364789Sahrens 
1365789Sahrens 	gethrestime(&now);
1366789Sahrens 
136711935SMark.Shellenbaum@Sun.COM 	if (have_tx) {	/* will sa_bulk_update happen really soon? */
1368789Sahrens 		zp->z_atime_dirty = 0;
1369789Sahrens 		zp->z_seq++;
1370789Sahrens 	} else {
1371789Sahrens 		zp->z_atime_dirty = 1;
1372789Sahrens 	}
1373789Sahrens 
137411935SMark.Shellenbaum@Sun.COM 	if (flag & AT_ATIME) {
137511935SMark.Shellenbaum@Sun.COM 		ZFS_TIME_ENCODE(&now, zp->z_atime);
137611935SMark.Shellenbaum@Sun.COM 	}
1377789Sahrens 
13785331Samw 	if (flag & AT_MTIME) {
137911935SMark.Shellenbaum@Sun.COM 		ZFS_TIME_ENCODE(&now, mtime);
138011935SMark.Shellenbaum@Sun.COM 		if (zp->z_zfsvfs->z_use_fuids) {
138111935SMark.Shellenbaum@Sun.COM 			zp->z_pflags |= (ZFS_ARCHIVE |
138211935SMark.Shellenbaum@Sun.COM 			    ZFS_AV_MODIFIED);
138311935SMark.Shellenbaum@Sun.COM 		}
13845331Samw 	}
1385789Sahrens 
13865331Samw 	if (flag & AT_CTIME) {
138711935SMark.Shellenbaum@Sun.COM 		ZFS_TIME_ENCODE(&now, ctime);
13885331Samw 		if (zp->z_zfsvfs->z_use_fuids)
138911935SMark.Shellenbaum@Sun.COM 			zp->z_pflags |= ZFS_ARCHIVE;
13905331Samw 	}
1391789Sahrens }
1392789Sahrens 
1393789Sahrens /*
13941669Sperrin  * Grow the block size for a file.
1395789Sahrens  *
1396789Sahrens  *	IN:	zp	- znode of file to free data in.
1397789Sahrens  *		size	- requested block size
1398789Sahrens  *		tx	- open transaction.
1399789Sahrens  *
1400789Sahrens  * NOTE: this function assumes that the znode is write locked.
1401789Sahrens  */
14021669Sperrin void
zfs_grow_blocksize(znode_t * zp,uint64_t size,dmu_tx_t * tx)1403789Sahrens zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
1404789Sahrens {
1405789Sahrens 	int		error;
1406789Sahrens 	u_longlong_t	dummy;
1407789Sahrens 
1408789Sahrens 	if (size <= zp->z_blksz)
14091669Sperrin 		return;
1410789Sahrens 	/*
1411789Sahrens 	 * If the file size is already greater than the current blocksize,
1412789Sahrens 	 * we will not grow.  If there is more than one block in a file,
1413789Sahrens 	 * the blocksize cannot change.
1414789Sahrens 	 */
141511935SMark.Shellenbaum@Sun.COM 	if (zp->z_blksz && zp->z_size > zp->z_blksz)
14161669Sperrin 		return;
1417789Sahrens 
1418789Sahrens 	error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id,
1419789Sahrens 	    size, 0, tx);
142011935SMark.Shellenbaum@Sun.COM 
1421789Sahrens 	if (error == ENOTSUP)
14221669Sperrin 		return;
1423789Sahrens 	ASSERT3U(error, ==, 0);
1424789Sahrens 
1425789Sahrens 	/* What blocksize did we actually get? */
142611935SMark.Shellenbaum@Sun.COM 	dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy);
1427789Sahrens }
1428789Sahrens 
1429789Sahrens /*
1430789Sahrens  * This is a dummy interface used when pvn_vplist_dirty() should *not*
1431789Sahrens  * be calling back into the fs for a putpage().  E.g.: when truncating
1432789Sahrens  * a file, the pages being "thrown away* don't need to be written out.
1433789Sahrens  */
1434789Sahrens /* ARGSUSED */
1435789Sahrens static int
zfs_no_putpage(vnode_t * vp,page_t * pp,u_offset_t * offp,size_t * lenp,int flags,cred_t * cr)1436789Sahrens zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp,
1437789Sahrens     int flags, cred_t *cr)
1438789Sahrens {
1439789Sahrens 	ASSERT(0);
1440789Sahrens 	return (0);
1441789Sahrens }
1442789Sahrens 
1443789Sahrens /*
14446992Smaybee  * Increase the file length
1445789Sahrens  *
1446789Sahrens  *	IN:	zp	- znode of file to free data in.
14476992Smaybee  *		end	- new end-of-file
1448789Sahrens  *
1449789Sahrens  * 	RETURN:	0 if success
1450789Sahrens  *		error code if failure
1451789Sahrens  */
14526992Smaybee static int
zfs_extend(znode_t * zp,uint64_t end)14536992Smaybee zfs_extend(znode_t *zp, uint64_t end)
1454789Sahrens {
14551878Smaybee 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
14566992Smaybee 	dmu_tx_t *tx;
14571878Smaybee 	rl_t *rl;
14586992Smaybee 	uint64_t newblksz;
14591669Sperrin 	int error;
1460789Sahrens 
1461789Sahrens 	/*
14626992Smaybee 	 * We will change zp_size, lock the whole file.
14631878Smaybee 	 */
14646992Smaybee 	rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
14651878Smaybee 
14661878Smaybee 	/*
1467789Sahrens 	 * Nothing to do if file already at desired length.
1468789Sahrens 	 */
146911935SMark.Shellenbaum@Sun.COM 	if (end <= zp->z_size) {
14702237Smaybee 		zfs_range_unlock(rl);
1471789Sahrens 		return (0);
1472789Sahrens 	}
14736992Smaybee top:
14741878Smaybee 	tx = dmu_tx_create(zfsvfs->z_os);
147511935SMark.Shellenbaum@Sun.COM 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
147611935SMark.Shellenbaum@Sun.COM 	zfs_sa_upgrade_txholds(tx, zp);
14776992Smaybee 	if (end > zp->z_blksz &&
14781878Smaybee 	    (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
1479789Sahrens 		/*
1480789Sahrens 		 * We are growing the file past the current block size.
1481789Sahrens 		 */
1482789Sahrens 		if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) {
1483789Sahrens 			ASSERT(!ISP2(zp->z_blksz));
14846992Smaybee 			newblksz = MIN(end, SPA_MAXBLOCKSIZE);
1485789Sahrens 		} else {
14866992Smaybee 			newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz);
1487789Sahrens 		}
14886992Smaybee 		dmu_tx_hold_write(tx, zp->z_id, 0, newblksz);
14896992Smaybee 	} else {
14906992Smaybee 		newblksz = 0;
14911878Smaybee 	}
14921878Smaybee 
14938227SNeil.Perrin@Sun.COM 	error = dmu_tx_assign(tx, TXG_NOWAIT);
14941878Smaybee 	if (error) {
14958227SNeil.Perrin@Sun.COM 		if (error == ERESTART) {
14962113Sahrens 			dmu_tx_wait(tx);
14976992Smaybee 			dmu_tx_abort(tx);
14986992Smaybee 			goto top;
14996992Smaybee 		}
15001878Smaybee 		dmu_tx_abort(tx);
15012237Smaybee 		zfs_range_unlock(rl);
15021878Smaybee 		return (error);
15031878Smaybee 	}
15041878Smaybee 
15056992Smaybee 	if (newblksz)
15066992Smaybee 		zfs_grow_blocksize(zp, newblksz, tx);
15071878Smaybee 
150811935SMark.Shellenbaum@Sun.COM 	zp->z_size = end;
150911935SMark.Shellenbaum@Sun.COM 
151011935SMark.Shellenbaum@Sun.COM 	VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs),
151111935SMark.Shellenbaum@Sun.COM 	    &zp->z_size, sizeof (zp->z_size), tx));
15121878Smaybee 
15132237Smaybee 	zfs_range_unlock(rl);
15141878Smaybee 
15151878Smaybee 	dmu_tx_commit(tx);
15161878Smaybee 
15176992Smaybee 	return (0);
15186992Smaybee }
15196992Smaybee 
15206992Smaybee /*
15216992Smaybee  * Free space in a file.
15226992Smaybee  *
15236992Smaybee  *	IN:	zp	- znode of file to free data in.
15246992Smaybee  *		off	- start of section to free.
15256992Smaybee  *		len	- length of section to free.
15266992Smaybee  *
15276992Smaybee  * 	RETURN:	0 if success
15286992Smaybee  *		error code if failure
15296992Smaybee  */
15306992Smaybee static int
zfs_free_range(znode_t * zp,uint64_t off,uint64_t len)15316992Smaybee zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
15326992Smaybee {
15336992Smaybee 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
15346992Smaybee 	rl_t *rl;
15356992Smaybee 	int error;
15366992Smaybee 
15376992Smaybee 	/*
15386992Smaybee 	 * Lock the range being freed.
15396992Smaybee 	 */
15406992Smaybee 	rl = zfs_range_lock(zp, off, len, RL_WRITER);
15416992Smaybee 
15426992Smaybee 	/*
15436992Smaybee 	 * Nothing to do if file already at desired length.
15446992Smaybee 	 */
154511935SMark.Shellenbaum@Sun.COM 	if (off >= zp->z_size) {
15466992Smaybee 		zfs_range_unlock(rl);
15476992Smaybee 		return (0);
15486992Smaybee 	}
15496992Smaybee 
155011935SMark.Shellenbaum@Sun.COM 	if (off + len > zp->z_size)
155111935SMark.Shellenbaum@Sun.COM 		len = zp->z_size - off;
15526992Smaybee 
15536992Smaybee 	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);
15546992Smaybee 
15556992Smaybee 	zfs_range_unlock(rl);
15566992Smaybee 
15576992Smaybee 	return (error);
15586992Smaybee }
15596992Smaybee 
15606992Smaybee /*
15616992Smaybee  * Truncate a file
15626992Smaybee  *
15636992Smaybee  *	IN:	zp	- znode of file to free data in.
15646992Smaybee  *		end	- new end-of-file.
15656992Smaybee  *
15666992Smaybee  * 	RETURN:	0 if success
15676992Smaybee  *		error code if failure
15686992Smaybee  */
15696992Smaybee static int
zfs_trunc(znode_t * zp,uint64_t end)15706992Smaybee zfs_trunc(znode_t *zp, uint64_t end)
15716992Smaybee {
15726992Smaybee 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
15736992Smaybee 	vnode_t *vp = ZTOV(zp);
15746992Smaybee 	dmu_tx_t *tx;
15756992Smaybee 	rl_t *rl;
15766992Smaybee 	int error;
157713082SJoyce.McIntosh@Sun.COM 	sa_bulk_attr_t bulk[2];
157813082SJoyce.McIntosh@Sun.COM 	int count = 0;
15796992Smaybee 
15806992Smaybee 	/*
15816992Smaybee 	 * We will change zp_size, lock the whole file.
15826992Smaybee 	 */
15836992Smaybee 	rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER);
15846992Smaybee 
15856992Smaybee 	/*
15866992Smaybee 	 * Nothing to do if file already at desired length.
15876992Smaybee 	 */
158811935SMark.Shellenbaum@Sun.COM 	if (end >= zp->z_size) {
15896992Smaybee 		zfs_range_unlock(rl);
15906992Smaybee 		return (0);
15916992Smaybee 	}
15926992Smaybee 
15936992Smaybee 	error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end,  -1);
15946992Smaybee 	if (error) {
15956992Smaybee 		zfs_range_unlock(rl);
15966992Smaybee 		return (error);
15976992Smaybee 	}
15986992Smaybee top:
15996992Smaybee 	tx = dmu_tx_create(zfsvfs->z_os);
160011935SMark.Shellenbaum@Sun.COM 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
160111935SMark.Shellenbaum@Sun.COM 	zfs_sa_upgrade_txholds(tx, zp);
16028227SNeil.Perrin@Sun.COM 	error = dmu_tx_assign(tx, TXG_NOWAIT);
16036992Smaybee 	if (error) {
16048227SNeil.Perrin@Sun.COM 		if (error == ERESTART) {
16056992Smaybee 			dmu_tx_wait(tx);
16066992Smaybee 			dmu_tx_abort(tx);
16076992Smaybee 			goto top;
16086992Smaybee 		}
16096992Smaybee 		dmu_tx_abort(tx);
16106992Smaybee 		zfs_range_unlock(rl);
16116992Smaybee 		return (error);
16126992Smaybee 	}
161311935SMark.Shellenbaum@Sun.COM 
161411935SMark.Shellenbaum@Sun.COM 	zp->z_size = end;
161513082SJoyce.McIntosh@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
161613082SJoyce.McIntosh@Sun.COM 	    NULL, &zp->z_size, sizeof (zp->z_size));
16176992Smaybee 
161813082SJoyce.McIntosh@Sun.COM 	if (end == 0) {
161913082SJoyce.McIntosh@Sun.COM 		zp->z_pflags &= ~ZFS_SPARSE;
162013082SJoyce.McIntosh@Sun.COM 		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
162113082SJoyce.McIntosh@Sun.COM 		    NULL, &zp->z_pflags, 8);
162213082SJoyce.McIntosh@Sun.COM 	}
162313082SJoyce.McIntosh@Sun.COM 	VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0);
16246992Smaybee 
16256992Smaybee 	dmu_tx_commit(tx);
16266992Smaybee 
1627789Sahrens 	/*
16281878Smaybee 	 * Clear any mapped pages in the truncated region.  This has to
16291878Smaybee 	 * happen outside of the transaction to avoid the possibility of
16301878Smaybee 	 * a deadlock with someone trying to push a page that we are
16311878Smaybee 	 * about to invalidate.
1632789Sahrens 	 */
16336992Smaybee 	if (vn_has_cached_data(vp)) {
1634789Sahrens 		page_t *pp;
16356992Smaybee 		uint64_t start = end & PAGEMASK;
16366992Smaybee 		int poff = end & PAGEOFFSET;
1637789Sahrens 
16381878Smaybee 		if (poff != 0 && (pp = page_lookup(vp, start, SE_SHARED))) {
1639789Sahrens 			/*
1640789Sahrens 			 * We need to zero a partial page.
1641789Sahrens 			 */
16421878Smaybee 			pagezero(pp, poff, PAGESIZE - poff);
1643789Sahrens 			start += PAGESIZE;
1644789Sahrens 			page_unlock(pp);
1645789Sahrens 		}
1646789Sahrens 		error = pvn_vplist_dirty(vp, start, zfs_no_putpage,
16471878Smaybee 		    B_INVAL | B_TRUNC, NULL);
1648789Sahrens 		ASSERT(error == 0);
1649789Sahrens 	}
16508636SMark.Maybee@Sun.COM 
16518636SMark.Maybee@Sun.COM 	zfs_range_unlock(rl);
1652789Sahrens 
1653789Sahrens 	return (0);
1654789Sahrens }
1655789Sahrens 
16566992Smaybee /*
16576992Smaybee  * Free space in a file
16586992Smaybee  *
16596992Smaybee  *	IN:	zp	- znode of file to free data in.
16606992Smaybee  *		off	- start of range
16616992Smaybee  *		len	- end of range (0 => EOF)
16626992Smaybee  *		flag	- current file open mode flags.
16636992Smaybee  *		log	- TRUE if this action should be logged
16646992Smaybee  *
16656992Smaybee  * 	RETURN:	0 if success
16666992Smaybee  *		error code if failure
16676992Smaybee  */
16686992Smaybee int
zfs_freesp(znode_t * zp,uint64_t off,uint64_t len,int flag,boolean_t log)16696992Smaybee zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
16706992Smaybee {
16716992Smaybee 	vnode_t *vp = ZTOV(zp);
16726992Smaybee 	dmu_tx_t *tx;
16736992Smaybee 	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
16746992Smaybee 	zilog_t *zilog = zfsvfs->z_log;
167511935SMark.Shellenbaum@Sun.COM 	uint64_t mode;
167611935SMark.Shellenbaum@Sun.COM 	uint64_t mtime[2], ctime[2];
167712394SMark.Shellenbaum@Sun.COM 	sa_bulk_attr_t bulk[3];
167811935SMark.Shellenbaum@Sun.COM 	int count = 0;
16796992Smaybee 	int error;
16806992Smaybee 
168111935SMark.Shellenbaum@Sun.COM 	if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode,
168211935SMark.Shellenbaum@Sun.COM 	    sizeof (mode))) != 0)
168311935SMark.Shellenbaum@Sun.COM 		return (error);
168411935SMark.Shellenbaum@Sun.COM 
168511935SMark.Shellenbaum@Sun.COM 	if (off > zp->z_size) {
16866992Smaybee 		error =  zfs_extend(zp, off+len);
16876992Smaybee 		if (error == 0 && log)
16886992Smaybee 			goto log;
16896992Smaybee 		else
16906992Smaybee 			return (error);
16916992Smaybee 	}
16926992Smaybee 
16936992Smaybee 	/*
16946992Smaybee 	 * Check for any locks in the region to be freed.
16956992Smaybee 	 */
169611935SMark.Shellenbaum@Sun.COM 
169711935SMark.Shellenbaum@Sun.COM 	if (MANDLOCK(vp, (mode_t)mode)) {
169811935SMark.Shellenbaum@Sun.COM 		uint64_t length = (len ? len : zp->z_size - off);
16996992Smaybee 		if (error = chklock(vp, FWRITE, off, length, flag, NULL))
17006992Smaybee 			return (error);
17016992Smaybee 	}
17026992Smaybee 
17036992Smaybee 	if (len == 0) {
17046992Smaybee 		error = zfs_trunc(zp, off);
17056992Smaybee 	} else {
17066992Smaybee 		if ((error = zfs_free_range(zp, off, len)) == 0 &&
170711935SMark.Shellenbaum@Sun.COM 		    off + len > zp->z_size)
17086992Smaybee 			error = zfs_extend(zp, off+len);
17096992Smaybee 	}
17106992Smaybee 	if (error || !log)
17116992Smaybee 		return (error);
17126992Smaybee log:
17136992Smaybee 	tx = dmu_tx_create(zfsvfs->z_os);
171411935SMark.Shellenbaum@Sun.COM 	dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
171511935SMark.Shellenbaum@Sun.COM 	zfs_sa_upgrade_txholds(tx, zp);
17168227SNeil.Perrin@Sun.COM 	error = dmu_tx_assign(tx, TXG_NOWAIT);
17176992Smaybee 	if (error) {
17188227SNeil.Perrin@Sun.COM 		if (error == ERESTART) {
17196992Smaybee 			dmu_tx_wait(tx);
17206992Smaybee 			dmu_tx_abort(tx);
17216992Smaybee 			goto log;
17226992Smaybee 		}
17236992Smaybee 		dmu_tx_abort(tx);
17246992Smaybee 		return (error);
17256992Smaybee 	}
17266992Smaybee 
172711935SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16);
172811935SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16);
172912394SMark.Shellenbaum@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
173012394SMark.Shellenbaum@Sun.COM 	    NULL, &zp->z_pflags, 8);
173111935SMark.Shellenbaum@Sun.COM 	zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE);
173211935SMark.Shellenbaum@Sun.COM 	error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
173311935SMark.Shellenbaum@Sun.COM 	ASSERT(error == 0);
173411935SMark.Shellenbaum@Sun.COM 
17356992Smaybee 	zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len);
17366992Smaybee 
17376992Smaybee 	dmu_tx_commit(tx);
17386992Smaybee 	return (0);
17396992Smaybee }
17406992Smaybee 
1741789Sahrens void
zfs_create_fs(objset_t * os,cred_t * cr,nvlist_t * zplprops,dmu_tx_t * tx)17425498Stimh zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
1743789Sahrens {
1744789Sahrens 	zfsvfs_t	zfsvfs;
174511935SMark.Shellenbaum@Sun.COM 	uint64_t	moid, obj, sa_obj, version;
17465498Stimh 	uint64_t	sense = ZFS_CASE_SENSITIVE;
17475498Stimh 	uint64_t	norm = 0;
17485498Stimh 	nvpair_t	*elem;
1749789Sahrens 	int		error;
175010938SMark.Shellenbaum@Sun.COM 	int		i;
1751789Sahrens 	znode_t		*rootzp = NULL;
1752789Sahrens 	vnode_t		*vp;
1753789Sahrens 	vattr_t		vattr;
17545446Sahrens 	znode_t		*zp;
17559179SMark.Shellenbaum@Sun.COM 	zfs_acl_ids_t	acl_ids;
1756789Sahrens 
1757789Sahrens 	/*
1758789Sahrens 	 * First attempt to create master node.
1759789Sahrens 	 */
17601544Seschrock 	/*
17611544Seschrock 	 * In an empty objset, there are no blocks to read and thus
17621544Seschrock 	 * there can be no i/o errors (which we assert below).
17631544Seschrock 	 */
1764789Sahrens 	moid = MASTER_NODE_OBJ;
1765789Sahrens 	error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE,
1766789Sahrens 	    DMU_OT_NONE, 0, tx);
1767789Sahrens 	ASSERT(error == 0);
1768789Sahrens 
1769789Sahrens 	/*
1770789Sahrens 	 * Set starting attributes.
1771789Sahrens 	 */
177211935SMark.Shellenbaum@Sun.COM 	version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os)));
17735498Stimh 	elem = NULL;
17745498Stimh 	while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) {
17755498Stimh 		/* For the moment we expect all zpl props to be uint64_ts */
17765498Stimh 		uint64_t val;
17775498Stimh 		char *name;
1778789Sahrens 
17795498Stimh 		ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64);
17805520Stimh 		VERIFY(nvpair_value_uint64(elem, &val) == 0);
17815498Stimh 		name = nvpair_name(elem);
17825498Stimh 		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) {
17839396SMatthew.Ahrens@Sun.COM 			if (val < version)
17849396SMatthew.Ahrens@Sun.COM 				version = val;
17855498Stimh 		} else {
17865498Stimh 			error = zap_update(os, moid, name, 8, 1, &val, tx);
17875498Stimh 		}
17885498Stimh 		ASSERT(error == 0);
17895498Stimh 		if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0)
17905498Stimh 			norm = val;
17915498Stimh 		else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0)
17925498Stimh 			sense = val;
17935498Stimh 	}
17945498Stimh 	ASSERT(version != 0);
17959396SMatthew.Ahrens@Sun.COM 	error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx);
1796789Sahrens 
1797789Sahrens 	/*
179811935SMark.Shellenbaum@Sun.COM 	 * Create zap object used for SA attribute registration
179911935SMark.Shellenbaum@Sun.COM 	 */
180011935SMark.Shellenbaum@Sun.COM 
180111935SMark.Shellenbaum@Sun.COM 	if (version >= ZPL_VERSION_SA) {
180211935SMark.Shellenbaum@Sun.COM 		sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
180311935SMark.Shellenbaum@Sun.COM 		    DMU_OT_NONE, 0, tx);
180411935SMark.Shellenbaum@Sun.COM 		error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
180511935SMark.Shellenbaum@Sun.COM 		ASSERT(error == 0);
180611935SMark.Shellenbaum@Sun.COM 	} else {
180711935SMark.Shellenbaum@Sun.COM 		sa_obj = 0;
180811935SMark.Shellenbaum@Sun.COM 	}
180911935SMark.Shellenbaum@Sun.COM 	/*
1810789Sahrens 	 * Create a delete queue.
1811789Sahrens 	 */
18129396SMatthew.Ahrens@Sun.COM 	obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx);
1813789Sahrens 
18149396SMatthew.Ahrens@Sun.COM 	error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx);
1815789Sahrens 	ASSERT(error == 0);
1816789Sahrens 
1817789Sahrens 	/*
1818789Sahrens 	 * Create root znode.  Create minimal znode/vnode/zfsvfs
1819789Sahrens 	 * to allow zfs_mknode to work.
1820789Sahrens 	 */
1821789Sahrens 	vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
1822789Sahrens 	vattr.va_type = VDIR;
1823789Sahrens 	vattr.va_mode = S_IFDIR|0755;
18244543Smarks 	vattr.va_uid = crgetuid(cr);
18254543Smarks 	vattr.va_gid = crgetgid(cr);
1826789Sahrens 
1827789Sahrens 	rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP);
182812684STom.Erickson@Sun.COM 	ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs));
182912684STom.Erickson@Sun.COM 	rootzp->z_moved = 0;
18303461Sahrens 	rootzp->z_unlinked = 0;
1831789Sahrens 	rootzp->z_atime_dirty = 0;
183211935SMark.Shellenbaum@Sun.COM 	rootzp->z_is_sa = USE_SA(version, os);
1833789Sahrens 
1834789Sahrens 	vp = ZTOV(rootzp);
1835789Sahrens 	vn_reinit(vp);
1836789Sahrens 	vp->v_type = VDIR;
1837789Sahrens 
1838789Sahrens 	bzero(&zfsvfs, sizeof (zfsvfs_t));
1839789Sahrens 
1840789Sahrens 	zfsvfs.z_os = os;
1841789Sahrens 	zfsvfs.z_parent = &zfsvfs;
18425331Samw 	zfsvfs.z_version = version;
18435331Samw 	zfsvfs.z_use_fuids = USE_FUIDS(version, os);
184411935SMark.Shellenbaum@Sun.COM 	zfsvfs.z_use_sa = USE_SA(version, os);
18455331Samw 	zfsvfs.z_norm = norm;
184611935SMark.Shellenbaum@Sun.COM 
184712493SMark.Shellenbaum@Oracle.COM 	error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
184812493SMark.Shellenbaum@Oracle.COM 	    &zfsvfs.z_attr_table);
184912493SMark.Shellenbaum@Oracle.COM 
185012493SMark.Shellenbaum@Oracle.COM 	ASSERT(error == 0);
185111935SMark.Shellenbaum@Sun.COM 
18525498Stimh 	/*
18535498Stimh 	 * Fold case on file systems that are always or sometimes case
18545498Stimh 	 * insensitive.
18555498Stimh 	 */
18565498Stimh 	if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
18575498Stimh 		zfsvfs.z_norm |= U8_TEXTPREP_TOUPPER;
1858789Sahrens 
1859789Sahrens 	mutex_init(&zfsvfs.z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
1860789Sahrens 	list_create(&zfsvfs.z_all_znodes, sizeof (znode_t),
1861789Sahrens 	    offsetof(znode_t, z_link_node));
1862789Sahrens 
186310938SMark.Shellenbaum@Sun.COM 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
186410938SMark.Shellenbaum@Sun.COM 		mutex_init(&zfsvfs.z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
186510938SMark.Shellenbaum@Sun.COM 
18666712Stomee 	rootzp->z_zfsvfs = &zfsvfs;
18679179SMark.Shellenbaum@Sun.COM 	VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
18689179SMark.Shellenbaum@Sun.COM 	    cr, NULL, &acl_ids));
186911935SMark.Shellenbaum@Sun.COM 	zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids);
18705642Smaybee 	ASSERT3P(zp, ==, rootzp);
18716712Stomee 	ASSERT(!vn_in_dnlc(ZTOV(rootzp))); /* not valid to move */
18725446Sahrens 	error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx);
1873789Sahrens 	ASSERT(error == 0);
18749179SMark.Shellenbaum@Sun.COM 	zfs_acl_ids_free(&acl_ids);
18756712Stomee 	POINTER_INVALIDATE(&rootzp->z_zfsvfs);
1876789Sahrens 
1877789Sahrens 	ZTOV(rootzp)->v_count = 0;
187811935SMark.Shellenbaum@Sun.COM 	sa_handle_destroy(rootzp->z_sa_hdl);
1879789Sahrens 	kmem_cache_free(znode_cache, rootzp);
18808845Samw@Sun.COM 
18818845Samw@Sun.COM 	/*
18828845Samw@Sun.COM 	 * Create shares directory
18838845Samw@Sun.COM 	 */
18848845Samw@Sun.COM 
18858845Samw@Sun.COM 	error = zfs_create_share_dir(&zfsvfs, tx);
18869179SMark.Shellenbaum@Sun.COM 
18878845Samw@Sun.COM 	ASSERT(error == 0);
188810938SMark.Shellenbaum@Sun.COM 
188910938SMark.Shellenbaum@Sun.COM 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
189010938SMark.Shellenbaum@Sun.COM 		mutex_destroy(&zfsvfs.z_hold_mtx[i]);
1891789Sahrens }
18925331Samw 
18933444Sek110237 #endif /* _KERNEL */
189411935SMark.Shellenbaum@Sun.COM 
189513043STim.Haley@Sun.COM static int
zfs_sa_setup(objset_t * osp,sa_attr_type_t ** sa_table)189613043STim.Haley@Sun.COM zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
189713043STim.Haley@Sun.COM {
189813043STim.Haley@Sun.COM 	uint64_t sa_obj = 0;
189913043STim.Haley@Sun.COM 	int error;
190013043STim.Haley@Sun.COM 
190113043STim.Haley@Sun.COM 	error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj);
190213043STim.Haley@Sun.COM 	if (error != 0 && error != ENOENT)
190313043STim.Haley@Sun.COM 		return (error);
190413043STim.Haley@Sun.COM 
190513043STim.Haley@Sun.COM 	error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table);
190613043STim.Haley@Sun.COM 	return (error);
190713043STim.Haley@Sun.COM }
190813043STim.Haley@Sun.COM 
190913043STim.Haley@Sun.COM static int
zfs_grab_sa_handle(objset_t * osp,uint64_t obj,sa_handle_t ** hdlp,dmu_buf_t ** db,void * tag)191013043STim.Haley@Sun.COM zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
1911*13144STim.Haley@Sun.COM     dmu_buf_t **db, void *tag)
191213043STim.Haley@Sun.COM {
191313043STim.Haley@Sun.COM 	dmu_object_info_t doi;
191413043STim.Haley@Sun.COM 	int error;
191513043STim.Haley@Sun.COM 
1916*13144STim.Haley@Sun.COM 	if ((error = sa_buf_hold(osp, obj, tag, db)) != 0)
191713043STim.Haley@Sun.COM 		return (error);
191813043STim.Haley@Sun.COM 
191913043STim.Haley@Sun.COM 	dmu_object_info_from_db(*db, &doi);
192013043STim.Haley@Sun.COM 	if ((doi.doi_bonus_type != DMU_OT_SA &&
192113043STim.Haley@Sun.COM 	    doi.doi_bonus_type != DMU_OT_ZNODE) ||
192213043STim.Haley@Sun.COM 	    doi.doi_bonus_type == DMU_OT_ZNODE &&
192313043STim.Haley@Sun.COM 	    doi.doi_bonus_size < sizeof (znode_phys_t)) {
1924*13144STim.Haley@Sun.COM 		sa_buf_rele(*db, tag);
192513043STim.Haley@Sun.COM 		return (ENOTSUP);
192613043STim.Haley@Sun.COM 	}
192713043STim.Haley@Sun.COM 
192813043STim.Haley@Sun.COM 	error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
192913043STim.Haley@Sun.COM 	if (error != 0) {
1930*13144STim.Haley@Sun.COM 		sa_buf_rele(*db, tag);
193113043STim.Haley@Sun.COM 		return (error);
193213043STim.Haley@Sun.COM 	}
193313043STim.Haley@Sun.COM 
193413043STim.Haley@Sun.COM 	return (0);
193513043STim.Haley@Sun.COM }
193613043STim.Haley@Sun.COM 
193713043STim.Haley@Sun.COM void
zfs_release_sa_handle(sa_handle_t * hdl,dmu_buf_t * db,void * tag)1938*13144STim.Haley@Sun.COM zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
193913043STim.Haley@Sun.COM {
194013043STim.Haley@Sun.COM 	sa_handle_destroy(hdl);
1941*13144STim.Haley@Sun.COM 	sa_buf_rele(db, tag);
194213043STim.Haley@Sun.COM }
194313043STim.Haley@Sun.COM 
19443444Sek110237 /*
19453444Sek110237  * Given an object number, return its parent object number and whether
19463444Sek110237  * or not the object is an extended attribute directory.
19473444Sek110237  */
19483444Sek110237 static int
zfs_obj_to_pobj(sa_handle_t * hdl,sa_attr_type_t * sa_table,uint64_t * pobjp,int * is_xattrdir)194913043STim.Haley@Sun.COM zfs_obj_to_pobj(sa_handle_t *hdl, sa_attr_type_t *sa_table, uint64_t *pobjp,
195013043STim.Haley@Sun.COM     int *is_xattrdir)
19513444Sek110237 {
195211935SMark.Shellenbaum@Sun.COM 	uint64_t parent;
195311935SMark.Shellenbaum@Sun.COM 	uint64_t pflags;
195411935SMark.Shellenbaum@Sun.COM 	uint64_t mode;
195511935SMark.Shellenbaum@Sun.COM 	sa_bulk_attr_t bulk[3];
195611935SMark.Shellenbaum@Sun.COM 	int count = 0;
195713043STim.Haley@Sun.COM 	int error;
19583444Sek110237 
195913043STim.Haley@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL,
196013043STim.Haley@Sun.COM 	    &parent, sizeof (parent));
196113043STim.Haley@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL,
196213043STim.Haley@Sun.COM 	    &pflags, sizeof (pflags));
196313043STim.Haley@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
196413043STim.Haley@Sun.COM 	    &mode, sizeof (mode));
196513043STim.Haley@Sun.COM 
196613043STim.Haley@Sun.COM 	if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0)
19673444Sek110237 		return (error);
19683444Sek110237 
196911935SMark.Shellenbaum@Sun.COM 	*pobjp = parent;
197011935SMark.Shellenbaum@Sun.COM 	*is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode);
19713444Sek110237 
19723444Sek110237 	return (0);
19733444Sek110237 }
19743444Sek110237 
197513043STim.Haley@Sun.COM /*
197613043STim.Haley@Sun.COM  * Given an object number, return some zpl level statistics
197713043STim.Haley@Sun.COM  */
197813043STim.Haley@Sun.COM static int
zfs_obj_to_stats_impl(sa_handle_t * hdl,sa_attr_type_t * sa_table,zfs_stat_t * sb)197913043STim.Haley@Sun.COM zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table,
198013043STim.Haley@Sun.COM     zfs_stat_t *sb)
19813444Sek110237 {
198213043STim.Haley@Sun.COM 	sa_bulk_attr_t bulk[4];
198313043STim.Haley@Sun.COM 	int count = 0;
198413043STim.Haley@Sun.COM 
198513043STim.Haley@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
198613043STim.Haley@Sun.COM 	    &sb->zs_mode, sizeof (sb->zs_mode));
198713043STim.Haley@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL,
198813043STim.Haley@Sun.COM 	    &sb->zs_gen, sizeof (sb->zs_gen));
198913043STim.Haley@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL,
199013043STim.Haley@Sun.COM 	    &sb->zs_links, sizeof (sb->zs_links));
199113043STim.Haley@Sun.COM 	SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL,
199213043STim.Haley@Sun.COM 	    &sb->zs_ctime, sizeof (sb->zs_ctime));
199313043STim.Haley@Sun.COM 
199413043STim.Haley@Sun.COM 	return (sa_bulk_lookup(hdl, bulk, count));
199513043STim.Haley@Sun.COM }
199613043STim.Haley@Sun.COM 
199713043STim.Haley@Sun.COM static int
zfs_obj_to_path_impl(objset_t * osp,uint64_t obj,sa_handle_t * hdl,sa_attr_type_t * sa_table,char * buf,int len)199813043STim.Haley@Sun.COM zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
199913043STim.Haley@Sun.COM     sa_attr_type_t *sa_table, char *buf, int len)
200013043STim.Haley@Sun.COM {
200113043STim.Haley@Sun.COM 	sa_handle_t *sa_hdl;
200213043STim.Haley@Sun.COM 	sa_handle_t *prevhdl = NULL;
200313043STim.Haley@Sun.COM 	dmu_buf_t *prevdb = NULL;
200413043STim.Haley@Sun.COM 	dmu_buf_t *sa_db = NULL;
20053444Sek110237 	char *path = buf + len - 1;
20063444Sek110237 	int error;
20073444Sek110237 
20083444Sek110237 	*path = '\0';
200913043STim.Haley@Sun.COM 	sa_hdl = hdl;
201011935SMark.Shellenbaum@Sun.COM 
20113444Sek110237 	for (;;) {
20123444Sek110237 		uint64_t pobj;
20133444Sek110237 		char component[MAXNAMELEN + 2];
20143444Sek110237 		size_t complen;
20153444Sek110237 		int is_xattrdir;
20163444Sek110237 
201713043STim.Haley@Sun.COM 		if (prevdb)
2018*13144STim.Haley@Sun.COM 			zfs_release_sa_handle(prevhdl, prevdb, FTAG);
201913043STim.Haley@Sun.COM 
202013043STim.Haley@Sun.COM 		if ((error = zfs_obj_to_pobj(sa_hdl, sa_table, &pobj,
202113043STim.Haley@Sun.COM 		    &is_xattrdir)) != 0)
20223444Sek110237 			break;
20233444Sek110237 
20243444Sek110237 		if (pobj == obj) {
20253444Sek110237 			if (path[0] != '/')
20263444Sek110237 				*--path = '/';
20273444Sek110237 			break;
20283444Sek110237 		}
20293444Sek110237 
20303444Sek110237 		component[0] = '/';
20313444Sek110237 		if (is_xattrdir) {
20323444Sek110237 			(void) sprintf(component + 1, "<xattrdir>");
20333444Sek110237 		} else {
20344577Sahrens 			error = zap_value_search(osp, pobj, obj,
20354577Sahrens 			    ZFS_DIRENT_OBJ(-1ULL), component + 1);
20363444Sek110237 			if (error != 0)
20373444Sek110237 				break;
20383444Sek110237 		}
20393444Sek110237 
20403444Sek110237 		complen = strlen(component);
20413444Sek110237 		path -= complen;
20423444Sek110237 		ASSERT(path >= buf);
20433444Sek110237 		bcopy(component, path, complen);
20443444Sek110237 		obj = pobj;
204513043STim.Haley@Sun.COM 
204613043STim.Haley@Sun.COM 		if (sa_hdl != hdl) {
204713043STim.Haley@Sun.COM 			prevhdl = sa_hdl;
204813043STim.Haley@Sun.COM 			prevdb = sa_db;
204913043STim.Haley@Sun.COM 		}
2050*13144STim.Haley@Sun.COM 		error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG);
205113043STim.Haley@Sun.COM 		if (error != 0) {
205213043STim.Haley@Sun.COM 			sa_hdl = prevhdl;
205313043STim.Haley@Sun.COM 			sa_db = prevdb;
205413043STim.Haley@Sun.COM 			break;
205513043STim.Haley@Sun.COM 		}
205613043STim.Haley@Sun.COM 	}
205713043STim.Haley@Sun.COM 
205813043STim.Haley@Sun.COM 	if (sa_hdl != NULL && sa_hdl != hdl) {
205913043STim.Haley@Sun.COM 		ASSERT(sa_db != NULL);
2060*13144STim.Haley@Sun.COM 		zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
20613444Sek110237 	}
20623444Sek110237 
20633444Sek110237 	if (error == 0)
20643444Sek110237 		(void) memmove(buf, path, buf + len - path);
206511935SMark.Shellenbaum@Sun.COM 
20663444Sek110237 	return (error);
20673444Sek110237 }
206813043STim.Haley@Sun.COM 
206913043STim.Haley@Sun.COM int
zfs_obj_to_path(objset_t * osp,uint64_t obj,char * buf,int len)207013043STim.Haley@Sun.COM zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len)
207113043STim.Haley@Sun.COM {
207213043STim.Haley@Sun.COM 	sa_attr_type_t *sa_table;
207313043STim.Haley@Sun.COM 	sa_handle_t *hdl;
207413043STim.Haley@Sun.COM 	dmu_buf_t *db;
207513043STim.Haley@Sun.COM 	int error;
207613043STim.Haley@Sun.COM 
207713043STim.Haley@Sun.COM 	error = zfs_sa_setup(osp, &sa_table);
207813043STim.Haley@Sun.COM 	if (error != 0)
207913043STim.Haley@Sun.COM 		return (error);
208013043STim.Haley@Sun.COM 
2081*13144STim.Haley@Sun.COM 	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
208213043STim.Haley@Sun.COM 	if (error != 0)
208313043STim.Haley@Sun.COM 		return (error);
208413043STim.Haley@Sun.COM 
208513043STim.Haley@Sun.COM 	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
208613043STim.Haley@Sun.COM 
2087*13144STim.Haley@Sun.COM 	zfs_release_sa_handle(hdl, db, FTAG);
208813043STim.Haley@Sun.COM 	return (error);
208913043STim.Haley@Sun.COM }
209013043STim.Haley@Sun.COM 
209113043STim.Haley@Sun.COM int
zfs_obj_to_stats(objset_t * osp,uint64_t obj,zfs_stat_t * sb,char * buf,int len)209213043STim.Haley@Sun.COM zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
209313043STim.Haley@Sun.COM     char *buf, int len)
209413043STim.Haley@Sun.COM {
209513043STim.Haley@Sun.COM 	char *path = buf + len - 1;
209613043STim.Haley@Sun.COM 	sa_attr_type_t *sa_table;
209713043STim.Haley@Sun.COM 	sa_handle_t *hdl;
209813043STim.Haley@Sun.COM 	dmu_buf_t *db;
209913043STim.Haley@Sun.COM 	int error;
210013043STim.Haley@Sun.COM 
210113043STim.Haley@Sun.COM 	*path = '\0';
210213043STim.Haley@Sun.COM 
210313043STim.Haley@Sun.COM 	error = zfs_sa_setup(osp, &sa_table);
210413043STim.Haley@Sun.COM 	if (error != 0)
210513043STim.Haley@Sun.COM 		return (error);
210613043STim.Haley@Sun.COM 
2107*13144STim.Haley@Sun.COM 	error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG);
210813043STim.Haley@Sun.COM 	if (error != 0)
210913043STim.Haley@Sun.COM 		return (error);
211013043STim.Haley@Sun.COM 
211113043STim.Haley@Sun.COM 	error = zfs_obj_to_stats_impl(hdl, sa_table, sb);
211213043STim.Haley@Sun.COM 	if (error != 0) {
2113*13144STim.Haley@Sun.COM 		zfs_release_sa_handle(hdl, db, FTAG);
211413043STim.Haley@Sun.COM 		return (error);
211513043STim.Haley@Sun.COM 	}
211613043STim.Haley@Sun.COM 
211713043STim.Haley@Sun.COM 	error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len);
211813043STim.Haley@Sun.COM 
2119*13144STim.Haley@Sun.COM 	zfs_release_sa_handle(hdl, db, FTAG);
212013043STim.Haley@Sun.COM 	return (error);
212113043STim.Haley@Sun.COM }
2122