xref: /onnv-gate/usr/src/uts/common/fs/zfs/dsl_dataset.c (revision 6992:20c04e18c58c)
1789Sahrens /*
2789Sahrens  * CDDL HEADER START
3789Sahrens  *
4789Sahrens  * The contents of this file are subject to the terms of the
51544Seschrock  * Common Development and Distribution License (the "License").
61544Seschrock  * You may not use this file except in compliance with the License.
7789Sahrens  *
8789Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9789Sahrens  * or http://www.opensolaris.org/os/licensing.
10789Sahrens  * See the License for the specific language governing permissions
11789Sahrens  * and limitations under the License.
12789Sahrens  *
13789Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14789Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15789Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16789Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17789Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18789Sahrens  *
19789Sahrens  * CDDL HEADER END
20789Sahrens  */
21789Sahrens /*
225831Sck153898  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23789Sahrens  * Use is subject to license terms.
24789Sahrens  */
25789Sahrens 
26789Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
27789Sahrens 
28789Sahrens #include <sys/dmu_objset.h>
29789Sahrens #include <sys/dsl_dataset.h>
30789Sahrens #include <sys/dsl_dir.h>
312082Seschrock #include <sys/dsl_prop.h>
322199Sahrens #include <sys/dsl_synctask.h>
33789Sahrens #include <sys/dmu_traverse.h>
34789Sahrens #include <sys/dmu_tx.h>
35789Sahrens #include <sys/arc.h>
36789Sahrens #include <sys/zio.h>
37789Sahrens #include <sys/zap.h>
38789Sahrens #include <sys/unique.h>
39789Sahrens #include <sys/zfs_context.h>
404007Smmusante #include <sys/zfs_ioctl.h>
414543Smarks #include <sys/spa.h>
424543Smarks #include <sys/sunddi.h>
43789Sahrens 
446689Smaybee static char *dsl_reaper = "the grim reaper";
456689Smaybee 
462199Sahrens static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
472199Sahrens static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
482199Sahrens static dsl_checkfunc_t dsl_dataset_rollback_check;
492199Sahrens static dsl_syncfunc_t dsl_dataset_rollback_sync;
505378Sck153898 static dsl_syncfunc_t dsl_dataset_set_reservation_sync;
511731Sbonwick 
523444Sek110237 #define	DS_REF_MAX	(1ULL << 62)
53789Sahrens 
54789Sahrens #define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE
55789Sahrens 
566689Smaybee #define	DSL_DATASET_IS_DESTROYED(ds)	((ds)->ds_owner == dsl_reaper)
576689Smaybee 
586689Smaybee static void dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag);
59789Sahrens 
605378Sck153898 /*
615378Sck153898  * Figure out how much of this delta should be propogated to the dsl_dir
625378Sck153898  * layer.  If there's a refreservation, that space has already been
635378Sck153898  * partially accounted for in our ancestors.
645378Sck153898  */
655378Sck153898 static int64_t
665378Sck153898 parent_delta(dsl_dataset_t *ds, int64_t delta)
675378Sck153898 {
685378Sck153898 	uint64_t old_bytes, new_bytes;
695378Sck153898 
705378Sck153898 	if (ds->ds_reserved == 0)
715378Sck153898 		return (delta);
725378Sck153898 
735378Sck153898 	old_bytes = MAX(ds->ds_phys->ds_unique_bytes, ds->ds_reserved);
745378Sck153898 	new_bytes = MAX(ds->ds_phys->ds_unique_bytes + delta, ds->ds_reserved);
755378Sck153898 
765378Sck153898 	ASSERT3U(ABS((int64_t)(new_bytes - old_bytes)), <=, ABS(delta));
775378Sck153898 	return (new_bytes - old_bytes);
785378Sck153898 }
79789Sahrens 
80789Sahrens void
81789Sahrens dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
82789Sahrens {
832082Seschrock 	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
84789Sahrens 	int compressed = BP_GET_PSIZE(bp);
85789Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
865378Sck153898 	int64_t delta;
87789Sahrens 
88789Sahrens 	dprintf_bp(bp, "born, ds=%p\n", ds);
89789Sahrens 
90789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
91789Sahrens 	/* It could have been compressed away to nothing */
92789Sahrens 	if (BP_IS_HOLE(bp))
93789Sahrens 		return;
94789Sahrens 	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
95789Sahrens 	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
96789Sahrens 	if (ds == NULL) {
97789Sahrens 		/*
98789Sahrens 		 * Account for the meta-objset space in its placeholder
99789Sahrens 		 * dsl_dir.
100789Sahrens 		 */
101789Sahrens 		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
102789Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
103789Sahrens 		    used, compressed, uncompressed, tx);
104789Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
105789Sahrens 		return;
106789Sahrens 	}
107789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
108789Sahrens 	mutex_enter(&ds->ds_lock);
1095378Sck153898 	delta = parent_delta(ds, used);
110789Sahrens 	ds->ds_phys->ds_used_bytes += used;
111789Sahrens 	ds->ds_phys->ds_compressed_bytes += compressed;
112789Sahrens 	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
113789Sahrens 	ds->ds_phys->ds_unique_bytes += used;
114789Sahrens 	mutex_exit(&ds->ds_lock);
1155378Sck153898 	dsl_dir_diduse_space(ds->ds_dir, delta, compressed, uncompressed, tx);
116789Sahrens }
117789Sahrens 
118*6992Smaybee int
1193547Smaybee dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
1203547Smaybee     dmu_tx_t *tx)
121789Sahrens {
1222082Seschrock 	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
123789Sahrens 	int compressed = BP_GET_PSIZE(bp);
124789Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
125789Sahrens 
126789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1273547Smaybee 	/* No block pointer => nothing to free */
128789Sahrens 	if (BP_IS_HOLE(bp))
129*6992Smaybee 		return (0);
130789Sahrens 
131789Sahrens 	ASSERT(used > 0);
132789Sahrens 	if (ds == NULL) {
1333547Smaybee 		int err;
134789Sahrens 		/*
135789Sahrens 		 * Account for the meta-objset space in its placeholder
136789Sahrens 		 * dataset.
137789Sahrens 		 */
1383547Smaybee 		err = arc_free(pio, tx->tx_pool->dp_spa,
1393547Smaybee 		    tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
1403547Smaybee 		ASSERT(err == 0);
141789Sahrens 
142789Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
143789Sahrens 		    -used, -compressed, -uncompressed, tx);
144789Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
145*6992Smaybee 		return (used);
146789Sahrens 	}
147789Sahrens 	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
148789Sahrens 
149789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
150789Sahrens 
151789Sahrens 	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
1523547Smaybee 		int err;
1535378Sck153898 		int64_t delta;
1543547Smaybee 
155789Sahrens 		dprintf_bp(bp, "freeing: %s", "");
1563547Smaybee 		err = arc_free(pio, tx->tx_pool->dp_spa,
1573547Smaybee 		    tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
1583547Smaybee 		ASSERT(err == 0);
159789Sahrens 
160789Sahrens 		mutex_enter(&ds->ds_lock);
1615378Sck153898 		ASSERT(ds->ds_phys->ds_unique_bytes >= used ||
1625378Sck153898 		    !DS_UNIQUE_IS_ACCURATE(ds));
1635378Sck153898 		delta = parent_delta(ds, -used);
164789Sahrens 		ds->ds_phys->ds_unique_bytes -= used;
165789Sahrens 		mutex_exit(&ds->ds_lock);
166789Sahrens 		dsl_dir_diduse_space(ds->ds_dir,
1675378Sck153898 		    delta, -compressed, -uncompressed, tx);
168789Sahrens 	} else {
169789Sahrens 		dprintf_bp(bp, "putting on dead list: %s", "");
1701544Seschrock 		VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
1715712Sahrens 		ASSERT3U(ds->ds_prev->ds_object, ==,
1725712Sahrens 		    ds->ds_phys->ds_prev_snap_obj);
1735712Sahrens 		ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
174789Sahrens 		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
1755712Sahrens 		if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
1765712Sahrens 		    ds->ds_object && bp->blk_birth >
1775712Sahrens 		    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
1785712Sahrens 			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
1795712Sahrens 			mutex_enter(&ds->ds_prev->ds_lock);
1805712Sahrens 			ds->ds_prev->ds_phys->ds_unique_bytes += used;
1815712Sahrens 			mutex_exit(&ds->ds_prev->ds_lock);
182789Sahrens 		}
183789Sahrens 	}
184789Sahrens 	mutex_enter(&ds->ds_lock);
185789Sahrens 	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
186789Sahrens 	ds->ds_phys->ds_used_bytes -= used;
187789Sahrens 	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
188789Sahrens 	ds->ds_phys->ds_compressed_bytes -= compressed;
189789Sahrens 	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
190789Sahrens 	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
191789Sahrens 	mutex_exit(&ds->ds_lock);
192*6992Smaybee 
193*6992Smaybee 	return (used);
194789Sahrens }
195789Sahrens 
1961544Seschrock uint64_t
1971544Seschrock dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
198789Sahrens {
1992885Sahrens 	uint64_t trysnap = 0;
2002885Sahrens 
201789Sahrens 	if (ds == NULL)
2021544Seschrock 		return (0);
203789Sahrens 	/*
204789Sahrens 	 * The snapshot creation could fail, but that would cause an
205789Sahrens 	 * incorrect FALSE return, which would only result in an
206789Sahrens 	 * overestimation of the amount of space that an operation would
207789Sahrens 	 * consume, which is OK.
208789Sahrens 	 *
209789Sahrens 	 * There's also a small window where we could miss a pending
210789Sahrens 	 * snapshot, because we could set the sync task in the quiescing
211789Sahrens 	 * phase.  So this should only be used as a guess.
212789Sahrens 	 */
2132885Sahrens 	if (ds->ds_trysnap_txg >
2142885Sahrens 	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
2152885Sahrens 		trysnap = ds->ds_trysnap_txg;
2162885Sahrens 	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
2171544Seschrock }
2181544Seschrock 
2191544Seschrock int
2201544Seschrock dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
2211544Seschrock {
2221544Seschrock 	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
223789Sahrens }
224789Sahrens 
225789Sahrens /* ARGSUSED */
226789Sahrens static void
227789Sahrens dsl_dataset_evict(dmu_buf_t *db, void *dsv)
228789Sahrens {
229789Sahrens 	dsl_dataset_t *ds = dsv;
230789Sahrens 
2316689Smaybee 	ASSERT(ds->ds_owner == NULL || DSL_DATASET_IS_DESTROYED(ds));
232789Sahrens 
233789Sahrens 	dprintf_ds(ds, "evicting %s\n", "");
234789Sahrens 
2354787Sahrens 	unique_remove(ds->ds_fsid_guid);
236789Sahrens 
237789Sahrens 	if (ds->ds_user_ptr != NULL)
238789Sahrens 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
239789Sahrens 
240789Sahrens 	if (ds->ds_prev) {
2416689Smaybee 		dsl_dataset_drop_ref(ds->ds_prev, ds);
242789Sahrens 		ds->ds_prev = NULL;
243789Sahrens 	}
244789Sahrens 
245789Sahrens 	bplist_close(&ds->ds_deadlist);
2466689Smaybee 	if (ds->ds_dir)
2476689Smaybee 		dsl_dir_close(ds->ds_dir, ds);
248789Sahrens 
2494787Sahrens 	ASSERT(!list_link_active(&ds->ds_synced_link));
250789Sahrens 
2512856Snd150628 	mutex_destroy(&ds->ds_lock);
2524787Sahrens 	mutex_destroy(&ds->ds_opening_lock);
2532856Snd150628 	mutex_destroy(&ds->ds_deadlist.bpl_lock);
2546689Smaybee 	rw_destroy(&ds->ds_rwlock);
2556689Smaybee 	cv_destroy(&ds->ds_exclusive_cv);
2562856Snd150628 
257789Sahrens 	kmem_free(ds, sizeof (dsl_dataset_t));
258789Sahrens }
259789Sahrens 
2601544Seschrock static int
261789Sahrens dsl_dataset_get_snapname(dsl_dataset_t *ds)
262789Sahrens {
263789Sahrens 	dsl_dataset_phys_t *headphys;
264789Sahrens 	int err;
265789Sahrens 	dmu_buf_t *headdbuf;
266789Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
267789Sahrens 	objset_t *mos = dp->dp_meta_objset;
268789Sahrens 
269789Sahrens 	if (ds->ds_snapname[0])
2701544Seschrock 		return (0);
271789Sahrens 	if (ds->ds_phys->ds_next_snap_obj == 0)
2721544Seschrock 		return (0);
273789Sahrens 
2741544Seschrock 	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
2751544Seschrock 	    FTAG, &headdbuf);
2761544Seschrock 	if (err)
2771544Seschrock 		return (err);
278789Sahrens 	headphys = headdbuf->db_data;
279789Sahrens 	err = zap_value_search(dp->dp_meta_objset,
2804577Sahrens 	    headphys->ds_snapnames_zapobj, ds->ds_object, 0, ds->ds_snapname);
2811544Seschrock 	dmu_buf_rele(headdbuf, FTAG);
2821544Seschrock 	return (err);
283789Sahrens }
284789Sahrens 
2856492Stimh static int
2866689Smaybee dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
2876492Stimh {
2886689Smaybee 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
2896689Smaybee 	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
2906492Stimh 	matchtype_t mt;
2916492Stimh 	int err;
2926492Stimh 
2936689Smaybee 	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
2946492Stimh 		mt = MT_FIRST;
2956492Stimh 	else
2966492Stimh 		mt = MT_EXACT;
2976492Stimh 
2986689Smaybee 	err = zap_lookup_norm(mos, snapobj, name, 8, 1,
2996492Stimh 	    value, mt, NULL, 0, NULL);
3006492Stimh 	if (err == ENOTSUP && mt == MT_FIRST)
3016689Smaybee 		err = zap_lookup(mos, snapobj, name, 8, 1, value);
3026492Stimh 	return (err);
3036492Stimh }
3046492Stimh 
3056492Stimh static int
3066689Smaybee dsl_dataset_snap_remove(dsl_dataset_t *ds, char *name, dmu_tx_t *tx)
3076492Stimh {
3086689Smaybee 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
3096689Smaybee 	uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
3106492Stimh 	matchtype_t mt;
3116492Stimh 	int err;
3126492Stimh 
3136689Smaybee 	if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
3146492Stimh 		mt = MT_FIRST;
3156492Stimh 	else
3166492Stimh 		mt = MT_EXACT;
3176492Stimh 
3186689Smaybee 	err = zap_remove_norm(mos, snapobj, name, mt, tx);
3196492Stimh 	if (err == ENOTSUP && mt == MT_FIRST)
3206689Smaybee 		err = zap_remove(mos, snapobj, name, tx);
3216492Stimh 	return (err);
3226492Stimh }
3236492Stimh 
3246689Smaybee static int
3256689Smaybee dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
3266689Smaybee     dsl_dataset_t **dsp)
327789Sahrens {
328789Sahrens 	objset_t *mos = dp->dp_meta_objset;
329789Sahrens 	dmu_buf_t *dbuf;
330789Sahrens 	dsl_dataset_t *ds;
3311544Seschrock 	int err;
332789Sahrens 
333789Sahrens 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
334789Sahrens 	    dsl_pool_sync_context(dp));
335789Sahrens 
3361544Seschrock 	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
3371544Seschrock 	if (err)
3381544Seschrock 		return (err);
339789Sahrens 	ds = dmu_buf_get_user(dbuf);
340789Sahrens 	if (ds == NULL) {
341789Sahrens 		dsl_dataset_t *winner;
342789Sahrens 
343789Sahrens 		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
344789Sahrens 		ds->ds_dbuf = dbuf;
345789Sahrens 		ds->ds_object = dsobj;
346789Sahrens 		ds->ds_phys = dbuf->db_data;
347789Sahrens 
3482856Snd150628 		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
3494787Sahrens 		mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
3502856Snd150628 		mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
3512856Snd150628 		    NULL);
3526689Smaybee 		rw_init(&ds->ds_rwlock, 0, 0, 0);
3536689Smaybee 		cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL);
3542856Snd150628 
3551544Seschrock 		err = bplist_open(&ds->ds_deadlist,
356789Sahrens 		    mos, ds->ds_phys->ds_deadlist_obj);
3571544Seschrock 		if (err == 0) {
3581544Seschrock 			err = dsl_dir_open_obj(dp,
3591544Seschrock 			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
3601544Seschrock 		}
3611544Seschrock 		if (err) {
3621544Seschrock 			/*
3631544Seschrock 			 * we don't really need to close the blist if we
3641544Seschrock 			 * just opened it.
3651544Seschrock 			 */
3662856Snd150628 			mutex_destroy(&ds->ds_lock);
3674787Sahrens 			mutex_destroy(&ds->ds_opening_lock);
3682856Snd150628 			mutex_destroy(&ds->ds_deadlist.bpl_lock);
3696689Smaybee 			rw_destroy(&ds->ds_rwlock);
3706689Smaybee 			cv_destroy(&ds->ds_exclusive_cv);
3711544Seschrock 			kmem_free(ds, sizeof (dsl_dataset_t));
3721544Seschrock 			dmu_buf_rele(dbuf, tag);
3731544Seschrock 			return (err);
3741544Seschrock 		}
375789Sahrens 
376789Sahrens 		if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) {
377789Sahrens 			ds->ds_snapname[0] = '\0';
378789Sahrens 			if (ds->ds_phys->ds_prev_snap_obj) {
3796689Smaybee 				err = dsl_dataset_get_ref(dp,
3806689Smaybee 				    ds->ds_phys->ds_prev_snap_obj,
3816689Smaybee 				    ds, &ds->ds_prev);
382789Sahrens 			}
3836689Smaybee 		} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
3846689Smaybee 			err = dsl_dataset_get_snapname(ds);
385789Sahrens 		}
386789Sahrens 
3875475Sck153898 		if (!dsl_dataset_is_snapshot(ds)) {
3885569Sck153898 			/*
3895569Sck153898 			 * In sync context, we're called with either no lock
3905569Sck153898 			 * or with the write lock.  If we're not syncing,
3915569Sck153898 			 * we're always called with the read lock held.
3925569Sck153898 			 */
3935475Sck153898 			boolean_t need_lock =
3945569Sck153898 			    !RW_WRITE_HELD(&dp->dp_config_rwlock) &&
3955569Sck153898 			    dsl_pool_sync_context(dp);
3965475Sck153898 
3975475Sck153898 			if (need_lock)
3985475Sck153898 				rw_enter(&dp->dp_config_rwlock, RW_READER);
3995475Sck153898 
4005475Sck153898 			err = dsl_prop_get_ds_locked(ds->ds_dir,
4015475Sck153898 			    "refreservation", sizeof (uint64_t), 1,
4025475Sck153898 			    &ds->ds_reserved, NULL);
4035475Sck153898 			if (err == 0) {
4045475Sck153898 				err = dsl_prop_get_ds_locked(ds->ds_dir,
4055475Sck153898 				    "refquota", sizeof (uint64_t), 1,
4065475Sck153898 				    &ds->ds_quota, NULL);
4075475Sck153898 			}
4085475Sck153898 
4095475Sck153898 			if (need_lock)
4105475Sck153898 				rw_exit(&dp->dp_config_rwlock);
4115475Sck153898 		} else {
4125475Sck153898 			ds->ds_reserved = ds->ds_quota = 0;
4135475Sck153898 		}
4145475Sck153898 
4151544Seschrock 		if (err == 0) {
4161544Seschrock 			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
4171544Seschrock 			    dsl_dataset_evict);
4181544Seschrock 		}
4191544Seschrock 		if (err || winner) {
420789Sahrens 			bplist_close(&ds->ds_deadlist);
4216689Smaybee 			if (ds->ds_prev)
4226689Smaybee 				dsl_dataset_drop_ref(ds->ds_prev, ds);
423789Sahrens 			dsl_dir_close(ds->ds_dir, ds);
4242856Snd150628 			mutex_destroy(&ds->ds_lock);
4254787Sahrens 			mutex_destroy(&ds->ds_opening_lock);
4262856Snd150628 			mutex_destroy(&ds->ds_deadlist.bpl_lock);
4276689Smaybee 			rw_destroy(&ds->ds_rwlock);
4286689Smaybee 			cv_destroy(&ds->ds_exclusive_cv);
429789Sahrens 			kmem_free(ds, sizeof (dsl_dataset_t));
4301544Seschrock 			if (err) {
4311544Seschrock 				dmu_buf_rele(dbuf, tag);
4321544Seschrock 				return (err);
4331544Seschrock 			}
434789Sahrens 			ds = winner;
435789Sahrens 		} else {
4364787Sahrens 			ds->ds_fsid_guid =
437789Sahrens 			    unique_insert(ds->ds_phys->ds_fsid_guid);
438789Sahrens 		}
439789Sahrens 	}
440789Sahrens 	ASSERT3P(ds->ds_dbuf, ==, dbuf);
441789Sahrens 	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
442789Sahrens 	mutex_enter(&ds->ds_lock);
4436689Smaybee 	if (!dsl_pool_sync_context(dp) && DSL_DATASET_IS_DESTROYED(ds)) {
444789Sahrens 		mutex_exit(&ds->ds_lock);
4456689Smaybee 		dmu_buf_rele(ds->ds_dbuf, tag);
4466689Smaybee 		return (ENOENT);
4476689Smaybee 	}
4486689Smaybee 	mutex_exit(&ds->ds_lock);
4496689Smaybee 	*dsp = ds;
4506689Smaybee 	return (0);
4516689Smaybee }
4526689Smaybee 
4536689Smaybee static int
4546689Smaybee dsl_dataset_hold_ref(dsl_dataset_t *ds, void *tag)
4556689Smaybee {
4566689Smaybee 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
4576689Smaybee 
4586689Smaybee 	/*
4596689Smaybee 	 * In syncing context we don't want the rwlock lock: there
4606689Smaybee 	 * may be an existing writer waiting for sync phase to
4616689Smaybee 	 * finish.  We don't need to worry about such writers, since
4626689Smaybee 	 * sync phase is single-threaded, so the writer can't be
4636689Smaybee 	 * doing anything while we are active.
4646689Smaybee 	 */
4656689Smaybee 	if (dsl_pool_sync_context(dp)) {
4666689Smaybee 		ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
4676689Smaybee 		return (0);
468789Sahrens 	}
4696689Smaybee 
4706689Smaybee 	/*
4716689Smaybee 	 * Normal users will hold the ds_rwlock as a READER until they
4726689Smaybee 	 * are finished (i.e., call dsl_dataset_rele()).  "Owners" will
4736689Smaybee 	 * drop their READER lock after they set the ds_owner field.
4746689Smaybee 	 *
4756689Smaybee 	 * If the dataset is being destroyed, the destroy thread will
4766689Smaybee 	 * obtain a WRITER lock for exclusive access after it's done its
4776689Smaybee 	 * open-context work and then change the ds_owner to
4786689Smaybee 	 * dsl_reaper once destruction is assured.  So threads
4796689Smaybee 	 * may block here temporarily, until the "destructability" of
4806689Smaybee 	 * the dataset is determined.
4816689Smaybee 	 */
4826689Smaybee 	ASSERT(!RW_WRITE_HELD(&dp->dp_config_rwlock));
4836689Smaybee 	mutex_enter(&ds->ds_lock);
4846689Smaybee 	while (!rw_tryenter(&ds->ds_rwlock, RW_READER)) {
4856689Smaybee 		rw_exit(&dp->dp_config_rwlock);
4866689Smaybee 		cv_wait(&ds->ds_exclusive_cv, &ds->ds_lock);
4876689Smaybee 		if (DSL_DATASET_IS_DESTROYED(ds)) {
4886689Smaybee 			mutex_exit(&ds->ds_lock);
4896689Smaybee 			dsl_dataset_drop_ref(ds, tag);
4906689Smaybee 			rw_enter(&dp->dp_config_rwlock, RW_READER);
4916689Smaybee 			return (ENOENT);
4926689Smaybee 		}
4936689Smaybee 		rw_enter(&dp->dp_config_rwlock, RW_READER);
4946689Smaybee 	}
495789Sahrens 	mutex_exit(&ds->ds_lock);
4961544Seschrock 	return (0);
497789Sahrens }
498789Sahrens 
499789Sahrens int
5006689Smaybee dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
5016689Smaybee     dsl_dataset_t **dsp)
5026689Smaybee {
5036689Smaybee 	int err = dsl_dataset_get_ref(dp, dsobj, tag, dsp);
5046689Smaybee 
5056689Smaybee 	if (err)
5066689Smaybee 		return (err);
5076689Smaybee 	return (dsl_dataset_hold_ref(*dsp, tag));
5086689Smaybee }
5096689Smaybee 
5106689Smaybee int
5116689Smaybee dsl_dataset_own_obj(dsl_pool_t *dp, uint64_t dsobj, int flags, void *owner,
5126689Smaybee     dsl_dataset_t **dsp)
5136689Smaybee {
5146689Smaybee 	int err = dsl_dataset_hold_obj(dp, dsobj, owner, dsp);
5156689Smaybee 
5166689Smaybee 	ASSERT(DS_MODE_TYPE(flags) != DS_MODE_USER);
5176689Smaybee 
5186689Smaybee 	if (err)
5196689Smaybee 		return (err);
5206689Smaybee 	if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
5216689Smaybee 		dsl_dataset_rele(*dsp, owner);
5226689Smaybee 		return (EBUSY);
5236689Smaybee 	}
5246689Smaybee 	return (0);
5256689Smaybee }
5266689Smaybee 
5276689Smaybee int
5286689Smaybee dsl_dataset_hold(const char *name, void *tag, dsl_dataset_t **dsp)
529789Sahrens {
530789Sahrens 	dsl_dir_t *dd;
531789Sahrens 	dsl_pool_t *dp;
5326689Smaybee 	const char *snapname;
533789Sahrens 	uint64_t obj;
534789Sahrens 	int err = 0;
535789Sahrens 
5366689Smaybee 	err = dsl_dir_open_spa(NULL, name, FTAG, &dd, &snapname);
5371544Seschrock 	if (err)
5381544Seschrock 		return (err);
539789Sahrens 
540789Sahrens 	dp = dd->dd_pool;
541789Sahrens 	obj = dd->dd_phys->dd_head_dataset_obj;
542789Sahrens 	rw_enter(&dp->dp_config_rwlock, RW_READER);
5436689Smaybee 	if (obj)
5446689Smaybee 		err = dsl_dataset_get_ref(dp, obj, tag, dsp);
5456689Smaybee 	else
546789Sahrens 		err = ENOENT;
5476689Smaybee 	if (err)
548789Sahrens 		goto out;
5496689Smaybee 
5506689Smaybee 	err = dsl_dataset_hold_ref(*dsp, tag);
5516689Smaybee 
5526689Smaybee 	/* we may be looking for a snapshot */
5536689Smaybee 	if (err == 0 && snapname != NULL) {
5546689Smaybee 		dsl_dataset_t *ds = NULL;
5556689Smaybee 
5566689Smaybee 		if (*snapname++ != '@') {
5576689Smaybee 			dsl_dataset_rele(*dsp, tag);
558789Sahrens 			err = ENOENT;
559789Sahrens 			goto out;
560789Sahrens 		}
5616689Smaybee 
5626689Smaybee 		dprintf("looking for snapshot '%s'\n", snapname);
5636689Smaybee 		err = dsl_dataset_snap_lookup(*dsp, snapname, &obj);
5646689Smaybee 		if (err == 0)
5656689Smaybee 			err = dsl_dataset_get_ref(dp, obj, tag, &ds);
5666689Smaybee 		dsl_dataset_rele(*dsp, tag);
5676689Smaybee 
5686689Smaybee 		ASSERT3U((err == 0), ==, (ds != NULL));
5696689Smaybee 
5706689Smaybee 		if (ds) {
5716689Smaybee 			mutex_enter(&ds->ds_lock);
5726689Smaybee 			if (ds->ds_snapname[0] == 0)
5736689Smaybee 				(void) strlcpy(ds->ds_snapname, snapname,
5746689Smaybee 				    sizeof (ds->ds_snapname));
5756689Smaybee 			mutex_exit(&ds->ds_lock);
5766689Smaybee 			err = dsl_dataset_hold_ref(ds, tag);
5776689Smaybee 			*dsp = err ? NULL : ds;
578789Sahrens 		}
579789Sahrens 	}
580789Sahrens out:
581789Sahrens 	rw_exit(&dp->dp_config_rwlock);
582789Sahrens 	dsl_dir_close(dd, FTAG);
583789Sahrens 	return (err);
584789Sahrens }
585789Sahrens 
586789Sahrens int
5876689Smaybee dsl_dataset_own(const char *name, int flags, void *owner, dsl_dataset_t **dsp)
588789Sahrens {
5896689Smaybee 	int err = dsl_dataset_hold(name, owner, dsp);
5906689Smaybee 	if (err)
5916689Smaybee 		return (err);
5926689Smaybee 	if ((*dsp)->ds_phys->ds_num_children > 0 &&
5936689Smaybee 	    !DS_MODE_IS_READONLY(flags)) {
5946689Smaybee 		dsl_dataset_rele(*dsp, owner);
5956689Smaybee 		return (EROFS);
5966689Smaybee 	}
5976689Smaybee 	if (!dsl_dataset_tryown(*dsp, DS_MODE_IS_INCONSISTENT(flags), owner)) {
5986689Smaybee 		dsl_dataset_rele(*dsp, owner);
5996689Smaybee 		return (EBUSY);
6006689Smaybee 	}
6016689Smaybee 	return (0);
602789Sahrens }
603789Sahrens 
604789Sahrens void
605789Sahrens dsl_dataset_name(dsl_dataset_t *ds, char *name)
606789Sahrens {
607789Sahrens 	if (ds == NULL) {
608789Sahrens 		(void) strcpy(name, "mos");
609789Sahrens 	} else {
610789Sahrens 		dsl_dir_name(ds->ds_dir, name);
6111544Seschrock 		VERIFY(0 == dsl_dataset_get_snapname(ds));
612789Sahrens 		if (ds->ds_snapname[0]) {
613789Sahrens 			(void) strcat(name, "@");
6146689Smaybee 			/*
6156689Smaybee 			 * We use a "recursive" mutex so that we
6166689Smaybee 			 * can call dprintf_ds() with ds_lock held.
6176689Smaybee 			 */
618789Sahrens 			if (!MUTEX_HELD(&ds->ds_lock)) {
619789Sahrens 				mutex_enter(&ds->ds_lock);
620789Sahrens 				(void) strcat(name, ds->ds_snapname);
621789Sahrens 				mutex_exit(&ds->ds_lock);
622789Sahrens 			} else {
623789Sahrens 				(void) strcat(name, ds->ds_snapname);
624789Sahrens 			}
625789Sahrens 		}
626789Sahrens 	}
627789Sahrens }
628789Sahrens 
6293978Smmusante static int
6303978Smmusante dsl_dataset_namelen(dsl_dataset_t *ds)
6313978Smmusante {
6323978Smmusante 	int result;
6333978Smmusante 
6343978Smmusante 	if (ds == NULL) {
6353978Smmusante 		result = 3;	/* "mos" */
6363978Smmusante 	} else {
6373978Smmusante 		result = dsl_dir_namelen(ds->ds_dir);
6383978Smmusante 		VERIFY(0 == dsl_dataset_get_snapname(ds));
6393978Smmusante 		if (ds->ds_snapname[0]) {
6403978Smmusante 			++result;	/* adding one for the @-sign */
6413978Smmusante 			if (!MUTEX_HELD(&ds->ds_lock)) {
6423978Smmusante 				mutex_enter(&ds->ds_lock);
6433978Smmusante 				result += strlen(ds->ds_snapname);
6443978Smmusante 				mutex_exit(&ds->ds_lock);
6453978Smmusante 			} else {
6463978Smmusante 				result += strlen(ds->ds_snapname);
6473978Smmusante 			}
6483978Smmusante 		}
6493978Smmusante 	}
6503978Smmusante 
6513978Smmusante 	return (result);
6523978Smmusante }
6533978Smmusante 
6546689Smaybee static void
6556689Smaybee dsl_dataset_drop_ref(dsl_dataset_t *ds, void *tag)
656789Sahrens {
6571544Seschrock 	dmu_buf_rele(ds->ds_dbuf, tag);
658789Sahrens }
659789Sahrens 
660789Sahrens void
6616689Smaybee dsl_dataset_rele(dsl_dataset_t *ds, void *tag)
6625367Sahrens {
6636689Smaybee 	ASSERT(ds->ds_owner != tag);
6646689Smaybee 	if (!dsl_pool_sync_context(ds->ds_dir->dd_pool)) {
6656689Smaybee 		rw_exit(&ds->ds_rwlock);
6666689Smaybee 	}
6676689Smaybee 	dsl_dataset_drop_ref(ds, tag);
6686689Smaybee }
6696689Smaybee 
6706689Smaybee void
6716689Smaybee dsl_dataset_disown(dsl_dataset_t *ds, void *owner)
6726689Smaybee {
6736689Smaybee 	ASSERT((ds->ds_owner == owner && ds->ds_dbuf) ||
6746689Smaybee 	    (DSL_DATASET_IS_DESTROYED(ds) && ds->ds_dbuf == NULL));
6756689Smaybee 
6765367Sahrens 	mutex_enter(&ds->ds_lock);
6776689Smaybee 	ds->ds_owner = NULL;
6786689Smaybee 	if (RW_WRITE_HELD(&ds->ds_rwlock)) {
6796689Smaybee 		rw_exit(&ds->ds_rwlock);
6806689Smaybee 		cv_broadcast(&ds->ds_exclusive_cv);
6816689Smaybee 	}
6825367Sahrens 	mutex_exit(&ds->ds_lock);
6836689Smaybee 	if (ds->ds_dbuf)
6846689Smaybee 		dsl_dataset_drop_ref(ds, owner);
6856689Smaybee 	else
6866689Smaybee 		dsl_dataset_evict(ds->ds_dbuf, ds);
6875367Sahrens }
6885367Sahrens 
6895367Sahrens boolean_t
6906689Smaybee dsl_dataset_tryown(dsl_dataset_t *ds, boolean_t inconsistentok, void *owner)
6915367Sahrens {
6926689Smaybee 	boolean_t gotit = FALSE;
6936689Smaybee 
6945367Sahrens 	mutex_enter(&ds->ds_lock);
6956689Smaybee 	if (ds->ds_owner == NULL &&
6966689Smaybee 	    (!DS_IS_INCONSISTENT(ds) || inconsistentok)) {
6976689Smaybee 		ds->ds_owner = owner;
6986689Smaybee 		if (!dsl_pool_sync_context(ds->ds_dir->dd_pool))
6996689Smaybee 			rw_exit(&ds->ds_rwlock);
7006689Smaybee 		gotit = TRUE;
7015367Sahrens 	}
7025367Sahrens 	mutex_exit(&ds->ds_lock);
7036689Smaybee 	return (gotit);
7046689Smaybee }
7056689Smaybee 
7066689Smaybee void
7076689Smaybee dsl_dataset_make_exclusive(dsl_dataset_t *ds, void *owner)
7086689Smaybee {
7096689Smaybee 	ASSERT3P(owner, ==, ds->ds_owner);
7106689Smaybee 	if (!RW_WRITE_HELD(&ds->ds_rwlock))
7116689Smaybee 		rw_enter(&ds->ds_rwlock, RW_WRITER);
7125367Sahrens }
7135367Sahrens 
7145367Sahrens void
715789Sahrens dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
716789Sahrens {
717789Sahrens 	objset_t *mos = dp->dp_meta_objset;
718789Sahrens 	dmu_buf_t *dbuf;
719789Sahrens 	dsl_dataset_phys_t *dsphys;
720789Sahrens 	dsl_dataset_t *ds;
721789Sahrens 	uint64_t dsobj;
722789Sahrens 	dsl_dir_t *dd;
723789Sahrens 
724789Sahrens 	dsl_dir_create_root(mos, ddobjp, tx);
7251544Seschrock 	VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd));
726789Sahrens 
727928Stabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
728928Stabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
7291544Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
730789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
731789Sahrens 	dsphys = dbuf->db_data;
732789Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
733789Sahrens 	dsphys->ds_fsid_guid = unique_create();
734789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
735789Sahrens 	    sizeof (dsphys->ds_guid));
736789Sahrens 	dsphys->ds_snapnames_zapobj =
7376492Stimh 	    zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
7386492Stimh 	    DMU_OT_NONE, 0, tx);
739789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
740789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
741789Sahrens 	dsphys->ds_deadlist_obj =
742789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
7435378Sck153898 	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
7445378Sck153898 		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
7451544Seschrock 	dmu_buf_rele(dbuf, FTAG);
746789Sahrens 
747789Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
748789Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
749789Sahrens 	dsl_dir_close(dd, FTAG);
750789Sahrens 
7516689Smaybee 	VERIFY(0 == dsl_dataset_get_ref(dp, dsobj, FTAG, &ds));
7523547Smaybee 	(void) dmu_objset_create_impl(dp->dp_spa, ds,
7533547Smaybee 	    &ds->ds_phys->ds_bp, DMU_OST_ZFS, tx);
7546689Smaybee 	dsl_dataset_drop_ref(ds, FTAG);
755789Sahrens }
756789Sahrens 
/*
 * Allocate and initialize the on-disk state for a new head dataset of
 * the directory 'dd'.  Must be called in syncing context (asserted).
 *
 * A new DMU_OT_DSL_DATASET object is allocated in the pool's meta
 * objset.  Its bonus buffer (a dsl_dataset_phys_t) is zeroed and then
 * filled in with dd's object number, the caller's 'flags', newly
 * generated fsid/guid values, a new snapshot-name ZAP object, the
 * creation time and txg, and a new deadlist object.
 *
 * If 'origin' is non-NULL it must live in the same pool and already
 * have ds_num_children > 0 (both asserted).  In that case the new
 * dataset additionally copies origin's block pointer and space
 * accounting, ORs in origin's flags, records origin as its previous
 * snapshot, increments origin's ds_num_children and stores origin's
 * object number in dd's dd_origin_obj.
 *
 * 'dd' must not already have a head dataset (asserted).  Returns the
 * object number of the new dataset, which is also stored in dd's
 * dd_head_dataset_obj.
 */
7572199Sahrens uint64_t
7586492Stimh dsl_dataset_create_sync_impl(dsl_dir_t *dd, dsl_dataset_t *origin,
7596492Stimh     uint64_t flags, dmu_tx_t *tx)
760789Sahrens {
7615367Sahrens 	dsl_pool_t *dp = dd->dd_pool;
762789Sahrens 	dmu_buf_t *dbuf;
763789Sahrens 	dsl_dataset_phys_t *dsphys;
7645367Sahrens 	uint64_t dsobj;
765789Sahrens 	objset_t *mos = dp->dp_meta_objset;
766789Sahrens 
7675367Sahrens 	ASSERT(origin == NULL || origin->ds_dir->dd_pool == dp);
7685367Sahrens 	ASSERT(origin == NULL || origin->ds_phys->ds_num_children > 0);
769789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
7705367Sahrens 	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
771789Sahrens 
772928Stabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
773928Stabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
7741544Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
775789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
776789Sahrens 	dsphys = dbuf->db_data;
	/* start from an all-zero phys; only non-zero fields are set below */
7776689Smaybee 	bzero(dsphys, sizeof (dsl_dataset_phys_t));
778789Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
7796492Stimh 	dsphys->ds_flags = flags;
780789Sahrens 	dsphys->ds_fsid_guid = unique_create();
781789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
782789Sahrens 	    sizeof (dsphys->ds_guid));
783789Sahrens 	dsphys->ds_snapnames_zapobj =
7846492Stimh 	    zap_create_norm(mos, U8_TEXTPREP_TOUPPER, DMU_OT_DSL_DS_SNAP_MAP,
7856492Stimh 	    DMU_OT_NONE, 0, tx);
786789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
787789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
788789Sahrens 	dsphys->ds_deadlist_obj =
789789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
7905378Sck153898 
7915367Sahrens 	if (origin) {
		/* start out with the same contents and accounting as origin */
7925367Sahrens 		dsphys->ds_prev_snap_obj = origin->ds_object;
793789Sahrens 		dsphys->ds_prev_snap_txg =
7945367Sahrens 		    origin->ds_phys->ds_creation_txg;
795789Sahrens 		dsphys->ds_used_bytes =
7965367Sahrens 		    origin->ds_phys->ds_used_bytes;
797789Sahrens 		dsphys->ds_compressed_bytes =
7985367Sahrens 		    origin->ds_phys->ds_compressed_bytes;
799789Sahrens 		dsphys->ds_uncompressed_bytes =
8005367Sahrens 		    origin->ds_phys->ds_uncompressed_bytes;
8015367Sahrens 		dsphys->ds_bp = origin->ds_phys->ds_bp;
8026502Stimh 		dsphys->ds_flags |= origin->ds_phys->ds_flags;
803789Sahrens 
8045367Sahrens 		dmu_buf_will_dirty(origin->ds_dbuf, tx);
8055367Sahrens 		origin->ds_phys->ds_num_children++;
806789Sahrens 
807789Sahrens 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
8085367Sahrens 		dd->dd_phys->dd_origin_obj = origin->ds_object;
809789Sahrens 	}
8106492Stimh 
8116492Stimh 	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
8126492Stimh 		dsphys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
8136492Stimh 
8141544Seschrock 	dmu_buf_rele(dbuf, FTAG);
815789Sahrens 
816789Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
817789Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
8185367Sahrens 
8195367Sahrens 	return (dsobj);
8205367Sahrens }
8215367Sahrens 
/*
 * Create a new child dsl_dir named 'lastname' under 'pdd' together with
 * its head dataset.
 *
 * The directory is created with dsl_dir_create_sync() and opened; the
 * head dataset is then created by dsl_dataset_create_sync_impl(), to
 * which 'origin' and 'flags' are passed through unchanged.  Afterwards
 * dsl_deleg_set_create_perms() is called on the new directory with the
 * caller's credentials 'cr', and the directory is closed again.
 *
 * 'lastname' must not begin with '@' (asserted).  Returns the object
 * number of the new dataset.
 */
8225367Sahrens uint64_t
8236492Stimh dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
8246492Stimh     dsl_dataset_t *origin, uint64_t flags, cred_t *cr, dmu_tx_t *tx)
8255367Sahrens {
8265367Sahrens 	dsl_pool_t *dp = pdd->dd_pool;
8275367Sahrens 	uint64_t dsobj, ddobj;
8285367Sahrens 	dsl_dir_t *dd;
8295367Sahrens 
8305367Sahrens 	ASSERT(lastname[0] != '@');
8315367Sahrens 
8325367Sahrens 	ddobj = dsl_dir_create_sync(pdd, lastname, tx);
8335367Sahrens 	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));
8345367Sahrens 
8356492Stimh 	dsobj = dsl_dataset_create_sync_impl(dd, origin, flags, tx);
8365367Sahrens 
8375367Sahrens 	dsl_deleg_set_create_perms(dd, tx, cr);
8385367Sahrens 
839789Sahrens 	dsl_dir_close(dd, FTAG);
840789Sahrens 
8412199Sahrens 	return (dsobj);
8422199Sahrens }
8432199Sahrens 
/*
 * Argument block handed to dsl_snapshot_destroy_one() (through
 * dmu_objset_find()) by dsl_snapshots_destroy().
 */
8442199Sahrens struct destroyarg {
	/* sync task group collecting the destroy tasks; also the owner tag */
8452199Sahrens 	dsl_sync_task_group_t *dstg;
	/* snapshot name to destroy, without the leading '@' */
8462199Sahrens 	char *snapname;
	/* buffer that receives the name of a file system that failed */
8472199Sahrens 	char *failed;
8482199Sahrens };
8492199Sahrens 
/*
 * dmu_objset_find() callback used by dsl_snapshots_destroy().
 *
 * "@<snapname>" is temporarily appended to 'name' in place so that the
 * snapshot can be looked up and owned (with da->dstg as the owner tag);
 * 'name' is truncated back at the '@' before returning.
 *
 * If the snapshot was owned, it is made exclusive and a destroy sync
 * task for it is added to da->dstg.  The dataset stays owned here;
 * dsl_snapshots_destroy() disowns it once the group has been processed.
 * A file system that has no such snapshot (ENOENT) is not treated as an
 * error.  On any other error the file system name is copied into
 * da->failed and the error is returned.
 *
 * NOTE(review): the strcat() calls assume the caller-supplied 'name'
 * buffer has room for "@<snapname>" -- confirm against the buffer that
 * dmu_objset_find() passes to its callback.
 */
8502199Sahrens static int
8512199Sahrens dsl_snapshot_destroy_one(char *name, void *arg)
8522199Sahrens {
8532199Sahrens 	struct destroyarg *da = arg;
8542199Sahrens 	dsl_dataset_t *ds;
8552199Sahrens 	char *cp;
8562199Sahrens 	int err;
8572199Sahrens 
8582199Sahrens 	(void) strcat(name, "@");
8592199Sahrens 	(void) strcat(name, da->snapname);
8606689Smaybee 	err = dsl_dataset_own(name, DS_MODE_READONLY | DS_MODE_INCONSISTENT,
8614007Smmusante 	    da->dstg, &ds);
	/* restore 'name' to the plain file system name */
8622199Sahrens 	cp = strchr(name, '@');
8632199Sahrens 	*cp = '\0';
8646689Smaybee 	if (err == 0) {
8656689Smaybee 		dsl_dataset_make_exclusive(ds, da->dstg);
8666689Smaybee 		dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
8676689Smaybee 		    dsl_dataset_destroy_sync, ds, da->dstg, 0);
8686689Smaybee 	} else if (err == ENOENT) {
		/* this file system simply has no snapshot of that name */
8696689Smaybee 		err = 0;
8706689Smaybee 	} else {
8712199Sahrens 		(void) strcpy(da->failed, name);
8722199Sahrens 	}
8736689Smaybee 	return (err);
874789Sahrens }
875789Sahrens 
8762199Sahrens /*
8772199Sahrens  * Destroy 'snapname' in all descendants of 'fsname'.
 *
 * dmu_objset_find() is used with DS_FIND_CHILDREN to call
 * dsl_snapshot_destroy_one() for 'fsname' and its children; each
 * existing snapshot contributes one destroy task to a single sync task
 * group, which is then waited on as a whole.
 *
 * 'fsname' is also used as an output buffer: when an error occurs it is
 * overwritten with the name of a file system whose snapshot could not
 * be destroyed, so it must be writable and large enough for any dataset
 * name (NOTE(review): presumably MAXNAMELEN -- confirm with callers).
 *
 * Returns 0 on success or an errno value.  Also exported under the
 * weak alias dmu_snapshots_destroy.
8782199Sahrens  */
8792199Sahrens #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
8802199Sahrens int
8812199Sahrens dsl_snapshots_destroy(char *fsname, char *snapname)
8822199Sahrens {
8832199Sahrens 	int err;
8842199Sahrens 	struct destroyarg da;
8852199Sahrens 	dsl_sync_task_t *dst;
8862199Sahrens 	spa_t *spa;
8872199Sahrens 
8884603Sahrens 	err = spa_open(fsname, &spa, FTAG);
8892199Sahrens 	if (err)
8902199Sahrens 		return (err);
8912199Sahrens 	da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
8922199Sahrens 	da.snapname = snapname;
8932199Sahrens 	da.failed = fsname;
8942199Sahrens 
8952199Sahrens 	err = dmu_objset_find(fsname,
8962417Sahrens 	    dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);
8972199Sahrens 
8982199Sahrens 	if (err == 0)
8992199Sahrens 		err = dsl_sync_task_group_wait(da.dstg);
9002199Sahrens 
	/*
	 * Walk every task that was queued, whether or not the group ran
	 * or succeeded, and give up the ownership taken in
	 * dsl_snapshot_destroy_one().
	 */
9012199Sahrens 	for (dst = list_head(&da.dstg->dstg_tasks); dst;
9022199Sahrens 	    dst = list_next(&da.dstg->dstg_tasks, dst)) {
9032199Sahrens 		dsl_dataset_t *ds = dst->dst_arg1;
9046689Smaybee 		/*
9056689Smaybee 		 * Return the file system name that triggered the error
9066689Smaybee 		 */
9072199Sahrens 		if (dst->dst_err) {
9082199Sahrens 			dsl_dataset_name(ds, fsname);
9094603Sahrens 			*strchr(fsname, '@') = '\0';
9102199Sahrens 		}
9116689Smaybee 		dsl_dataset_disown(ds, da.dstg);
9122199Sahrens 	}
9132199Sahrens 
9142199Sahrens 	dsl_sync_task_group_destroy(da.dstg);
9152199Sahrens 	spa_close(spa, FTAG);
9162199Sahrens 	return (err);
9172199Sahrens }
9182199Sahrens 
9195367Sahrens /*
9206689Smaybee  * ds must be opened as OWNER.  On return (whether successful or not),
9216689Smaybee  * ds will be closed and caller can no longer dereference it.
 *
 * A snapshot is destroyed with a single sync task.  A head dataset is
 * destroyed in stages:
 *   1. a "destroy begin" sync task checks for errors and marks the
 *      dataset inconsistent on disk;
 *   2. its objects are freed one by one in open context;
 *   3. the dataset and its dsl_dir are destroyed together in one sync
 *      task group.
 *
 * 'tag' is the tag under which the caller owns 'ds'; it is used to make
 * the dataset exclusive and to disown it before returning.
 * Returns 0 on success or an errno value.
9225367Sahrens  */
923789Sahrens int
9245367Sahrens dsl_dataset_destroy(dsl_dataset_t *ds, void *tag)
925789Sahrens {
926789Sahrens 	int err;
9272199Sahrens 	dsl_sync_task_group_t *dstg;
9282199Sahrens 	objset_t *os;
929789Sahrens 	dsl_dir_t *dd;
9302199Sahrens 	uint64_t obj;
9312199Sahrens 
9325367Sahrens 	if (dsl_dataset_is_snapshot(ds)) {
9332199Sahrens 		/* Destroying a snapshot is simpler */
9346689Smaybee 		dsl_dataset_make_exclusive(ds, tag);
9352199Sahrens 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
9362199Sahrens 		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
9375367Sahrens 		    ds, tag, 0);
9385367Sahrens 		goto out;
9392199Sahrens 	}
9402199Sahrens 
9412199Sahrens 	dd = ds->ds_dir;
942789Sahrens 
9432199Sahrens 	/*
9442199Sahrens 	 * Check for errors and mark this ds as inconsistent, in
9452199Sahrens 	 * case we crash while freeing the objects.
9462199Sahrens 	 */
9472199Sahrens 	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
9482199Sahrens 	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
9495367Sahrens 	if (err)
9505367Sahrens 		goto out;
9515367Sahrens 
9525367Sahrens 	err = dmu_objset_open_ds(ds, DMU_OST_ANY, &os);
9535367Sahrens 	if (err)
9545367Sahrens 		goto out;
9552199Sahrens 
9562199Sahrens 	/*
9572199Sahrens 	 * remove the objects in open context, so that we won't
9582199Sahrens 	 * have too much to do in syncing context.
9592199Sahrens 	 */
9603025Sahrens 	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
9613025Sahrens 	    ds->ds_phys->ds_prev_snap_txg)) {
962*6992Smaybee 		/*
963*6992Smaybee 		 * Ignore errors, if there is not enough disk space
964*6992Smaybee 		 * we will deal with it in dsl_dataset_destroy_sync().
965*6992Smaybee 		 */
966*6992Smaybee 		(void) dmu_free_object(os, obj);
9672199Sahrens 	}
9682199Sahrens 
9692199Sahrens 	dmu_objset_close(os);
	/*
	 * The loop above only ends once dmu_object_next() returns
	 * non-zero.  ESRCH is the expected terminator (presumably "no
	 * more objects"); anything else is reported as a failure.
	 */
9702199Sahrens 	if (err != ESRCH)
9715367Sahrens 		goto out;
9722199Sahrens 
	/*
	 * Take our own hold on the dsl_dir under FTAG.  It is dropped
	 * below on failure, or by dsl_dir_destroy_sync() on success.
	 */
9736975Smaybee 	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
9746975Smaybee 	err = dsl_dir_open_obj(dd->dd_pool, dd->dd_object, NULL, FTAG, &dd);
9756975Smaybee 	rw_exit(&dd->dd_pool->dp_config_rwlock);
9766975Smaybee 
9776975Smaybee 	if (err)
9786975Smaybee 		goto out;
9796975Smaybee 
9805367Sahrens 	if (ds->ds_user_ptr) {
9816689Smaybee 		/*
9826689Smaybee 		 * We need to sync out all in-flight IO before we try
9836689Smaybee 		 * to evict (the dataset evict func is trying to clear
9846689Smaybee 		 * the cached entries for this dataset in the ARC).
9856689Smaybee 		 */
9866689Smaybee 		txg_wait_synced(dd->dd_pool, 0);
9875367Sahrens 	}
9885367Sahrens 
9892199Sahrens 	/*
9902199Sahrens 	 * Blow away the dsl_dir + head dataset.
9912199Sahrens 	 */
9926689Smaybee 	dsl_dataset_make_exclusive(ds, tag);
9936975Smaybee 	if (ds->ds_user_ptr) {
9946975Smaybee 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
9956975Smaybee 		ds->ds_user_ptr = NULL;
9966975Smaybee 	}
9972199Sahrens 	dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
9982199Sahrens 	dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
9995367Sahrens 	    dsl_dataset_destroy_sync, ds, tag, 0);
10002199Sahrens 	dsl_sync_task_create(dstg, dsl_dir_destroy_check,
10012199Sahrens 	    dsl_dir_destroy_sync, dd, FTAG, 0);
10022199Sahrens 	err = dsl_sync_task_group_wait(dstg);
10032199Sahrens 	dsl_sync_task_group_destroy(dstg);
10046689Smaybee 	/* if it is successful, dsl_dir_destroy_sync will close the dd */
10055367Sahrens 	if (err)
10062199Sahrens 		dsl_dir_close(dd, FTAG);
10075367Sahrens out:
10086689Smaybee 	dsl_dataset_disown(ds, tag);
1009789Sahrens 	return (err);
1010789Sahrens }
1011789Sahrens 
/*
 * Roll 'ds' back by running dsl_dataset_rollback_check() and
 * dsl_dataset_rollback_sync() as a sync task on the dataset's pool.
 *
 * 'ds' must have an owner (asserted).  'ost' is the objset type; a
 * pointer to it is handed to the check and sync functions, which use it
 * when the dataset has no previous snapshot and its objset has to be
 * recreated.  Returns the result of dsl_sync_task_do().
 */
1012789Sahrens int
10135367Sahrens dsl_dataset_rollback(dsl_dataset_t *ds, dmu_objset_type_t ost)
1014789Sahrens {
10156689Smaybee 	ASSERT(ds->ds_owner);
10165367Sahrens 
10172199Sahrens 	return (dsl_sync_task_do(ds->ds_dir->dd_pool,
10182199Sahrens 	    dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
10195367Sahrens 	    ds, &ost, 0));
1020789Sahrens }
1021789Sahrens 
/*
 * Attach the user pointer 'p' and its eviction callback 'func' to 'ds',
 * but only if no user pointer is currently set.  The check and the
 * update are done under ds_lock.
 *
 * Returns the user pointer that was set before the call: NULL means 'p'
 * and 'func' were installed; non-NULL means the dataset was left
 * unchanged.
 */
1022789Sahrens void *
1023789Sahrens dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
1024789Sahrens     void *p, dsl_dataset_evict_func_t func)
1025789Sahrens {
1026789Sahrens 	void *old;
1027789Sahrens 
1028789Sahrens 	mutex_enter(&ds->ds_lock);
1029789Sahrens 	old = ds->ds_user_ptr;
1030789Sahrens 	if (old == NULL) {
1031789Sahrens 		ds->ds_user_ptr = p;
1032789Sahrens 		ds->ds_user_evict_func = func;
1033789Sahrens 	}
1034789Sahrens 	mutex_exit(&ds->ds_lock);
1035789Sahrens 	return (old);
1036789Sahrens }
1037789Sahrens 
/*
 * Return the user pointer currently stored in 'ds' (ds_user_ptr).  The
 * field is read without taking ds_lock.
 */
1038789Sahrens void *
1039789Sahrens dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
1040789Sahrens {
1041789Sahrens 	return (ds->ds_user_ptr);
1042789Sahrens }
1043789Sahrens 
1044789Sahrens 
/*
 * Return the address of the block pointer held in the dataset's phys
 * structure (ds_phys->ds_bp).  The result points into ds_phys; it is
 * not a copy.
 */
10453547Smaybee blkptr_t *
10463547Smaybee dsl_dataset_get_blkptr(dsl_dataset_t *ds)
1047789Sahrens {
10483547Smaybee 	return (&ds->ds_phys->ds_bp);
1049789Sahrens }
1050789Sahrens 
/*
 * Store '*bp' as the block pointer of 'ds'.  Must be called in syncing
 * context (asserted).
 *
 * A NULL 'ds' stands for the meta-objset, whose block pointer is kept
 * in the pool structure (dp_meta_rootbp) instead.  Otherwise the
 * dataset's dbuf is dirtied in 'tx' and ds_phys->ds_bp is overwritten.
 */
1051789Sahrens void
1052789Sahrens dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
1053789Sahrens {
1054789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1055789Sahrens 	/* If it's the meta-objset, set dp_meta_rootbp */
1056789Sahrens 	if (ds == NULL) {
1057789Sahrens 		tx->tx_pool->dp_meta_rootbp = *bp;
1058789Sahrens 	} else {
1059789Sahrens 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
1060789Sahrens 		ds->ds_phys->ds_bp = *bp;
1061789Sahrens 	}
1062789Sahrens }
1063789Sahrens 
/*
 * Return the spa_t of the pool that 'ds' belongs to, reached through
 * the dataset's dsl_dir.
 */
1064789Sahrens spa_t *
1065789Sahrens dsl_dataset_get_spa(dsl_dataset_t *ds)
1066789Sahrens {
1067789Sahrens 	return (ds->ds_dir->dd_pool->dp_spa);
1068789Sahrens }
1069789Sahrens 
/*
 * Record 'ds' as dirty in tx's txg by adding it to the pool's
 * dp_dirty_datasets list.
 *
 * A NULL 'ds' (the meta-objset) is ignored.  The dataset must have a
 * user pointer (asserted), and the function panics if the dataset has a
 * next snapshot, since that means a snapshot is being dirtied.
 *
 * When txg_list_add() returns 0 (presumably meaning the dataset was not
 * already on the list for this txg), an extra reference is taken on
 * ds_dbuf with 'ds' itself as the tag.
 */
1070789Sahrens void
1071789Sahrens dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
1072789Sahrens {
1073789Sahrens 	dsl_pool_t *dp;
1074789Sahrens 
1075789Sahrens 	if (ds == NULL) /* this is the meta-objset */
1076789Sahrens 		return;
1077789Sahrens 
1078789Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
10792885Sahrens 
10802885Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0)
10812885Sahrens 		panic("dirtying snapshot!");
1082789Sahrens 
1083789Sahrens 	dp = ds->ds_dir->dd_pool;
1084789Sahrens 
1085789Sahrens 	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
1086789Sahrens 		/* up the hold count until we can be written out */
1087789Sahrens 		dmu_buf_add_ref(ds->ds_dbuf, ds);
1088789Sahrens 	}
1089789Sahrens }
1090789Sahrens 
10915378Sck153898 /*
10925378Sck153898  * The unique space in the head dataset can be calculated by subtracting
10935378Sck153898  * the space used in the most recent snapshot, that is still being used
10945378Sck153898  * in this file system, from the space currently in use.  To figure out
10955378Sck153898  * the space in the most recent snapshot still in use, we need to take
10965378Sck153898  * the total space used in the snapshot and subtract out the space that
10975378Sck153898  * has been freed up since the snapshot was taken.
 *
 * In terms of the fields used below:
 *
 *	unique = ds_used_bytes - (prev snapshot's ds_used_bytes - dlused)
 *
 * where dlused is the space total reported by bplist_space() for this
 * dataset's deadlist, and the previous snapshot's usage is taken as 0
 * when there is no previous snapshot.
 *
 * 'ds' must be the head dataset of its dsl_dir (asserted).  The result
 * is stored in ds_phys->ds_unique_bytes.  If the pool version is at
 * least SPA_VERSION_UNIQUE_ACCURATE, DS_FLAG_UNIQUE_ACCURATE is also
 * set on the dataset when it was not set already.
10985378Sck153898  */
10995378Sck153898 static void
11005378Sck153898 dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
11015378Sck153898 {
11025378Sck153898 	uint64_t mrs_used;
11035378Sck153898 	uint64_t dlused, dlcomp, dluncomp;
11045378Sck153898 
11055378Sck153898 	ASSERT(ds->ds_object == ds->ds_dir->dd_phys->dd_head_dataset_obj);
11065378Sck153898 
	/* mrs_used: space used by the most recent snapshot, if any */
11075378Sck153898 	if (ds->ds_phys->ds_prev_snap_obj != 0)
11085378Sck153898 		mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
11095378Sck153898 	else
11105378Sck153898 		mrs_used = 0;
11115378Sck153898 
11125378Sck153898 	VERIFY(0 == bplist_space(&ds->ds_deadlist, &dlused, &dlcomp,
11135378Sck153898 	    &dluncomp));
11145378Sck153898 
11155378Sck153898 	ASSERT3U(dlused, <=, mrs_used);
11165378Sck153898 	ds->ds_phys->ds_unique_bytes =
11175378Sck153898 	    ds->ds_phys->ds_used_bytes - (mrs_used - dlused);
11185378Sck153898 
11195378Sck153898 	if (!DS_UNIQUE_IS_ACCURATE(ds) &&
11205378Sck153898 	    spa_version(ds->ds_dir->dd_pool->dp_spa) >=
11215378Sck153898 	    SPA_VERSION_UNIQUE_ACCURATE)
11225378Sck153898 		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
11235378Sck153898 }
11245378Sck153898 
/*
 * Return the unique byte count of 'ds' (ds_phys->ds_unique_bytes).
 * For a non-snapshot dataset whose value is not marked accurate, the
 * value is first recomputed with dsl_dataset_recalc_head_uniq().
 */
11255378Sck153898 static uint64_t
11265378Sck153898 dsl_dataset_unique(dsl_dataset_t *ds)
11275378Sck153898 {
11285378Sck153898 	if (!DS_UNIQUE_IS_ACCURATE(ds) && !dsl_dataset_is_snapshot(ds))
11295378Sck153898 		dsl_dataset_recalc_head_uniq(ds);
11305378Sck153898 
11315378Sck153898 	return (ds->ds_phys->ds_unique_bytes);
11325378Sck153898 }
11335378Sck153898 
/*
 * State shared with kill_blkptr() while dsl_dataset_rollback_sync()
 * traverses a dataset: pointers to the running space totals, plus the
 * zio and transaction used when freeing each block.
 */
1134789Sahrens struct killarg {
	/* running totals, incremented once per visited block */
11355378Sck153898 	int64_t *usedp;
11365378Sck153898 	int64_t *compressedp;
11375378Sck153898 	int64_t *uncompressedp;
	/* zio passed to arc_free() for every block */
1138789Sahrens 	zio_t *zio;
	/* transaction whose txg is passed to arc_free() */
1139789Sahrens 	dmu_tx_t *tx;
1140789Sahrens };
1141789Sahrens 
/*
 * traverse_dsl_dataset() callback used by dsl_dataset_rollback_sync().
 *
 * For the block described by 'bc', adds its allocated, physical and
 * uncompressed sizes to the totals referenced by the struct killarg in
 * 'arg', then frees the block with arc_free() using ka->zio and the txg
 * of ka->tx.  The traversal entry must carry no error (asserted).
 * Always returns 0; the return value of arc_free() is ignored.
 */
1142789Sahrens static int
1143789Sahrens kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
1144789Sahrens {
1145789Sahrens 	struct killarg *ka = arg;
1146789Sahrens 	blkptr_t *bp = &bc->bc_blkptr;
1147789Sahrens 
1148789Sahrens 	ASSERT3U(bc->bc_errno, ==, 0);
1149789Sahrens 
1150789Sahrens 	/*
1151789Sahrens 	 * Since this callback is not called concurrently, no lock is
1152789Sahrens 	 * needed on the accounting values.
1153789Sahrens 	 */
11542082Seschrock 	*ka->usedp += bp_get_dasize(spa, bp);
1155789Sahrens 	*ka->compressedp += BP_GET_PSIZE(bp);
1156789Sahrens 	*ka->uncompressedp += BP_GET_UCSIZE(bp);
1157789Sahrens 	/* XXX check for EIO? */
1158789Sahrens 	(void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL,
1159789Sahrens 	    ARC_NOWAIT);
1160789Sahrens 	return (0);
1161789Sahrens }
1162789Sahrens 
/*
 * Sync task check function for dsl_dataset_rollback().
 *
 * arg1 is the dataset and arg2 points to the dmu_objset_type_t supplied
 * by the caller.  Returns:
 *   EINVAL  if there is no previous snapshot (ds_prev_snap_txg == 0)
 *           and the objset type is not DMU_OST_ZFS, or if the dataset
 *           is itself a snapshot;
 *   EAGAIN  if the dataset's block pointer was born in this txg or
 *           later;
 *   0       otherwise.
 */
1163789Sahrens /* ARGSUSED */
11642199Sahrens static int
11652199Sahrens dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
1166789Sahrens {
11672199Sahrens 	dsl_dataset_t *ds = arg1;
11685367Sahrens 	dmu_objset_type_t *ost = arg2;
1169789Sahrens 
11702199Sahrens 	/*
11715367Sahrens 	 * We can only roll back to emptiness if it is a ZPL objset.
11722199Sahrens 	 */
11735367Sahrens 	if (*ost != DMU_OST_ZFS && ds->ds_phys->ds_prev_snap_txg == 0)
1174789Sahrens 		return (EINVAL);
1175789Sahrens 
11762199Sahrens 	/*
11772199Sahrens 	 * This must not be a snapshot.
11782199Sahrens 	 */
11792199Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0)
11802199Sahrens 		return (EINVAL);
1181789Sahrens 
1182789Sahrens 	/*
1183789Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
1184789Sahrens 	 * them.  Try again.
1185789Sahrens 	 */
11862199Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
1187789Sahrens 		return (EAGAIN);
11882199Sahrens 
11892199Sahrens 	return (0);
11902199Sahrens }
1191789Sahrens 
/*
 * Sync task function for dsl_dataset_rollback(): discard everything the
 * dataset has written since its previous snapshot.
 *
 * arg1 is the dataset, arg2 points to the caller's dmu_objset_type_t,
 * and 'cr' is used only for the history log entry.  The steps are:
 *   1. if user data is attached, destroy the ZIL via
 *      zil_rollback_destroy() and evict the user data;
 *   2. replace the deadlist with a newly created, empty one;
 *   3. traverse the dataset from ds_prev_snap_txg, freeing each visited
 *      block via kill_blkptr(), and give the space back to the dsl_dir;
 *   4. if there is a previous snapshot, copy its block pointer, space
 *      accounting and flags into this dataset; otherwise zero those
 *      fields and create a new, empty objset of type *ost;
 *   5. write a LOG_DS_ROLLBACK entry to the pool history.
 */
11922199Sahrens /* ARGSUSED */
11932199Sahrens static void
11944543Smarks dsl_dataset_rollback_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
11952199Sahrens {
11962199Sahrens 	dsl_dataset_t *ds = arg1;
11975367Sahrens 	dmu_objset_type_t *ost = arg2;
11982199Sahrens 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
1199789Sahrens 
1200789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
1201789Sahrens 
12024967Sperrin 	/*
12034967Sperrin 	 * Before the roll back destroy the zil.
12044967Sperrin 	 */
12054967Sperrin 	if (ds->ds_user_ptr != NULL) {
12064967Sperrin 		zil_rollback_destroy(
12074967Sperrin 		    ((objset_impl_t *)ds->ds_user_ptr)->os_zil, tx);
12085367Sahrens 
12095367Sahrens 		/*
12105367Sahrens 		 * We need to make sure that the objset_impl_t is reopened after
12115367Sahrens 		 * we do the rollback, otherwise it will have the wrong
12125367Sahrens 		 * objset_phys_t.  Normally this would happen when this
12136689Smaybee 		 * dataset-open is closed, thus causing the
12145367Sahrens 		 * dataset to be immediately evicted.  But when doing "zfs recv
12155367Sahrens 		 * -F", we reopen the objset before that, so that there is no
12165367Sahrens 		 * window where the dataset is closed and inconsistent.
12175367Sahrens 		 */
12185367Sahrens 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
12195367Sahrens 		ds->ds_user_ptr = NULL;
12204967Sperrin 	}
12214935Sperrin 
1222789Sahrens 	/* Zero out the deadlist. */
1223789Sahrens 	bplist_close(&ds->ds_deadlist);
1224789Sahrens 	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
1225789Sahrens 	ds->ds_phys->ds_deadlist_obj =
1226789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
12271544Seschrock 	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
12281544Seschrock 	    ds->ds_phys->ds_deadlist_obj));
1229789Sahrens 
1230789Sahrens 	{
1231789Sahrens 		/* Free blkptrs that we gave birth to */
1232789Sahrens 		zio_t *zio;
12335378Sck153898 		int64_t used = 0, compressed = 0, uncompressed = 0;
1234789Sahrens 		struct killarg ka;
12355518Sck153898 		int64_t delta;
1236789Sahrens 
1237789Sahrens 		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
1238789Sahrens 		    ZIO_FLAG_MUSTSUCCEED);
1239789Sahrens 		ka.usedp = &used;
1240789Sahrens 		ka.compressedp = &compressed;
1241789Sahrens 		ka.uncompressedp = &uncompressed;
1242789Sahrens 		ka.zio = zio;
1243789Sahrens 		ka.tx = tx;
		/* kill_blkptr() accumulates the totals and frees each block */
1244789Sahrens 		(void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
1245789Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
1246789Sahrens 		(void) zio_wait(zio);
1247789Sahrens 
12485518Sck153898 		/* only deduct space beyond any refreservation */
12495518Sck153898 		delta = parent_delta(ds, -used);
12502199Sahrens 		dsl_dir_diduse_space(ds->ds_dir,
12515518Sck153898 		    delta, -compressed, -uncompressed, tx);
1252789Sahrens 	}
1253789Sahrens 
12545367Sahrens 	if (ds->ds_prev) {
12555367Sahrens 		/* Change our contents to that of the prev snapshot */
12565367Sahrens 		ASSERT3U(ds->ds_prev->ds_object, ==,
12575367Sahrens 		    ds->ds_phys->ds_prev_snap_obj);
12585367Sahrens 		ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
12595367Sahrens 		ds->ds_phys->ds_used_bytes =
12605367Sahrens 		    ds->ds_prev->ds_phys->ds_used_bytes;
12615367Sahrens 		ds->ds_phys->ds_compressed_bytes =
12625367Sahrens 		    ds->ds_prev->ds_phys->ds_compressed_bytes;
12635367Sahrens 		ds->ds_phys->ds_uncompressed_bytes =
12645367Sahrens 		    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
12655367Sahrens 		ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
12665367Sahrens 		ds->ds_phys->ds_unique_bytes = 0;
1267789Sahrens 
		/*
		 * Only reset prev's unique count when prev's next
		 * snapshot pointer refers to this dataset.
		 */
12685367Sahrens 		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
12695367Sahrens 			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
12705367Sahrens 			ds->ds_prev->ds_phys->ds_unique_bytes = 0;
12715367Sahrens 		}
12725367Sahrens 	} else {
12735367Sahrens 		/* Zero out our contents, recreate objset */
12745367Sahrens 		bzero(&ds->ds_phys->ds_bp, sizeof (blkptr_t));
12755367Sahrens 		ds->ds_phys->ds_used_bytes = 0;
12765367Sahrens 		ds->ds_phys->ds_compressed_bytes = 0;
12775367Sahrens 		ds->ds_phys->ds_uncompressed_bytes = 0;
12785367Sahrens 		ds->ds_phys->ds_flags = 0;
12795367Sahrens 		ds->ds_phys->ds_unique_bytes = 0;
12805367Sahrens 		(void) dmu_objset_create_impl(ds->ds_dir->dd_pool->dp_spa, ds,
12815367Sahrens 		    &ds->ds_phys->ds_bp, *ost, tx);
12822532Sahrens 	}
12834543Smarks 
12844543Smarks 	spa_history_internal_log(LOG_DS_ROLLBACK, ds->ds_dir->dd_pool->dp_spa,
12854543Smarks 	    tx, cr, "dataset = %llu", ds->ds_object);
1286789Sahrens }
1287789Sahrens 
/*
 * Sync task check function for the first phase of destroying a head
 * dataset (see dsl_dataset_destroy()).  arg1 is the dataset; arg2 and
 * tx are unused.
 *
 * Returns EINVAL if the dataset has snapshots of its own, the error
 * from zap_count() if the child-directory ZAP cannot be counted, EEXIST
 * if the dataset's dsl_dir still has child directories, and 0
 * otherwise.
 */
12881731Sbonwick /* ARGSUSED */
12891731Sbonwick static int
12902199Sahrens dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
12911731Sbonwick {
12922199Sahrens 	dsl_dataset_t *ds = arg1;
12935367Sahrens 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
12945367Sahrens 	uint64_t count;
12955367Sahrens 	int err;
12961731Sbonwick 
12971731Sbonwick 	/*
12981731Sbonwick 	 * Can't delete a head dataset if there are snapshots of it.
12991731Sbonwick 	 * (Except if the only snapshots are from the branch we cloned
13001731Sbonwick 	 * from.)
13011731Sbonwick 	 */
13021731Sbonwick 	if (ds->ds_prev != NULL &&
13031731Sbonwick 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
13041731Sbonwick 		return (EINVAL);
13051731Sbonwick 
13065367Sahrens 	/*
13075367Sahrens 	 * This is really a dsl_dir thing, but check it here so that
13085367Sahrens 	 * we'll be less likely to leave this dataset inconsistent &
13095367Sahrens 	 * nearly destroyed.
13105367Sahrens 	 */
13115367Sahrens 	err = zap_count(mos, ds->ds_dir->dd_phys->dd_child_dir_zapobj, &count);
13125367Sahrens 	if (err)
13135367Sahrens 		return (err);
13145367Sahrens 	if (count != 0)
13155367Sahrens 		return (EEXIST);
13165367Sahrens 
13171731Sbonwick 	return (0);
13181731Sbonwick }
13191731Sbonwick 
/*
 * Sync task function for the first phase of destroying a head dataset:
 * sets DS_FLAG_INCONSISTENT in the dataset's on-disk flags and writes a
 * LOG_DS_DESTROY_BEGIN entry to the pool history.  arg1 is the dataset;
 * arg2 is unused.
 */
13202199Sahrens /* ARGSUSED */
13212199Sahrens static void
13224543Smarks dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
1323789Sahrens {
13242199Sahrens 	dsl_dataset_t *ds = arg1;
13254543Smarks 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
1326789Sahrens 
13272199Sahrens 	/* Mark it as inconsistent on-disk, in case we crash */
13282199Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
13292199Sahrens 	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
13304543Smarks 
13314543Smarks 	spa_history_internal_log(LOG_DS_DESTROY_BEGIN, dp->dp_spa, tx,
13324543Smarks 	    cr, "dataset = %llu", ds->ds_object);
13332199Sahrens }
1334789Sahrens 
/*
 * Sync task check function paired with dsl_dataset_destroy_sync().
 * arg1 is the dataset; arg2 is unused.
 *
 * Returns:
 *   EEXIST  if the dataset is a branch point (ds_num_children > 1);
 *   EINVAL  if the dataset has snapshots of its own;
 *   EAGAIN  if the dataset's block pointer was born in this txg or
 *           later;
 *   0       otherwise.
 */
13352199Sahrens /* ARGSUSED */
13365367Sahrens int
13372199Sahrens dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
13382199Sahrens {
13392199Sahrens 	dsl_dataset_t *ds = arg1;
1340789Sahrens 
13416689Smaybee 	/* we have an owner hold, so no one else can destroy us */
13426689Smaybee 	ASSERT(!DSL_DATASET_IS_DESTROYED(ds));
13436689Smaybee 
1344789Sahrens 	/* Can't delete a branch point. */
13452199Sahrens 	if (ds->ds_phys->ds_num_children > 1)
13462199Sahrens 		return (EEXIST);
1347789Sahrens 
1348789Sahrens 	/*
1349789Sahrens 	 * Can't delete a head dataset if there are snapshots of it.
1350789Sahrens 	 * (Except if the only snapshots are from the branch we cloned
1351789Sahrens 	 * from.)
1352789Sahrens 	 */
1353789Sahrens 	if (ds->ds_prev != NULL &&
13542199Sahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
1355789Sahrens 		return (EINVAL);
1356789Sahrens 
1357789Sahrens 	/*
1358789Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
1359789Sahrens 	 * them.  Try again.
1360789Sahrens 	 */
13612199Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
1362789Sahrens 		return (EAGAIN);
13632199Sahrens 
13642199Sahrens 	/* XXX we should do some i/o error checking... */
13652199Sahrens 	return (0);
13662199Sahrens }
13672199Sahrens 
/*
 * Rendezvous state used by dsl_dataset_drain_refs() to wait until
 * dsl_dataset_refs_gone() has been called.
 */
13686689Smaybee struct refsarg {
	/* protects 'gone' and is the mutex used with 'cv' */
13696689Smaybee 	kmutex_t lock;
	/* set to TRUE by dsl_dataset_refs_gone() */
13706689Smaybee 	boolean_t gone;
	/* signalled when 'gone' becomes TRUE */
13716689Smaybee 	kcondvar_t cv;
13726689Smaybee };
13736689Smaybee 
/*
 * Callback registered on the dataset's dbuf by
 * dsl_dataset_drain_refs().  'argv' is the struct refsarg of the
 * waiting thread; the callback sets its 'gone' flag and signals its
 * condition variable, both under the refsarg lock.  'db' is unused.
 */
13746689Smaybee /* ARGSUSED */
13756689Smaybee static void
13766689Smaybee dsl_dataset_refs_gone(dmu_buf_t *db, void *argv)
13776689Smaybee {
13786689Smaybee 	struct refsarg *arg = argv;
13796689Smaybee 
13806689Smaybee 	mutex_enter(&arg->lock);
13816689Smaybee 	arg->gone = TRUE;
13826689Smaybee 	cv_signal(&arg->cv);
13836689Smaybee 	mutex_exit(&arg->lock);
13846689Smaybee }
13856689Smaybee 
/*
 * Release the hold that 'tag' has on the dataset's dbuf and block until
 * dsl_dataset_refs_gone() has been called for that dbuf.
 *
 * The dbuf's user data is switched from 'ds' to a stack-allocated
 * struct refsarg with dsl_dataset_refs_gone() as its callback, the
 * 'tag' hold is released, and the caller then sleeps on the condition
 * variable until the callback sets 'gone'.  Afterwards ds_dbuf and
 * ds_phys are cleared, so 'ds' no longer references the dbuf.
 *
 * NOTE(review): presumably the callback fires when the last hold on the
 * dbuf goes away and the dbuf is evicted -- confirm against the
 * dmu_buf user-eviction semantics.
 */
13866689Smaybee static void
13876689Smaybee dsl_dataset_drain_refs(dsl_dataset_t *ds, void *tag)
13886689Smaybee {
13896689Smaybee 	struct refsarg arg;
13906689Smaybee 
13916689Smaybee 	mutex_init(&arg.lock, NULL, MUTEX_DEFAULT, NULL);
13926689Smaybee 	cv_init(&arg.cv, NULL, CV_DEFAULT, NULL);
13936689Smaybee 	arg.gone = FALSE;
13946689Smaybee 	(void) dmu_buf_update_user(ds->ds_dbuf, ds, &arg, &ds->ds_phys,
13956689Smaybee 	    dsl_dataset_refs_gone);
13966689Smaybee 	dmu_buf_rele(ds->ds_dbuf, tag);
13976689Smaybee 	mutex_enter(&arg.lock);
	/* loop so that a wakeup without 'gone' set does not end the wait */
13986689Smaybee 	while (!arg.gone)
13996689Smaybee 		cv_wait(&arg.cv, &arg.lock);
14006689Smaybee 	ASSERT(arg.gone);
14016689Smaybee 	mutex_exit(&arg.lock);
14026689Smaybee 	ds->ds_dbuf = NULL;
14036689Smaybee 	ds->ds_phys = NULL;
14046689Smaybee 	mutex_destroy(&arg.lock);
14056689Smaybee 	cv_destroy(&arg.cv);
14066689Smaybee }
14076689Smaybee 
14085367Sahrens void
14094543Smarks dsl_dataset_destroy_sync(void *arg1, void *tag, cred_t *cr, dmu_tx_t *tx)
14102199Sahrens {
14112199Sahrens 	dsl_dataset_t *ds = arg1;
14125378Sck153898 	int64_t used = 0, compressed = 0, uncompressed = 0;
14132199Sahrens 	zio_t *zio;
14142199Sahrens 	int err;
14152199Sahrens 	int after_branch_point = FALSE;
14162199Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
14172199Sahrens 	objset_t *mos = dp->dp_meta_objset;
14182199Sahrens 	dsl_dataset_t *ds_prev = NULL;
14192199Sahrens 	uint64_t obj;
14202199Sahrens 
14216689Smaybee 	ASSERT(ds->ds_owner);
14222199Sahrens 	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
14232199Sahrens 	ASSERT(ds->ds_prev == NULL ||
14242199Sahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
14252199Sahrens 	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
14262199Sahrens 
14276689Smaybee 	/* signal any waiters that this dataset is going away */
14286689Smaybee 	mutex_enter(&ds->ds_lock);
14296689Smaybee 	ds->ds_owner = dsl_reaper;
14306689Smaybee 	cv_broadcast(&ds->ds_exclusive_cv);
14316689Smaybee 	mutex_exit(&ds->ds_lock);
14326689Smaybee 
14335378Sck153898 	/* Remove our reservation */
14345378Sck153898 	if (ds->ds_reserved != 0) {
14355378Sck153898 		uint64_t val = 0;
14365378Sck153898 		dsl_dataset_set_reservation_sync(ds, &val, cr, tx);
14375378Sck153898 		ASSERT3U(ds->ds_reserved, ==, 0);
14385378Sck153898 	}
14395378Sck153898 
14402199Sahrens 	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
14412199Sahrens 
14422199Sahrens 	obj = ds->ds_object;
1443789Sahrens 
1444789Sahrens 	if (ds->ds_phys->ds_prev_snap_obj != 0) {
1445789Sahrens 		if (ds->ds_prev) {
1446789Sahrens 			ds_prev = ds->ds_prev;
1447789Sahrens 		} else {
14486689Smaybee 			VERIFY(0 == dsl_dataset_hold_obj(dp,
14496689Smaybee 			    ds->ds_phys->ds_prev_snap_obj, FTAG, &ds_prev));
1450789Sahrens 		}
1451789Sahrens 		after_branch_point =
1452789Sahrens 		    (ds_prev->ds_phys->ds_next_snap_obj != obj);
1453789Sahrens 
1454789Sahrens 		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
1455789Sahrens 		if (after_branch_point &&
1456789Sahrens 		    ds->ds_phys->ds_next_snap_obj == 0) {
1457789Sahrens 			/* This clone is toast. */
1458789Sahrens 			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
1459789Sahrens 			ds_prev->ds_phys->ds_num_children--;
1460789Sahrens 		} else if (!after_branch_point) {
1461789Sahrens 			ds_prev->ds_phys->ds_next_snap_obj =
1462789Sahrens 			    ds->ds_phys->ds_next_snap_obj;
1463789Sahrens 		}
1464789Sahrens 	}
1465789Sahrens 
1466789Sahrens 	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
1467789Sahrens 
1468789Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0) {
14692199Sahrens 		blkptr_t bp;
1470789Sahrens 		dsl_dataset_t *ds_next;
1471789Sahrens 		uint64_t itor = 0;
14725378Sck153898 		uint64_t old_unique;
1473789Sahrens 
1474789Sahrens 		spa_scrub_restart(dp->dp_spa, tx->tx_txg);
1475789Sahrens 
14766689Smaybee 		VERIFY(0 == dsl_dataset_hold_obj(dp,
14776689Smaybee 		    ds->ds_phys->ds_next_snap_obj, FTAG, &ds_next));
1478789Sahrens 		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
1479789Sahrens 
14805378Sck153898 		old_unique = dsl_dataset_unique(ds_next);
14815378Sck153898 
1482789Sahrens 		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
1483789Sahrens 		ds_next->ds_phys->ds_prev_snap_obj =
1484789Sahrens 		    ds->ds_phys->ds_prev_snap_obj;
1485789Sahrens 		ds_next->ds_phys->ds_prev_snap_txg =
1486789Sahrens 		    ds->ds_phys->ds_prev_snap_txg;
1487789Sahrens 		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
1488789Sahrens 		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
1489789Sahrens 
1490789Sahrens 		/*
1491789Sahrens 		 * Transfer to our deadlist (which will become next's
1492789Sahrens 		 * new deadlist) any entries from next's current
1493789Sahrens 		 * deadlist which were born before prev, and free the
1494789Sahrens 		 * other entries.
1495789Sahrens 		 *
1496789Sahrens 		 * XXX we're doing this long task with the config lock held
1497789Sahrens 		 */
14986689Smaybee 		while (bplist_iterate(&ds_next->ds_deadlist, &itor, &bp) == 0) {
1499789Sahrens 			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
15001544Seschrock 				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
15011544Seschrock 				    &bp, tx));
1502789Sahrens 				if (ds_prev && !after_branch_point &&
1503789Sahrens 				    bp.blk_birth >
1504789Sahrens 				    ds_prev->ds_phys->ds_prev_snap_txg) {
1505789Sahrens 					ds_prev->ds_phys->ds_unique_bytes +=
15062082Seschrock 					    bp_get_dasize(dp->dp_spa, &bp);
1507789Sahrens 				}
1508789Sahrens 			} else {
15092082Seschrock 				used += bp_get_dasize(dp->dp_spa, &bp);
1510789Sahrens 				compressed += BP_GET_PSIZE(&bp);
1511789Sahrens 				uncompressed += BP_GET_UCSIZE(&bp);
1512789Sahrens 				/* XXX check return value? */
1513789Sahrens 				(void) arc_free(zio, dp->dp_spa, tx->tx_txg,
1514789Sahrens 				    &bp, NULL, NULL, ARC_NOWAIT);
1515789Sahrens 			}
1516789Sahrens 		}
1517789Sahrens 
1518789Sahrens 		/* free next's deadlist */
1519789Sahrens 		bplist_close(&ds_next->ds_deadlist);
1520789Sahrens 		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);
1521789Sahrens 
1522789Sahrens 		/* set next's deadlist to our deadlist */
15236689Smaybee 		bplist_close(&ds->ds_deadlist);
1524789Sahrens 		ds_next->ds_phys->ds_deadlist_obj =
1525789Sahrens 		    ds->ds_phys->ds_deadlist_obj;
15261544Seschrock 		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
15271544Seschrock 		    ds_next->ds_phys->ds_deadlist_obj));
1528789Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1529789Sahrens 
1530789Sahrens 		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
1531789Sahrens 			/*
1532789Sahrens 			 * Update next's unique to include blocks which
1533789Sahrens 			 * were previously shared by only this snapshot
1534789Sahrens 			 * and it.  Those blocks will be born after the
1535789Sahrens 			 * prev snap and before this snap, and will have
1536789Sahrens 			 * died after the next snap and before the one
1537789Sahrens 			 * after that (ie. be on the snap after next's
1538789Sahrens 			 * deadlist).
1539789Sahrens 			 *
1540789Sahrens 			 * XXX we're doing this long task with the
1541789Sahrens 			 * config lock held
1542789Sahrens 			 */
1543789Sahrens 			dsl_dataset_t *ds_after_next;
1544789Sahrens 
15456689Smaybee 			VERIFY(0 == dsl_dataset_hold_obj(dp,
15466689Smaybee 			    ds_next->ds_phys->ds_next_snap_obj,
15476689Smaybee 			    FTAG, &ds_after_next));
1548789Sahrens 			itor = 0;
1549789Sahrens 			while (bplist_iterate(&ds_after_next->ds_deadlist,
1550789Sahrens 			    &itor, &bp) == 0) {
1551789Sahrens 				if (bp.blk_birth >
1552789Sahrens 				    ds->ds_phys->ds_prev_snap_txg &&
1553789Sahrens 				    bp.blk_birth <=
1554789Sahrens 				    ds->ds_phys->ds_creation_txg) {
1555789Sahrens 					ds_next->ds_phys->ds_unique_bytes +=
15562082Seschrock 					    bp_get_dasize(dp->dp_spa, &bp);
1557789Sahrens 				}
1558789Sahrens 			}
1559789Sahrens 
15606689Smaybee 			dsl_dataset_rele(ds_after_next, FTAG);
1561789Sahrens 			ASSERT3P(ds_next->ds_prev, ==, NULL);
1562789Sahrens 		} else {
1563789Sahrens 			ASSERT3P(ds_next->ds_prev, ==, ds);
15646689Smaybee 			dsl_dataset_drop_ref(ds_next->ds_prev, ds_next);
15656689Smaybee 			ds_next->ds_prev = NULL;
1566789Sahrens 			if (ds_prev) {
15676689Smaybee 				VERIFY(0 == dsl_dataset_get_ref(dp,
15686689Smaybee 				    ds->ds_phys->ds_prev_snap_obj,
15696689Smaybee 				    ds_next, &ds_next->ds_prev));
1570789Sahrens 			}
15715378Sck153898 
15725378Sck153898 			dsl_dataset_recalc_head_uniq(ds_next);
15735378Sck153898 
15745378Sck153898 			/*
15755378Sck153898 			 * Reduce the amount of our unconsumed refreservation
15765378Sck153898 			 * being charged to our parent by the amount of
15775378Sck153898 			 * new unique data we have gained.
15785378Sck153898 			 */
15795378Sck153898 			if (old_unique < ds_next->ds_reserved) {
15805378Sck153898 				int64_t mrsdelta;
15815378Sck153898 				uint64_t new_unique =
15825378Sck153898 				    ds_next->ds_phys->ds_unique_bytes;
15835378Sck153898 
15845378Sck153898 				ASSERT(old_unique <= new_unique);
15855378Sck153898 				mrsdelta = MIN(new_unique - old_unique,
15865378Sck153898 				    ds_next->ds_reserved - old_unique);
15875378Sck153898 				dsl_dir_diduse_space(ds->ds_dir, -mrsdelta,
15885378Sck153898 				    0, 0, tx);
15895378Sck153898 			}
1590789Sahrens 		}
15916689Smaybee 		dsl_dataset_rele(ds_next, FTAG);
1592789Sahrens 
1593789Sahrens 		/*
15945378Sck153898 		 * NB: unique_bytes might not be accurate for the head objset.
15955378Sck153898 		 * Before SPA_VERSION 9, we didn't update its value when we
15965378Sck153898 		 * deleted the most recent snapshot.
1597789Sahrens 		 */
1598789Sahrens 		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
1599789Sahrens 	} else {
1600789Sahrens 		/*
1601789Sahrens 		 * There's no next snapshot, so this is a head dataset.
1602789Sahrens 		 * Destroy the deadlist.  Unless it's a clone, the
1603789Sahrens 		 * deadlist should be empty.  (If it's a clone, it's
1604789Sahrens 		 * safe to ignore the deadlist contents.)
1605789Sahrens 		 */
1606789Sahrens 		struct killarg ka;
1607789Sahrens 
1608789Sahrens 		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
1609789Sahrens 		bplist_close(&ds->ds_deadlist);
1610789Sahrens 		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
1611789Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1612789Sahrens 
1613789Sahrens 		/*
1614789Sahrens 		 * Free everything that we point to (that's born after
1615789Sahrens 		 * the previous snapshot, if we are a clone)
1616789Sahrens 		 *
1617789Sahrens 		 * XXX we're doing this long task with the config lock held
1618789Sahrens 		 */
1619789Sahrens 		ka.usedp = &used;
1620789Sahrens 		ka.compressedp = &compressed;
1621789Sahrens 		ka.uncompressedp = &uncompressed;
1622789Sahrens 		ka.zio = zio;
1623789Sahrens 		ka.tx = tx;
1624789Sahrens 		err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
1625789Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
1626789Sahrens 		ASSERT3U(err, ==, 0);
16275378Sck153898 		ASSERT(spa_version(dp->dp_spa) <
16285378Sck153898 		    SPA_VERSION_UNIQUE_ACCURATE ||
16295378Sck153898 		    used == ds->ds_phys->ds_unique_bytes);
1630789Sahrens 	}
1631789Sahrens 
1632789Sahrens 	err = zio_wait(zio);
1633789Sahrens 	ASSERT3U(err, ==, 0);
1634789Sahrens 
16352199Sahrens 	dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx);
1636789Sahrens 
16376689Smaybee 	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
16386689Smaybee 		/* Erase the link in the dir */
16396689Smaybee 		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
16406689Smaybee 		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
16416689Smaybee 		ASSERT(ds->ds_phys->ds_snapnames_zapobj != 0);
1642789Sahrens 		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
1643789Sahrens 		ASSERT(err == 0);
1644789Sahrens 	} else {
1645789Sahrens 		/* remove from snapshot namespace */
1646789Sahrens 		dsl_dataset_t *ds_head;
16476689Smaybee 		ASSERT(ds->ds_phys->ds_snapnames_zapobj == 0);
16486689Smaybee 		VERIFY(0 == dsl_dataset_hold_obj(dp,
16496689Smaybee 		    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &ds_head));
16502207Sahrens 		VERIFY(0 == dsl_dataset_get_snapname(ds));
1651789Sahrens #ifdef ZFS_DEBUG
1652789Sahrens 		{
1653789Sahrens 			uint64_t val;
16546492Stimh 
16556689Smaybee 			err = dsl_dataset_snap_lookup(ds_head,
16566492Stimh 			    ds->ds_snapname, &val);
1657789Sahrens 			ASSERT3U(err, ==, 0);
1658789Sahrens 			ASSERT3U(val, ==, obj);
1659789Sahrens 		}
1660789Sahrens #endif
16616689Smaybee 		err = dsl_dataset_snap_remove(ds_head, ds->ds_snapname, tx);
1662789Sahrens 		ASSERT(err == 0);
16636689Smaybee 		dsl_dataset_rele(ds_head, FTAG);
1664789Sahrens 	}
1665789Sahrens 
1666789Sahrens 	if (ds_prev && ds->ds_prev != ds_prev)
16676689Smaybee 		dsl_dataset_rele(ds_prev, FTAG);
1668789Sahrens 
16695094Slling 	spa_prop_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
16704543Smarks 	spa_history_internal_log(LOG_DS_DESTROY, dp->dp_spa, tx,
16714543Smarks 	    cr, "dataset = %llu", ds->ds_object);
16724543Smarks 
16736689Smaybee 	dsl_dir_close(ds->ds_dir, ds);
16746689Smaybee 	ds->ds_dir = NULL;
16756689Smaybee 	dsl_dataset_drain_refs(ds, tag);
16762199Sahrens 	VERIFY(0 == dmu_object_free(mos, obj, tx));
16772199Sahrens }
16782199Sahrens 
16795378Sck153898 static int
16805378Sck153898 dsl_dataset_snapshot_reserve_space(dsl_dataset_t *ds, dmu_tx_t *tx)
16815378Sck153898 {
16825378Sck153898 	uint64_t asize;
16835378Sck153898 
16845378Sck153898 	if (!dmu_tx_is_syncing(tx))
16855378Sck153898 		return (0);
16865378Sck153898 
16875378Sck153898 	/*
16885378Sck153898 	 * If there's an fs-only reservation, any blocks that might become
16895378Sck153898 	 * owned by the snapshot dataset must be accommodated by space
16905378Sck153898 	 * outside of the reservation.
16915378Sck153898 	 */
16925378Sck153898 	asize = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
16935378Sck153898 	if (asize > dsl_dir_space_available(ds->ds_dir, NULL, 0, FALSE))
16945378Sck153898 		return (ENOSPC);
16955378Sck153898 
16965378Sck153898 	/*
16975378Sck153898 	 * Propogate any reserved space for this snapshot to other
16985378Sck153898 	 * snapshot checks in this sync group.
16995378Sck153898 	 */
17005378Sck153898 	if (asize > 0)
17015378Sck153898 		dsl_dir_willuse_space(ds->ds_dir, asize, tx);
17025378Sck153898 
17035378Sck153898 	return (0);
17045378Sck153898 }
17055378Sck153898 
17062199Sahrens /* ARGSUSED */
17072199Sahrens int
17082199Sahrens dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
17092199Sahrens {
17105367Sahrens 	dsl_dataset_t *ds = arg1;
17112199Sahrens 	const char *snapname = arg2;
17122199Sahrens 	int err;
17132199Sahrens 	uint64_t value;
1714789Sahrens 
1715789Sahrens 	/*
17162199Sahrens 	 * We don't allow multiple snapshots of the same txg.  If there
17172199Sahrens 	 * is already one, try again.
17182199Sahrens 	 */
17192199Sahrens 	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
17202199Sahrens 		return (EAGAIN);
17212199Sahrens 
17222199Sahrens 	/*
17232199Sahrens 	 * Check for conflicting name snapshot name.
1724789Sahrens 	 */
17256689Smaybee 	err = dsl_dataset_snap_lookup(ds, snapname, &value);
17262199Sahrens 	if (err == 0)
17272199Sahrens 		return (EEXIST);
17282199Sahrens 	if (err != ENOENT)
17292199Sahrens 		return (err);
1730789Sahrens 
17313978Smmusante 	/*
17323978Smmusante 	 * Check that the dataset's name is not too long.  Name consists
17333978Smmusante 	 * of the dataset's length + 1 for the @-sign + snapshot name's length
17343978Smmusante 	 */
17353978Smmusante 	if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
17363978Smmusante 		return (ENAMETOOLONG);
17373978Smmusante 
17385378Sck153898 	err = dsl_dataset_snapshot_reserve_space(ds, tx);
17395378Sck153898 	if (err)
17405378Sck153898 		return (err);
17415378Sck153898 
17422199Sahrens 	ds->ds_trysnap_txg = tx->tx_txg;
1743789Sahrens 	return (0);
1744789Sahrens }
1745789Sahrens 
17462199Sahrens void
17474543Smarks dsl_dataset_snapshot_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
1748789Sahrens {
17495367Sahrens 	dsl_dataset_t *ds = arg1;
17502199Sahrens 	const char *snapname = arg2;
17512199Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
1752789Sahrens 	dmu_buf_t *dbuf;
1753789Sahrens 	dsl_dataset_phys_t *dsphys;
17542199Sahrens 	uint64_t dsobj;
1755789Sahrens 	objset_t *mos = dp->dp_meta_objset;
1756789Sahrens 	int err;
1757789Sahrens 
1758789Sahrens 	spa_scrub_restart(dp->dp_spa, tx->tx_txg);
17592199Sahrens 	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
1760789Sahrens 
1761928Stabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
1762928Stabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
17631544Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
1764789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
1765789Sahrens 	dsphys = dbuf->db_data;
17666689Smaybee 	bzero(dsphys, sizeof (dsl_dataset_phys_t));
17672199Sahrens 	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
1768789Sahrens 	dsphys->ds_fsid_guid = unique_create();
1769789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
1770789Sahrens 	    sizeof (dsphys->ds_guid));
1771789Sahrens 	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
1772789Sahrens 	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
1773789Sahrens 	dsphys->ds_next_snap_obj = ds->ds_object;
1774789Sahrens 	dsphys->ds_num_children = 1;
1775789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
1776789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
1777789Sahrens 	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
1778789Sahrens 	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
1779789Sahrens 	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
1780789Sahrens 	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
17812082Seschrock 	dsphys->ds_flags = ds->ds_phys->ds_flags;
1782789Sahrens 	dsphys->ds_bp = ds->ds_phys->ds_bp;
17831544Seschrock 	dmu_buf_rele(dbuf, FTAG);
1784789Sahrens 
17852199Sahrens 	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
17862199Sahrens 	if (ds->ds_prev) {
17872199Sahrens 		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
1788789Sahrens 		    ds->ds_object ||
17892199Sahrens 		    ds->ds_prev->ds_phys->ds_num_children > 1);
17902199Sahrens 		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
17912199Sahrens 			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
1792789Sahrens 			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
17932199Sahrens 			    ds->ds_prev->ds_phys->ds_creation_txg);
17942199Sahrens 			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
1795789Sahrens 		}
1796789Sahrens 	}
1797789Sahrens 
17985378Sck153898 	/*
17995378Sck153898 	 * If we have a reference-reservation on this dataset, we will
18005378Sck153898 	 * need to increase the amount of refreservation being charged
18015378Sck153898 	 * since our unique space is going to zero.
18025378Sck153898 	 */
18035378Sck153898 	if (ds->ds_reserved) {
18045378Sck153898 		int64_t add = MIN(dsl_dataset_unique(ds), ds->ds_reserved);
18055378Sck153898 		dsl_dir_diduse_space(ds->ds_dir, add, 0, 0, tx);
18065378Sck153898 	}
18075378Sck153898 
1808789Sahrens 	bplist_close(&ds->ds_deadlist);
1809789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
18105712Sahrens 	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, tx->tx_txg);
1811789Sahrens 	ds->ds_phys->ds_prev_snap_obj = dsobj;
18125712Sahrens 	ds->ds_phys->ds_prev_snap_txg = tx->tx_txg;
1813789Sahrens 	ds->ds_phys->ds_unique_bytes = 0;
18145378Sck153898 	if (spa_version(dp->dp_spa) >= SPA_VERSION_UNIQUE_ACCURATE)
18155378Sck153898 		ds->ds_phys->ds_flags |= DS_FLAG_UNIQUE_ACCURATE;
1816789Sahrens 	ds->ds_phys->ds_deadlist_obj =
1817789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
18181544Seschrock 	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
18191544Seschrock 	    ds->ds_phys->ds_deadlist_obj));
1820789Sahrens 
1821789Sahrens 	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
1822789Sahrens 	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
1823789Sahrens 	    snapname, 8, 1, &dsobj, tx);
1824789Sahrens 	ASSERT(err == 0);
1825789Sahrens 
1826789Sahrens 	if (ds->ds_prev)
18276689Smaybee 		dsl_dataset_drop_ref(ds->ds_prev, ds);
18286689Smaybee 	VERIFY(0 == dsl_dataset_get_ref(dp,
18296689Smaybee 	    ds->ds_phys->ds_prev_snap_obj, ds, &ds->ds_prev));
18304543Smarks 
18314543Smarks 	spa_history_internal_log(LOG_DS_SNAPSHOT, dp->dp_spa, tx, cr,
18324603Sahrens 	    "dataset = %llu", dsobj);
1833789Sahrens }
1834789Sahrens 
1835789Sahrens void
18363547Smaybee dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
1837789Sahrens {
1838789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1839789Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
1840789Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
1841789Sahrens 
18424787Sahrens 	/*
18434787Sahrens 	 * in case we had to change ds_fsid_guid when we opened it,
18444787Sahrens 	 * sync it out now.
18454787Sahrens 	 */
18464787Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
18474787Sahrens 	ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;
18484787Sahrens 
1849789Sahrens 	dsl_dir_dirty(ds->ds_dir, tx);
18503547Smaybee 	dmu_objset_sync(ds->ds_user_ptr, zio, tx);
1851789Sahrens }
1852789Sahrens 
1853789Sahrens void
18542885Sahrens dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
1855789Sahrens {
18565378Sck153898 	uint64_t refd, avail, uobjs, aobjs;
18575378Sck153898 
18582885Sahrens 	dsl_dir_stats(ds->ds_dir, nv);
1859789Sahrens 
18605378Sck153898 	dsl_dataset_space(ds, &refd, &avail, &uobjs, &aobjs);
18615378Sck153898 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_AVAILABLE, avail);
18625378Sck153898 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED, refd);
18635378Sck153898 
18642885Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
18652885Sahrens 	    ds->ds_phys->ds_creation_time);
18662885Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
18672885Sahrens 	    ds->ds_phys->ds_creation_txg);
18685378Sck153898 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFQUOTA,
18695378Sck153898 	    ds->ds_quota);
18705378Sck153898 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRESERVATION,
18715378Sck153898 	    ds->ds_reserved);
18726643Seschrock 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_GUID,
18736643Seschrock 	    ds->ds_phys->ds_guid);
1874789Sahrens 
1875789Sahrens 	if (ds->ds_phys->ds_next_snap_obj) {
1876789Sahrens 		/*
1877789Sahrens 		 * This is a snapshot; override the dd's space used with
18782885Sahrens 		 * our unique space and compression ratio.
1879789Sahrens 		 */
18802885Sahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
18812885Sahrens 		    ds->ds_phys->ds_unique_bytes);
18822885Sahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
18832885Sahrens 		    ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
18842885Sahrens 		    (ds->ds_phys->ds_uncompressed_bytes * 100 /
18852885Sahrens 		    ds->ds_phys->ds_compressed_bytes));
1886789Sahrens 	}
1887789Sahrens }
1888789Sahrens 
18892885Sahrens void
18902885Sahrens dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
1891789Sahrens {
18922885Sahrens 	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
18932885Sahrens 	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
18945367Sahrens 	stat->dds_guid = ds->ds_phys->ds_guid;
18952885Sahrens 	if (ds->ds_phys->ds_next_snap_obj) {
18962885Sahrens 		stat->dds_is_snapshot = B_TRUE;
18972885Sahrens 		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
18982885Sahrens 	}
18992885Sahrens 
19002885Sahrens 	/* clone origin is really a dsl_dir thing... */
19015446Sahrens 	rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
19025367Sahrens 	if (ds->ds_dir->dd_phys->dd_origin_obj) {
19032885Sahrens 		dsl_dataset_t *ods;
19042885Sahrens 
19056689Smaybee 		VERIFY(0 == dsl_dataset_get_ref(ds->ds_dir->dd_pool,
19066689Smaybee 		    ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &ods));
19075367Sahrens 		dsl_dataset_name(ods, stat->dds_origin);
19086689Smaybee 		dsl_dataset_drop_ref(ods, FTAG);
19092885Sahrens 	}
19105446Sahrens 	rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
19112885Sahrens }
19122885Sahrens 
19132885Sahrens uint64_t
19142885Sahrens dsl_dataset_fsid_guid(dsl_dataset_t *ds)
19152885Sahrens {
19164787Sahrens 	return (ds->ds_fsid_guid);
19172885Sahrens }
19182885Sahrens 
19192885Sahrens void
19202885Sahrens dsl_dataset_space(dsl_dataset_t *ds,
19212885Sahrens     uint64_t *refdbytesp, uint64_t *availbytesp,
19222885Sahrens     uint64_t *usedobjsp, uint64_t *availobjsp)
19232885Sahrens {
19242885Sahrens 	*refdbytesp = ds->ds_phys->ds_used_bytes;
19252885Sahrens 	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
19265378Sck153898 	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
19275378Sck153898 		*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
19285378Sck153898 	if (ds->ds_quota != 0) {
19295378Sck153898 		/*
19305378Sck153898 		 * Adjust available bytes according to refquota
19315378Sck153898 		 */
19325378Sck153898 		if (*refdbytesp < ds->ds_quota)
19335378Sck153898 			*availbytesp = MIN(*availbytesp,
19345378Sck153898 			    ds->ds_quota - *refdbytesp);
19355378Sck153898 		else
19365378Sck153898 			*availbytesp = 0;
19375378Sck153898 	}
19382885Sahrens 	*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
19392885Sahrens 	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
1940789Sahrens }
1941789Sahrens 
19425326Sek110237 boolean_t
19435326Sek110237 dsl_dataset_modified_since_lastsnap(dsl_dataset_t *ds)
19445326Sek110237 {
19455326Sek110237 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
19465326Sek110237 
19475326Sek110237 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
19485326Sek110237 	    dsl_pool_sync_context(dp));
19495326Sek110237 	if (ds->ds_prev == NULL)
19505326Sek110237 		return (B_FALSE);
19515326Sek110237 	if (ds->ds_phys->ds_bp.blk_birth >
19525326Sek110237 	    ds->ds_prev->ds_phys->ds_creation_txg)
19535326Sek110237 		return (B_TRUE);
19545326Sek110237 	return (B_FALSE);
19555326Sek110237 }
19565326Sek110237 
19572199Sahrens /* ARGSUSED */
1958789Sahrens static int
19592199Sahrens dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
1960789Sahrens {
19612199Sahrens 	dsl_dataset_t *ds = arg1;
19622199Sahrens 	char *newsnapname = arg2;
19632199Sahrens 	dsl_dir_t *dd = ds->ds_dir;
19642199Sahrens 	dsl_dataset_t *hds;
19652199Sahrens 	uint64_t val;
1966789Sahrens 	int err;
1967789Sahrens 
19686689Smaybee 	err = dsl_dataset_hold_obj(dd->dd_pool,
19696689Smaybee 	    dd->dd_phys->dd_head_dataset_obj, FTAG, &hds);
1970789Sahrens 	if (err)
1971789Sahrens 		return (err);
1972789Sahrens 
19732199Sahrens 	/* new name better not be in use */
19746689Smaybee 	err = dsl_dataset_snap_lookup(hds, newsnapname, &val);
19756689Smaybee 	dsl_dataset_rele(hds, FTAG);
1976789Sahrens 
19772199Sahrens 	if (err == 0)
19782199Sahrens 		err = EEXIST;
19792199Sahrens 	else if (err == ENOENT)
19802199Sahrens 		err = 0;
19814007Smmusante 
19824007Smmusante 	/* dataset name + 1 for the "@" + the new snapshot name must fit */
19834007Smmusante 	if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN)
19844007Smmusante 		err = ENAMETOOLONG;
19854007Smmusante 
19862199Sahrens 	return (err);
19872199Sahrens }
1988789Sahrens 
19892199Sahrens static void
19904543Smarks dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2,
19914543Smarks     cred_t *cr, dmu_tx_t *tx)
19922199Sahrens {
19932199Sahrens 	dsl_dataset_t *ds = arg1;
19944543Smarks 	const char *newsnapname = arg2;
19952199Sahrens 	dsl_dir_t *dd = ds->ds_dir;
19962199Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
19972199Sahrens 	dsl_dataset_t *hds;
19982199Sahrens 	int err;
1999789Sahrens 
20002199Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
2001789Sahrens 
20026689Smaybee 	VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
20036689Smaybee 	    dd->dd_phys->dd_head_dataset_obj, FTAG, &hds));
2004789Sahrens 
20052199Sahrens 	VERIFY(0 == dsl_dataset_get_snapname(ds));
20066689Smaybee 	err = dsl_dataset_snap_remove(hds, ds->ds_snapname, tx);
2007789Sahrens 	ASSERT3U(err, ==, 0);
20082199Sahrens 	mutex_enter(&ds->ds_lock);
20092199Sahrens 	(void) strcpy(ds->ds_snapname, newsnapname);
20102199Sahrens 	mutex_exit(&ds->ds_lock);
20112199Sahrens 	err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
20122199Sahrens 	    ds->ds_snapname, 8, 1, &ds->ds_object, tx);
2013789Sahrens 	ASSERT3U(err, ==, 0);
2014789Sahrens 
20154543Smarks 	spa_history_internal_log(LOG_DS_RENAME, dd->dd_pool->dp_spa, tx,
20164543Smarks 	    cr, "dataset = %llu", ds->ds_object);
20176689Smaybee 	dsl_dataset_rele(hds, FTAG);
2018789Sahrens }
2019789Sahrens 
20205326Sek110237 struct renamesnaparg {
20214007Smmusante 	dsl_sync_task_group_t *dstg;
20224007Smmusante 	char failed[MAXPATHLEN];
20234007Smmusante 	char *oldsnap;
20244007Smmusante 	char *newsnap;
20254007Smmusante };
20264007Smmusante 
20274007Smmusante static int
20284007Smmusante dsl_snapshot_rename_one(char *name, void *arg)
20294007Smmusante {
20305326Sek110237 	struct renamesnaparg *ra = arg;
20314007Smmusante 	dsl_dataset_t *ds = NULL;
20324007Smmusante 	char *cp;
20334007Smmusante 	int err;
20344007Smmusante 
20354007Smmusante 	cp = name + strlen(name);
20364007Smmusante 	*cp = '@';
20374007Smmusante 	(void) strcpy(cp + 1, ra->oldsnap);
20384543Smarks 
20394543Smarks 	/*
20404543Smarks 	 * For recursive snapshot renames the parent won't be changing
20414543Smarks 	 * so we just pass name for both the to/from argument.
20424543Smarks 	 */
20434543Smarks 	if (err = zfs_secpolicy_rename_perms(name, name, CRED())) {
20444543Smarks 		(void) strcpy(ra->failed, name);
20454543Smarks 		return (err);
20464543Smarks 	}
20474543Smarks 
20486689Smaybee #ifdef _KERNEL
20496689Smaybee 	/*
20506689Smaybee 	 * For all filesystems undergoing rename, we'll need to unmount it.
20516689Smaybee 	 */
20526689Smaybee 	(void) zfs_unmount_snap(name, NULL);
20536689Smaybee #endif
20546689Smaybee 	err = dsl_dataset_hold(name, ra->dstg, &ds);
20556689Smaybee 	*cp = '\0';
20564007Smmusante 	if (err == ENOENT) {
20574007Smmusante 		return (0);
20586689Smaybee 	} else if (err) {
20594007Smmusante 		(void) strcpy(ra->failed, name);
20604007Smmusante 		return (err);
20614007Smmusante 	}
20624007Smmusante 
20634007Smmusante 	dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check,
20644007Smmusante 	    dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0);
20654007Smmusante 
20664007Smmusante 	return (0);
20674007Smmusante }
20684007Smmusante 
20694007Smmusante static int
20704007Smmusante dsl_recursive_rename(char *oldname, const char *newname)
20714007Smmusante {
20724007Smmusante 	int err;
20735326Sek110237 	struct renamesnaparg *ra;
20744007Smmusante 	dsl_sync_task_t *dst;
20754007Smmusante 	spa_t *spa;
20764007Smmusante 	char *cp, *fsname = spa_strdup(oldname);
20774007Smmusante 	int len = strlen(oldname);
20784007Smmusante 
20794007Smmusante 	/* truncate the snapshot name to get the fsname */
20804007Smmusante 	cp = strchr(fsname, '@');
20814007Smmusante 	*cp = '\0';
20824007Smmusante 
20834603Sahrens 	err = spa_open(fsname, &spa, FTAG);
20844007Smmusante 	if (err) {
20854007Smmusante 		kmem_free(fsname, len + 1);
20864007Smmusante 		return (err);
20874007Smmusante 	}
20885326Sek110237 	ra = kmem_alloc(sizeof (struct renamesnaparg), KM_SLEEP);
20894007Smmusante 	ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
20904007Smmusante 
20914007Smmusante 	ra->oldsnap = strchr(oldname, '@') + 1;
20924007Smmusante 	ra->newsnap = strchr(newname, '@') + 1;
20934007Smmusante 	*ra->failed = '\0';
20944007Smmusante 
20954007Smmusante 	err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra,
20964007Smmusante 	    DS_FIND_CHILDREN);
20974007Smmusante 	kmem_free(fsname, len + 1);
20984007Smmusante 
20994007Smmusante 	if (err == 0) {
21004007Smmusante 		err = dsl_sync_task_group_wait(ra->dstg);
21014007Smmusante 	}
21024007Smmusante 
21034007Smmusante 	for (dst = list_head(&ra->dstg->dstg_tasks); dst;
21044007Smmusante 	    dst = list_next(&ra->dstg->dstg_tasks, dst)) {
21054007Smmusante 		dsl_dataset_t *ds = dst->dst_arg1;
21064007Smmusante 		if (dst->dst_err) {
21074007Smmusante 			dsl_dir_name(ds->ds_dir, ra->failed);
21084009Smmusante 			(void) strcat(ra->failed, "@");
21094009Smmusante 			(void) strcat(ra->failed, ra->newsnap);
21104007Smmusante 		}
21116689Smaybee 		dsl_dataset_rele(ds, ra->dstg);
21124007Smmusante 	}
21134007Smmusante 
21144543Smarks 	if (err)
21154543Smarks 		(void) strcpy(oldname, ra->failed);
21164007Smmusante 
21174007Smmusante 	dsl_sync_task_group_destroy(ra->dstg);
21185326Sek110237 	kmem_free(ra, sizeof (struct renamesnaparg));
21194007Smmusante 	spa_close(spa, FTAG);
21204007Smmusante 	return (err);
21214007Smmusante }
21224007Smmusante 
21234569Smmusante static int
21244569Smmusante dsl_valid_rename(char *oldname, void *arg)
21254569Smmusante {
21264569Smmusante 	int delta = *(int *)arg;
21274569Smmusante 
21284569Smmusante 	if (strlen(oldname) + delta >= MAXNAMELEN)
21294569Smmusante 		return (ENAMETOOLONG);
21304569Smmusante 
21314569Smmusante 	return (0);
21324569Smmusante }
21334569Smmusante 
2134789Sahrens #pragma weak dmu_objset_rename = dsl_dataset_rename
2135789Sahrens int
21366689Smaybee dsl_dataset_rename(char *oldname, const char *newname, boolean_t recursive)
2137789Sahrens {
2138789Sahrens 	dsl_dir_t *dd;
21392199Sahrens 	dsl_dataset_t *ds;
2140789Sahrens 	const char *tail;
2141789Sahrens 	int err;
2142789Sahrens 
21432199Sahrens 	err = dsl_dir_open(oldname, FTAG, &dd, &tail);
21441544Seschrock 	if (err)
21451544Seschrock 		return (err);
2146789Sahrens 	if (tail == NULL) {
21474569Smmusante 		int delta = strlen(newname) - strlen(oldname);
21484569Smmusante 
21494569Smmusante 		/* if we're growing, validate child size lengths */
21504569Smmusante 		if (delta > 0)
21514569Smmusante 			err = dmu_objset_find(oldname, dsl_valid_rename,
21524569Smmusante 			    &delta, DS_FIND_CHILDREN | DS_FIND_SNAPSHOTS);
21534569Smmusante 
21544569Smmusante 		if (!err)
21554569Smmusante 			err = dsl_dir_rename(dd, newname);
2156789Sahrens 		dsl_dir_close(dd, FTAG);
2157789Sahrens 		return (err);
2158789Sahrens 	}
2159789Sahrens 	if (tail[0] != '@') {
2160789Sahrens 		/* the name ended in a nonexistant component */
2161789Sahrens 		dsl_dir_close(dd, FTAG);
2162789Sahrens 		return (ENOENT);
2163789Sahrens 	}
2164789Sahrens 
21652199Sahrens 	dsl_dir_close(dd, FTAG);
21662199Sahrens 
21672199Sahrens 	/* new name must be snapshot in same filesystem */
21682199Sahrens 	tail = strchr(newname, '@');
21692199Sahrens 	if (tail == NULL)
21702199Sahrens 		return (EINVAL);
21712199Sahrens 	tail++;
21722199Sahrens 	if (strncmp(oldname, newname, tail - newname) != 0)
21732199Sahrens 		return (EXDEV);
2174789Sahrens 
21754007Smmusante 	if (recursive) {
21764007Smmusante 		err = dsl_recursive_rename(oldname, newname);
21774007Smmusante 	} else {
21786689Smaybee 		err = dsl_dataset_hold(oldname, FTAG, &ds);
21794007Smmusante 		if (err)
21804007Smmusante 			return (err);
21812199Sahrens 
21824007Smmusante 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
21834007Smmusante 		    dsl_dataset_snapshot_rename_check,
21844007Smmusante 		    dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);
21852199Sahrens 
21866689Smaybee 		dsl_dataset_rele(ds, FTAG);
21874007Smmusante 	}
21882199Sahrens 
2189789Sahrens 	return (err);
2190789Sahrens }
21912082Seschrock 
21926689Smaybee struct promotedsarg {
21936689Smaybee 	list_node_t link;
21946689Smaybee 	dsl_dataset_t *ds;
21956689Smaybee };
21966689Smaybee 
21972199Sahrens struct promotearg {
21986689Smaybee 	list_t snap_list;
21996689Smaybee 	dsl_dataset_t *clone_origin, *old_head;
22002199Sahrens 	uint64_t used, comp, uncomp, unique;
22016689Smaybee 	uint64_t newnext_obj;
22022199Sahrens };
22032199Sahrens 
22044543Smarks /* ARGSUSED */
22052082Seschrock static int
22062199Sahrens dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
22072082Seschrock {
22082199Sahrens 	dsl_dataset_t *hds = arg1;
22092199Sahrens 	struct promotearg *pa = arg2;
22106689Smaybee 	struct promotedsarg *snap = list_head(&pa->snap_list);
22112199Sahrens 	dsl_pool_t *dp = hds->ds_dir->dd_pool;
22126689Smaybee 	dsl_dataset_t *origin_ds = snap->ds;
22136689Smaybee 	dsl_dataset_t *newnext_ds;
22146689Smaybee 	char *name;
22152199Sahrens 	uint64_t itor = 0;
22162082Seschrock 	blkptr_t bp;
22176689Smaybee 	int err;
22182199Sahrens 
22192082Seschrock 	/* Check that it is a clone */
22206689Smaybee 	if (hds->ds_dir->dd_phys->dd_origin_obj == 0)
22212082Seschrock 		return (EINVAL);
22222082Seschrock 
22232199Sahrens 	/* Since this is so expensive, don't do the preliminary check */
22242199Sahrens 	if (!dmu_tx_is_syncing(tx))
22252199Sahrens 		return (0);
22262199Sahrens 
22276689Smaybee 	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE)
22286689Smaybee 		return (EXDEV);
22292082Seschrock 
22305367Sahrens 	/* find origin's new next ds */
22316689Smaybee 	newnext_ds = hds;
22325367Sahrens 	while (newnext_ds->ds_phys->ds_prev_snap_obj != origin_ds->ds_object) {
22332082Seschrock 		dsl_dataset_t *prev;
22342082Seschrock 
22356689Smaybee 		err = dsl_dataset_hold_obj(dp,
22366689Smaybee 		    newnext_ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
22376689Smaybee 		if (newnext_ds != hds)
22386689Smaybee 			dsl_dataset_rele(newnext_ds, FTAG);
22396689Smaybee 		if (err)
22406689Smaybee 			return (err);
22412082Seschrock 		newnext_ds = prev;
22422082Seschrock 	}
22432199Sahrens 	pa->newnext_obj = newnext_ds->ds_object;
22442082Seschrock 
22455367Sahrens 	/* compute origin's new unique space */
22466689Smaybee 	pa->unique = 0;
22472082Seschrock 	while ((err = bplist_iterate(&newnext_ds->ds_deadlist,
22482082Seschrock 	    &itor, &bp)) == 0) {
22495367Sahrens 		if (bp.blk_birth > origin_ds->ds_phys->ds_prev_snap_txg)
22506689Smaybee 			pa->unique += bp_get_dasize(dp->dp_spa, &bp);
22512082Seschrock 	}
22526689Smaybee 	if (newnext_ds != hds)
22536689Smaybee 		dsl_dataset_rele(newnext_ds, FTAG);
22542082Seschrock 	if (err != ENOENT)
22556689Smaybee 		return (err);
22566689Smaybee 
22572082Seschrock 	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
22586689Smaybee 
22596689Smaybee 	/*
22606689Smaybee 	 * Walk the snapshots that we are moving
22616689Smaybee 	 *
22626689Smaybee 	 * Compute space to transfer.  Each snapshot gave birth to:
22636689Smaybee 	 * (my used) - (prev's used) + (deadlist's used)
22646689Smaybee 	 * So a sequence would look like:
22656689Smaybee 	 * uN - u(N-1) + dN + ... + u1 - u0 + d1 + u0 - 0 + d0
22666689Smaybee 	 * Which simplifies to:
22676689Smaybee 	 * uN + dN + ... + d1 + d0
22686689Smaybee 	 * Note however, if we stop before we reach the ORIGIN we get:
22696689Smaybee 	 * uN + dN + ... + dM - uM-1
22706689Smaybee 	 */
22716689Smaybee 	pa->used = origin_ds->ds_phys->ds_used_bytes;
22726689Smaybee 	pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
22736689Smaybee 	pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
22746689Smaybee 	do {
22752082Seschrock 		uint64_t val, dlused, dlcomp, dluncomp;
22766689Smaybee 		dsl_dataset_t *ds = snap->ds;
22772082Seschrock 
22782082Seschrock 		/* Check that the snapshot name does not conflict */
22792082Seschrock 		dsl_dataset_name(ds, name);
22806689Smaybee 		err = dsl_dataset_snap_lookup(hds, ds->ds_snapname, &val);
22816689Smaybee 		if (err == 0)
22826689Smaybee 			err = EEXIST;
22836689Smaybee 		if (err != ENOENT)
22842082Seschrock 			break;
22856689Smaybee 		err = 0;
22866689Smaybee 
22876689Smaybee 		/* The very first snapshot does not have a deadlist */
22886689Smaybee 		if (ds->ds_phys->ds_prev_snap_obj != 0) {
22896689Smaybee 			if (err = bplist_space(&ds->ds_deadlist,
22906689Smaybee 			    &dlused, &dlcomp, &dluncomp))
22916689Smaybee 				break;
22926689Smaybee 			pa->used += dlused;
22936689Smaybee 			pa->comp += dlcomp;
22946689Smaybee 			pa->uncomp += dluncomp;
22952082Seschrock 		}
22966689Smaybee 	} while (snap = list_next(&pa->snap_list, snap));
22976689Smaybee 
22986689Smaybee 	/*
22996689Smaybee 	 * If we are a clone of a clone then we never reached ORIGIN,
23006689Smaybee 	 * so we need to subtract out the clone origin's used space.
23016689Smaybee 	 */
23026689Smaybee 	if (pa->clone_origin) {
23036689Smaybee 		pa->used -= pa->clone_origin->ds_phys->ds_used_bytes;
23046689Smaybee 		pa->comp -= pa->clone_origin->ds_phys->ds_compressed_bytes;
23056689Smaybee 		pa->uncomp -= pa->clone_origin->ds_phys->ds_uncompressed_bytes;
23062082Seschrock 	}
23072082Seschrock 
23086689Smaybee 	kmem_free(name, MAXPATHLEN);
23096689Smaybee 
23102082Seschrock 	/* Check that there is enough space here */
23116689Smaybee 	if (err == 0) {
23126689Smaybee 		dsl_dir_t *odd = origin_ds->ds_dir;
23136689Smaybee 		err = dsl_dir_transfer_possible(odd, hds->ds_dir, pa->used);
23146689Smaybee 	}
23156689Smaybee 
23162199Sahrens 	return (err);
23172199Sahrens }
23182082Seschrock 
23192199Sahrens static void
23204543Smarks dsl_dataset_promote_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
23212199Sahrens {
23222199Sahrens 	dsl_dataset_t *hds = arg1;
23232199Sahrens 	struct promotearg *pa = arg2;
23246689Smaybee 	struct promotedsarg *snap = list_head(&pa->snap_list);
23256689Smaybee 	dsl_dataset_t *origin_ds = snap->ds;
23262199Sahrens 	dsl_dir_t *dd = hds->ds_dir;
23272199Sahrens 	dsl_pool_t *dp = hds->ds_dir->dd_pool;
23285367Sahrens 	dsl_dir_t *odd = NULL;
23292199Sahrens 	char *name;
23302199Sahrens 
23312199Sahrens 	ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));
23322199Sahrens 
23332417Sahrens 	/*
23345367Sahrens 	 * We need to explicitly open odd, since origin_ds's dd will be
23352417Sahrens 	 * changing.
23362417Sahrens 	 */
23375367Sahrens 	VERIFY(0 == dsl_dir_open_obj(dp, origin_ds->ds_dir->dd_object,
23385367Sahrens 	    NULL, FTAG, &odd));
23392082Seschrock 
23406689Smaybee 	/* change origin's next snap */
23416689Smaybee 	dmu_buf_will_dirty(origin_ds->ds_dbuf, tx);
23426689Smaybee 	origin_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj;
23436689Smaybee 
23446689Smaybee 	/* change origin */
23456689Smaybee 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
23466689Smaybee 	ASSERT3U(dd->dd_phys->dd_origin_obj, ==, origin_ds->ds_object);
23476689Smaybee 	dd->dd_phys->dd_origin_obj = odd->dd_phys->dd_origin_obj;
23486689Smaybee 	dmu_buf_will_dirty(odd->dd_dbuf, tx);
23496689Smaybee 	odd->dd_phys->dd_origin_obj = origin_ds->ds_object;
23506689Smaybee 
23512082Seschrock 	/* move snapshots to this dir */
23522199Sahrens 	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
23536689Smaybee 	do {
23546689Smaybee 		dsl_dataset_t *ds = snap->ds;
23552082Seschrock 
23562082Seschrock 		/* move snap name entry */
23572082Seschrock 		dsl_dataset_name(ds, name);
23586689Smaybee 		VERIFY(0 == dsl_dataset_snap_remove(pa->old_head,
23596689Smaybee 		    ds->ds_snapname, tx));
23602199Sahrens 		VERIFY(0 == zap_add(dp->dp_meta_objset,
23612082Seschrock 		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
23622082Seschrock 		    8, 1, &ds->ds_object, tx));
23632082Seschrock 
23642082Seschrock 		/* change containing dsl_dir */
23652082Seschrock 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
23665367Sahrens 		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, odd->dd_object);
23672082Seschrock 		ds->ds_phys->ds_dir_obj = dd->dd_object;
23685367Sahrens 		ASSERT3P(ds->ds_dir, ==, odd);
23692082Seschrock 		dsl_dir_close(ds->ds_dir, ds);
23702199Sahrens 		VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
23712082Seschrock 		    NULL, ds, &ds->ds_dir));
23722082Seschrock 
23732082Seschrock 		ASSERT3U(dsl_prop_numcb(ds), ==, 0);
23746689Smaybee 	} while (snap = list_next(&pa->snap_list, snap));
23752082Seschrock 
23762082Seschrock 	/* change space accounting */
23775367Sahrens 	dsl_dir_diduse_space(odd, -pa->used, -pa->comp, -pa->uncomp, tx);
23782199Sahrens 	dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx);
23795367Sahrens 	origin_ds->ds_phys->ds_unique_bytes = pa->unique;
23802082Seschrock 
23814543Smarks 	/* log history record */
23824543Smarks 	spa_history_internal_log(LOG_DS_PROMOTE, dd->dd_pool->dp_spa, tx,
23836689Smaybee 	    cr, "dataset = %llu", hds->ds_object);
23844543Smarks 
23855367Sahrens 	dsl_dir_close(odd, FTAG);
23862199Sahrens 	kmem_free(name, MAXPATHLEN);
23872082Seschrock }
23882082Seschrock 
23892082Seschrock int
23902082Seschrock dsl_dataset_promote(const char *name)
23912082Seschrock {
23922082Seschrock 	dsl_dataset_t *ds;
23936689Smaybee 	dsl_dir_t *dd;
23946689Smaybee 	dsl_pool_t *dp;
23952082Seschrock 	dmu_object_info_t doi;
23962199Sahrens 	struct promotearg pa;
23976689Smaybee 	struct promotedsarg *snap;
23986689Smaybee 	uint64_t snap_obj;
23996689Smaybee 	uint64_t last_snap = 0;
24006689Smaybee 	int err;
24016689Smaybee 
24026689Smaybee 	err = dsl_dataset_hold(name, FTAG, &ds);
24032082Seschrock 	if (err)
24042082Seschrock 		return (err);
24056689Smaybee 	dd = ds->ds_dir;
24066689Smaybee 	dp = dd->dd_pool;
24076689Smaybee 
24086689Smaybee 	err = dmu_object_info(dp->dp_meta_objset,
24092082Seschrock 	    ds->ds_phys->ds_snapnames_zapobj, &doi);
24102082Seschrock 	if (err) {
24116689Smaybee 		dsl_dataset_rele(ds, FTAG);
24122082Seschrock 		return (err);
24132082Seschrock 	}
24142082Seschrock 
24152082Seschrock 	/*
24166689Smaybee 	 * We are going to inherit all the snapshots taken before our
24176689Smaybee 	 * origin (i.e., our new origin will be our parent's origin).
24186689Smaybee 	 * Take ownership of them so that we can rename them into our
24196689Smaybee 	 * namespace.
24206689Smaybee 	 */
24216689Smaybee 	pa.clone_origin = NULL;
24226689Smaybee 	list_create(&pa.snap_list,
24236689Smaybee 	    sizeof (struct promotedsarg), offsetof(struct promotedsarg, link));
24246689Smaybee 	rw_enter(&dp->dp_config_rwlock, RW_READER);
24256689Smaybee 	ASSERT(dd->dd_phys->dd_origin_obj != 0);
24266689Smaybee 	snap_obj = dd->dd_phys->dd_origin_obj;
24276689Smaybee 	while (snap_obj) {
24286689Smaybee 		snap = kmem_alloc(sizeof (struct promotedsarg), KM_SLEEP);
24296689Smaybee 		err = dsl_dataset_own_obj(dp, snap_obj, 0, FTAG, &snap->ds);
24306689Smaybee 		if (err == ENOENT) {
24316689Smaybee 			/* lost race with snapshot destroy */
24326689Smaybee 			struct promotedsarg *last = list_tail(&pa.snap_list);
24336689Smaybee 			ASSERT(snap_obj != last->ds->ds_phys->ds_prev_snap_obj);
24346689Smaybee 			snap_obj = last->ds->ds_phys->ds_prev_snap_obj;
24356689Smaybee 			kmem_free(snap, sizeof (struct promotedsarg));
24366689Smaybee 			continue;
24376689Smaybee 		} else if (err) {
24386689Smaybee 			kmem_free(snap, sizeof (struct promotedsarg));
24396689Smaybee 			rw_exit(&dp->dp_config_rwlock);
24406689Smaybee 			goto out;
24416689Smaybee 		}
24426689Smaybee 		/*
24436689Smaybee 		 * We could be a clone of a clone.  If we reach our
24446689Smaybee 		 * parent's branch point, we're done.
24456689Smaybee 		 */
24466689Smaybee 		if (last_snap &&
24476689Smaybee 		    snap->ds->ds_phys->ds_next_snap_obj != last_snap) {
24486689Smaybee 			pa.clone_origin = snap->ds;
24496689Smaybee 			kmem_free(snap, sizeof (struct promotedsarg));
24506689Smaybee 			snap_obj = 0;
24516689Smaybee 		} else {
24526689Smaybee 			list_insert_tail(&pa.snap_list, snap);
24536689Smaybee 			last_snap = snap_obj;
24546689Smaybee 			snap_obj = snap->ds->ds_phys->ds_prev_snap_obj;
24556689Smaybee 		}
24566689Smaybee 	}
24576689Smaybee 	snap = list_head(&pa.snap_list);
24586689Smaybee 	ASSERT(snap != NULL);
24596689Smaybee 	err = dsl_dataset_hold_obj(dp,
24606689Smaybee 	    snap->ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG, &pa.old_head);
24616689Smaybee 	rw_exit(&dp->dp_config_rwlock);
24626689Smaybee 
24636689Smaybee 	if (err)
24646689Smaybee 		goto out;
24656689Smaybee 
24666689Smaybee 	/*
24672082Seschrock 	 * Add in 128x the snapnames zapobj size, since we will be moving
24682082Seschrock 	 * a bunch of snapnames to the promoted ds, and dirtying their
24692082Seschrock 	 * bonus buffers.
24702082Seschrock 	 */
24716689Smaybee 	err = dsl_sync_task_do(dp, dsl_dataset_promote_check,
24722199Sahrens 	    dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks);
24736689Smaybee 
24746689Smaybee 	dsl_dataset_rele(pa.old_head, FTAG);
24756689Smaybee out:
24766689Smaybee 	while ((snap = list_tail(&pa.snap_list)) != NULL) {
24776689Smaybee 		list_remove(&pa.snap_list, snap);
24786689Smaybee 		dsl_dataset_disown(snap->ds, FTAG);
24796689Smaybee 		kmem_free(snap, sizeof (struct promotedsarg));
24806689Smaybee 	}
24816689Smaybee 	list_destroy(&pa.snap_list);
24826689Smaybee 	if (pa.clone_origin)
24836689Smaybee 		dsl_dataset_disown(pa.clone_origin, FTAG);
24846689Smaybee 	dsl_dataset_rele(ds, FTAG);
24852082Seschrock 	return (err);
24862082Seschrock }
24873912Slling 
24885367Sahrens struct cloneswaparg {
24895367Sahrens 	dsl_dataset_t *cds; /* clone dataset */
24905367Sahrens 	dsl_dataset_t *ohds; /* origin's head dataset */
24915367Sahrens 	boolean_t force;
24925481Sck153898 	int64_t unused_refres_delta; /* change in unconsumed refreservation */
24935367Sahrens };
24945326Sek110237 
24955326Sek110237 /* ARGSUSED */
24965326Sek110237 static int
24975326Sek110237 dsl_dataset_clone_swap_check(void *arg1, void *arg2, dmu_tx_t *tx)
24985326Sek110237 {
24995367Sahrens 	struct cloneswaparg *csa = arg1;
25005326Sek110237 
25015367Sahrens 	/* they should both be heads */
25025367Sahrens 	if (dsl_dataset_is_snapshot(csa->cds) ||
25035367Sahrens 	    dsl_dataset_is_snapshot(csa->ohds))
25045326Sek110237 		return (EINVAL);
25055326Sek110237 
25065367Sahrens 	/* the branch point should be just before them */
25075367Sahrens 	if (csa->cds->ds_prev != csa->ohds->ds_prev)
25085326Sek110237 		return (EINVAL);
25095326Sek110237 
25105367Sahrens 	/* cds should be the clone */
25115367Sahrens 	if (csa->cds->ds_prev->ds_phys->ds_next_snap_obj !=
25125367Sahrens 	    csa->ohds->ds_object)
25135367Sahrens 		return (EINVAL);
25145326Sek110237 
25155367Sahrens 	/* the clone should be a child of the origin */
25165367Sahrens 	if (csa->cds->ds_dir->dd_parent != csa->ohds->ds_dir)
25175367Sahrens 		return (EINVAL);
25185326Sek110237 
25195367Sahrens 	/* ohds shouldn't be modified unless 'force' */
25205367Sahrens 	if (!csa->force && dsl_dataset_modified_since_lastsnap(csa->ohds))
25215367Sahrens 		return (ETXTBSY);
25225481Sck153898 
25235481Sck153898 	/* adjust amount of any unconsumed refreservation */
25245481Sck153898 	csa->unused_refres_delta =
25255481Sck153898 	    (int64_t)MIN(csa->ohds->ds_reserved,
25265481Sck153898 	    csa->ohds->ds_phys->ds_unique_bytes) -
25275481Sck153898 	    (int64_t)MIN(csa->ohds->ds_reserved,
25285481Sck153898 	    csa->cds->ds_phys->ds_unique_bytes);
25295481Sck153898 
25305481Sck153898 	if (csa->unused_refres_delta > 0 &&
25315481Sck153898 	    csa->unused_refres_delta >
25325481Sck153898 	    dsl_dir_space_available(csa->ohds->ds_dir, NULL, 0, TRUE))
25335481Sck153898 		return (ENOSPC);
25345481Sck153898 
25355367Sahrens 	return (0);
25365326Sek110237 }
25375326Sek110237 
25385326Sek110237 /* ARGSUSED */
25395326Sek110237 static void
25405326Sek110237 dsl_dataset_clone_swap_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
25415326Sek110237 {
25425367Sahrens 	struct cloneswaparg *csa = arg1;
25435367Sahrens 	dsl_pool_t *dp = csa->cds->ds_dir->dd_pool;
25445326Sek110237 	uint64_t itor = 0;
25455326Sek110237 	blkptr_t bp;
25465326Sek110237 	uint64_t unique = 0;
25475326Sek110237 	int err;
25485326Sek110237 
25495481Sck153898 	ASSERT(csa->cds->ds_reserved == 0);
25505481Sck153898 	ASSERT(csa->cds->ds_quota == csa->ohds->ds_quota);
25515481Sck153898 
25525367Sahrens 	dmu_buf_will_dirty(csa->cds->ds_dbuf, tx);
25535367Sahrens 	dmu_buf_will_dirty(csa->ohds->ds_dbuf, tx);
25545367Sahrens 	dmu_buf_will_dirty(csa->cds->ds_prev->ds_dbuf, tx);
25555326Sek110237 
25565367Sahrens 	if (csa->cds->ds_user_ptr != NULL) {
25575367Sahrens 		csa->cds->ds_user_evict_func(csa->cds, csa->cds->ds_user_ptr);
25585367Sahrens 		csa->cds->ds_user_ptr = NULL;
25595367Sahrens 	}
25605326Sek110237 
25615367Sahrens 	if (csa->ohds->ds_user_ptr != NULL) {
25625367Sahrens 		csa->ohds->ds_user_evict_func(csa->ohds,
25635367Sahrens 		    csa->ohds->ds_user_ptr);
25645367Sahrens 		csa->ohds->ds_user_ptr = NULL;
25655367Sahrens 	}
25665326Sek110237 
25675326Sek110237 	/* compute unique space */
25685367Sahrens 	while ((err = bplist_iterate(&csa->cds->ds_deadlist,
25695367Sahrens 	    &itor, &bp)) == 0) {
25705367Sahrens 		if (bp.blk_birth > csa->cds->ds_prev->ds_phys->ds_prev_snap_txg)
25715367Sahrens 			unique += bp_get_dasize(dp->dp_spa, &bp);
25725326Sek110237 	}
25735326Sek110237 	VERIFY(err == ENOENT);
25745326Sek110237 
25755326Sek110237 	/* reset origin's unique bytes */
25765367Sahrens 	csa->cds->ds_prev->ds_phys->ds_unique_bytes = unique;
25775326Sek110237 
25785326Sek110237 	/* swap blkptrs */
25795326Sek110237 	{
25805326Sek110237 		blkptr_t tmp;
25815367Sahrens 		tmp = csa->ohds->ds_phys->ds_bp;
25825367Sahrens 		csa->ohds->ds_phys->ds_bp = csa->cds->ds_phys->ds_bp;
25835367Sahrens 		csa->cds->ds_phys->ds_bp = tmp;
25845326Sek110237 	}
25855326Sek110237 
25865326Sek110237 	/* set dd_*_bytes */
25875326Sek110237 	{
25885326Sek110237 		int64_t dused, dcomp, duncomp;
25895326Sek110237 		uint64_t cdl_used, cdl_comp, cdl_uncomp;
25905326Sek110237 		uint64_t odl_used, odl_comp, odl_uncomp;
25915326Sek110237 
25925367Sahrens 		VERIFY(0 == bplist_space(&csa->cds->ds_deadlist, &cdl_used,
25935326Sek110237 		    &cdl_comp, &cdl_uncomp));
25945367Sahrens 		VERIFY(0 == bplist_space(&csa->ohds->ds_deadlist, &odl_used,
25955326Sek110237 		    &odl_comp, &odl_uncomp));
25965367Sahrens 		dused = csa->cds->ds_phys->ds_used_bytes + cdl_used -
25975367Sahrens 		    (csa->ohds->ds_phys->ds_used_bytes + odl_used);
25985367Sahrens 		dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
25995367Sahrens 		    (csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
26005367Sahrens 		duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
26015367Sahrens 		    cdl_uncomp -
26025367Sahrens 		    (csa->ohds->ds_phys->ds_uncompressed_bytes + odl_uncomp);
26035326Sek110237 
26045367Sahrens 		dsl_dir_diduse_space(csa->ohds->ds_dir,
26055367Sahrens 		    dused, dcomp, duncomp, tx);
26065367Sahrens 		dsl_dir_diduse_space(csa->cds->ds_dir,
26075367Sahrens 		    -dused, -dcomp, -duncomp, tx);
26085367Sahrens 	}
26095367Sahrens 
26105367Sahrens #define	SWITCH64(x, y) \
26115367Sahrens 	{ \
26125367Sahrens 		uint64_t __tmp = (x); \
26135367Sahrens 		(x) = (y); \
26145367Sahrens 		(y) = __tmp; \
26155326Sek110237 	}
26165326Sek110237 
26175326Sek110237 	/* swap ds_*_bytes */
26185367Sahrens 	SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
26195367Sahrens 	    csa->cds->ds_phys->ds_used_bytes);
26205367Sahrens 	SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
26215367Sahrens 	    csa->cds->ds_phys->ds_compressed_bytes);
26225367Sahrens 	SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
26235367Sahrens 	    csa->cds->ds_phys->ds_uncompressed_bytes);
26245481Sck153898 	SWITCH64(csa->ohds->ds_phys->ds_unique_bytes,
26255481Sck153898 	    csa->cds->ds_phys->ds_unique_bytes);
26265481Sck153898 
26275481Sck153898 	/* apply any parent delta for change in unconsumed refreservation */
26285481Sck153898 	dsl_dir_diduse_space(csa->ohds->ds_dir, csa->unused_refres_delta,
26295481Sck153898 	    0, 0, tx);
26305326Sek110237 
26315326Sek110237 	/* swap deadlists */
26325367Sahrens 	bplist_close(&csa->cds->ds_deadlist);
26335367Sahrens 	bplist_close(&csa->ohds->ds_deadlist);
26345367Sahrens 	SWITCH64(csa->ohds->ds_phys->ds_deadlist_obj,
26355367Sahrens 	    csa->cds->ds_phys->ds_deadlist_obj);
26365367Sahrens 	VERIFY(0 == bplist_open(&csa->cds->ds_deadlist, dp->dp_meta_objset,
26375367Sahrens 	    csa->cds->ds_phys->ds_deadlist_obj));
26385367Sahrens 	VERIFY(0 == bplist_open(&csa->ohds->ds_deadlist, dp->dp_meta_objset,
26395367Sahrens 	    csa->ohds->ds_phys->ds_deadlist_obj));
26405326Sek110237 }
26415326Sek110237 
26425326Sek110237 /*
26436689Smaybee  * Swap 'clone' with its origin head file system.  Used at the end
26446689Smaybee  * of "online recv" to swizzle the file system to the new version.
26455326Sek110237  */
26465326Sek110237 int
26475367Sahrens dsl_dataset_clone_swap(dsl_dataset_t *clone, dsl_dataset_t *origin_head,
26485367Sahrens     boolean_t force)
26495326Sek110237 {
26505367Sahrens 	struct cloneswaparg csa;
26516689Smaybee 	int error;
26526689Smaybee 
26536689Smaybee 	ASSERT(clone->ds_owner);
26546689Smaybee 	ASSERT(origin_head->ds_owner);
26556689Smaybee retry:
26566689Smaybee 	/* Need exclusive access for the swap */
26576689Smaybee 	rw_enter(&clone->ds_rwlock, RW_WRITER);
26586689Smaybee 	if (!rw_tryenter(&origin_head->ds_rwlock, RW_WRITER)) {
26596689Smaybee 		rw_exit(&clone->ds_rwlock);
26606689Smaybee 		rw_enter(&origin_head->ds_rwlock, RW_WRITER);
26616689Smaybee 		if (!rw_tryenter(&clone->ds_rwlock, RW_WRITER)) {
26626689Smaybee 			rw_exit(&origin_head->ds_rwlock);
26636689Smaybee 			goto retry;
26646689Smaybee 		}
26656689Smaybee 	}
26665367Sahrens 	csa.cds = clone;
26675367Sahrens 	csa.ohds = origin_head;
26685367Sahrens 	csa.force = force;
26696689Smaybee 	error = dsl_sync_task_do(clone->ds_dir->dd_pool,
26705326Sek110237 	    dsl_dataset_clone_swap_check,
26716689Smaybee 	    dsl_dataset_clone_swap_sync, &csa, NULL, 9);
26726689Smaybee 	return (error);
26735326Sek110237 }
26745326Sek110237 
26753912Slling /*
26763912Slling  * Given a pool name and a dataset object number in that pool,
26773912Slling  * return the name of that dataset.
26783912Slling  */
26793912Slling int
26803912Slling dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf)
26813912Slling {
26823912Slling 	spa_t *spa;
26833912Slling 	dsl_pool_t *dp;
26846689Smaybee 	dsl_dataset_t *ds;
26853912Slling 	int error;
26863912Slling 
26873912Slling 	if ((error = spa_open(pname, &spa, FTAG)) != 0)
26883912Slling 		return (error);
26893912Slling 	dp = spa_get_dsl(spa);
26903912Slling 	rw_enter(&dp->dp_config_rwlock, RW_READER);
26916689Smaybee 	if ((error = dsl_dataset_hold_obj(dp, obj, FTAG, &ds)) == 0) {
26926689Smaybee 		dsl_dataset_name(ds, buf);
26936689Smaybee 		dsl_dataset_rele(ds, FTAG);
26943912Slling 	}
26953912Slling 	rw_exit(&dp->dp_config_rwlock);
26963912Slling 	spa_close(spa, FTAG);
26973912Slling 
26986689Smaybee 	return (error);
26993912Slling }
27005378Sck153898 
27015378Sck153898 int
27025378Sck153898 dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
27036689Smaybee     uint64_t asize, uint64_t inflight, uint64_t *used, uint64_t *ref_rsrv)
27045378Sck153898 {
27055378Sck153898 	int error = 0;
27065378Sck153898 
27075378Sck153898 	ASSERT3S(asize, >, 0);
27085378Sck153898 
27095831Sck153898 	/*
27105831Sck153898 	 * *ref_rsrv is the portion of asize that will come from any
27115831Sck153898 	 * unconsumed refreservation space.
27125831Sck153898 	 */
27135831Sck153898 	*ref_rsrv = 0;
27145831Sck153898 
27155378Sck153898 	mutex_enter(&ds->ds_lock);
27165378Sck153898 	/*
27175378Sck153898 	 * Make a space adjustment for reserved bytes.
27185378Sck153898 	 */
27195378Sck153898 	if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes) {
27205378Sck153898 		ASSERT3U(*used, >=,
27215378Sck153898 		    ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
27225378Sck153898 		*used -= (ds->ds_reserved - ds->ds_phys->ds_unique_bytes);
27235831Sck153898 		*ref_rsrv =
27245831Sck153898 		    asize - MIN(asize, parent_delta(ds, asize + inflight));
27255378Sck153898 	}
27265378Sck153898 
27275378Sck153898 	if (!check_quota || ds->ds_quota == 0) {
27285378Sck153898 		mutex_exit(&ds->ds_lock);
27295378Sck153898 		return (0);
27305378Sck153898 	}
27315378Sck153898 	/*
27325378Sck153898 	 * If they are requesting more space, and our current estimate
27335378Sck153898 	 * is over quota, they get to try again unless the actual
27345378Sck153898 	 * on-disk is over quota and there are no pending changes (which
27355378Sck153898 	 * may free up space for us).
27365378Sck153898 	 */
27375378Sck153898 	if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) {
27385378Sck153898 		if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota)
27395378Sck153898 			error = ERESTART;
27405378Sck153898 		else
27415378Sck153898 			error = EDQUOT;
27425378Sck153898 	}
27435378Sck153898 	mutex_exit(&ds->ds_lock);
27445378Sck153898 
27455378Sck153898 	return (error);
27465378Sck153898 }
27475378Sck153898 
27485378Sck153898 /* ARGSUSED */
27495378Sck153898 static int
27505378Sck153898 dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
27515378Sck153898 {
27525378Sck153898 	dsl_dataset_t *ds = arg1;
27535378Sck153898 	uint64_t *quotap = arg2;
27545378Sck153898 	uint64_t new_quota = *quotap;
27555378Sck153898 
27565378Sck153898 	if (spa_version(ds->ds_dir->dd_pool->dp_spa) < SPA_VERSION_REFQUOTA)
27575378Sck153898 		return (ENOTSUP);
27585378Sck153898 
27595378Sck153898 	if (new_quota == 0)
27605378Sck153898 		return (0);
27615378Sck153898 
27625378Sck153898 	if (new_quota < ds->ds_phys->ds_used_bytes ||
27635378Sck153898 	    new_quota < ds->ds_reserved)
27645378Sck153898 		return (ENOSPC);
27655378Sck153898 
27665378Sck153898 	return (0);
27675378Sck153898 }
27685378Sck153898 
27695378Sck153898 /* ARGSUSED */
27705378Sck153898 void
27715378Sck153898 dsl_dataset_set_quota_sync(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
27725378Sck153898 {
27735378Sck153898 	dsl_dataset_t *ds = arg1;
27745378Sck153898 	uint64_t *quotap = arg2;
27755378Sck153898 	uint64_t new_quota = *quotap;
27765378Sck153898 
27775378Sck153898 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
27785378Sck153898 
27795378Sck153898 	ds->ds_quota = new_quota;
27805378Sck153898 
27815378Sck153898 	dsl_prop_set_uint64_sync(ds->ds_dir, "refquota", new_quota, cr, tx);
27825378Sck153898 
27835378Sck153898 	spa_history_internal_log(LOG_DS_REFQUOTA, ds->ds_dir->dd_pool->dp_spa,
27845378Sck153898 	    tx, cr, "%lld dataset = %llu ",
27856689Smaybee 	    (longlong_t)new_quota, ds->ds_object);
27865378Sck153898 }
27875378Sck153898 
27885378Sck153898 int
27895378Sck153898 dsl_dataset_set_quota(const char *dsname, uint64_t quota)
27905378Sck153898 {
27915378Sck153898 	dsl_dataset_t *ds;
27925378Sck153898 	int err;
27935378Sck153898 
27946689Smaybee 	err = dsl_dataset_hold(dsname, FTAG, &ds);
27955378Sck153898 	if (err)
27965378Sck153898 		return (err);
27975378Sck153898 
27985481Sck153898 	if (quota != ds->ds_quota) {
27995481Sck153898 		/*
28005481Sck153898 		 * If someone removes a file, then tries to set the quota, we
28015481Sck153898 		 * want to make sure the file freeing takes effect.
28025481Sck153898 		 */
28035481Sck153898 		txg_wait_open(ds->ds_dir->dd_pool, 0);
28045481Sck153898 
28055481Sck153898 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
28065481Sck153898 		    dsl_dataset_set_quota_check, dsl_dataset_set_quota_sync,
28075481Sck153898 		    ds, &quota, 0);
28085481Sck153898 	}
28096689Smaybee 	dsl_dataset_rele(ds, FTAG);
28105378Sck153898 	return (err);
28115378Sck153898 }
28125378Sck153898 
28135378Sck153898 static int
28145378Sck153898 dsl_dataset_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
28155378Sck153898 {
28165378Sck153898 	dsl_dataset_t *ds = arg1;
28175378Sck153898 	uint64_t *reservationp = arg2;
28185378Sck153898 	uint64_t new_reservation = *reservationp;
28195378Sck153898 	int64_t delta;
28205378Sck153898 	uint64_t unique;
28215378Sck153898 
28225378Sck153898 	if (new_reservation > INT64_MAX)
28235378Sck153898 		return (EOVERFLOW);
28245378Sck153898 
28255378Sck153898 	if (spa_version(ds->ds_dir->dd_pool->dp_spa) <
28265378Sck153898 	    SPA_VERSION_REFRESERVATION)
28275378Sck153898 		return (ENOTSUP);
28285378Sck153898 
28295378Sck153898 	if (dsl_dataset_is_snapshot(ds))
28305378Sck153898 		return (EINVAL);
28315378Sck153898 
28325378Sck153898 	/*
28335378Sck153898 	 * If we are doing the preliminary check in open context, the
28345378Sck153898 	 * space estimates may be inaccurate.
28355378Sck153898 	 */
28365378Sck153898 	if (!dmu_tx_is_syncing(tx))
28375378Sck153898 		return (0);
28385378Sck153898 
28395378Sck153898 	mutex_enter(&ds->ds_lock);
28405378Sck153898 	unique = dsl_dataset_unique(ds);
28415378Sck153898 	delta = MAX(unique, new_reservation) - MAX(unique, ds->ds_reserved);
28425378Sck153898 	mutex_exit(&ds->ds_lock);
28435378Sck153898 
28445378Sck153898 	if (delta > 0 &&
28455378Sck153898 	    delta > dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE))
28465378Sck153898 		return (ENOSPC);
28475378Sck153898 	if (delta > 0 && ds->ds_quota > 0 &&
28485378Sck153898 	    new_reservation > ds->ds_quota)
28495378Sck153898 		return (ENOSPC);
28505378Sck153898 
28515378Sck153898 	return (0);
28525378Sck153898 }
28535378Sck153898 
28545378Sck153898 /* ARGSUSED */
28555378Sck153898 static void
28565378Sck153898 dsl_dataset_set_reservation_sync(void *arg1, void *arg2, cred_t *cr,
28575378Sck153898     dmu_tx_t *tx)
28585378Sck153898 {
28595378Sck153898 	dsl_dataset_t *ds = arg1;
28605378Sck153898 	uint64_t *reservationp = arg2;
28615378Sck153898 	uint64_t new_reservation = *reservationp;
28625378Sck153898 	uint64_t unique;
28635378Sck153898 	int64_t delta;
28645378Sck153898 
28655378Sck153898 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
28665378Sck153898 
28675378Sck153898 	mutex_enter(&ds->ds_lock);
28685378Sck153898 	unique = dsl_dataset_unique(ds);
28695378Sck153898 	delta = MAX(0, (int64_t)(new_reservation - unique)) -
28705378Sck153898 	    MAX(0, (int64_t)(ds->ds_reserved - unique));
28715378Sck153898 	ds->ds_reserved = new_reservation;
28725378Sck153898 	mutex_exit(&ds->ds_lock);
28735378Sck153898 
28745378Sck153898 	dsl_prop_set_uint64_sync(ds->ds_dir, "refreservation",
28755378Sck153898 	    new_reservation, cr, tx);
28765378Sck153898 
28775378Sck153898 	dsl_dir_diduse_space(ds->ds_dir, delta, 0, 0, tx);
28785378Sck153898 
28795378Sck153898 	spa_history_internal_log(LOG_DS_REFRESERV,
28805378Sck153898 	    ds->ds_dir->dd_pool->dp_spa, tx, cr, "%lld dataset = %llu",
28815378Sck153898 	    (longlong_t)new_reservation,
28825378Sck153898 	    ds->ds_dir->dd_phys->dd_head_dataset_obj);
28835378Sck153898 }
28845378Sck153898 
28855378Sck153898 int
28865378Sck153898 dsl_dataset_set_reservation(const char *dsname, uint64_t reservation)
28875378Sck153898 {
28885378Sck153898 	dsl_dataset_t *ds;
28895378Sck153898 	int err;
28905378Sck153898 
28916689Smaybee 	err = dsl_dataset_hold(dsname, FTAG, &ds);
28925378Sck153898 	if (err)
28935378Sck153898 		return (err);
28945378Sck153898 
28955378Sck153898 	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
28965378Sck153898 	    dsl_dataset_set_reservation_check,
28975378Sck153898 	    dsl_dataset_set_reservation_sync, ds, &reservation, 0);
28986689Smaybee 	dsl_dataset_rele(ds, FTAG);
28995378Sck153898 	return (err);
29005378Sck153898 }
2901