xref: /onnv-gate/usr/src/uts/common/fs/zfs/dsl_dataset.c (revision 3547:e396e0a440b1)
1789Sahrens /*
2789Sahrens  * CDDL HEADER START
3789Sahrens  *
4789Sahrens  * The contents of this file are subject to the terms of the
51544Seschrock  * Common Development and Distribution License (the "License").
61544Seschrock  * You may not use this file except in compliance with the License.
7789Sahrens  *
8789Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9789Sahrens  * or http://www.opensolaris.org/os/licensing.
10789Sahrens  * See the License for the specific language governing permissions
11789Sahrens  * and limitations under the License.
12789Sahrens  *
13789Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14789Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15789Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16789Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17789Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18789Sahrens  *
19789Sahrens  * CDDL HEADER END
20789Sahrens  */
21789Sahrens /*
223444Sek110237  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23789Sahrens  * Use is subject to license terms.
24789Sahrens  */
25789Sahrens 
26789Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
27789Sahrens 
28789Sahrens #include <sys/dmu_objset.h>
29789Sahrens #include <sys/dsl_dataset.h>
30789Sahrens #include <sys/dsl_dir.h>
312082Seschrock #include <sys/dsl_prop.h>
322199Sahrens #include <sys/dsl_synctask.h>
33789Sahrens #include <sys/dmu_traverse.h>
34789Sahrens #include <sys/dmu_tx.h>
35789Sahrens #include <sys/arc.h>
36789Sahrens #include <sys/zio.h>
37789Sahrens #include <sys/zap.h>
38789Sahrens #include <sys/unique.h>
39789Sahrens #include <sys/zfs_context.h>
40789Sahrens 
412199Sahrens static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
422199Sahrens static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
432199Sahrens static dsl_checkfunc_t dsl_dataset_rollback_check;
442199Sahrens static dsl_syncfunc_t dsl_dataset_rollback_sync;
452199Sahrens static dsl_checkfunc_t dsl_dataset_destroy_check;
462199Sahrens static dsl_syncfunc_t dsl_dataset_destroy_sync;
471731Sbonwick 
483444Sek110237 #define	DS_REF_MAX	(1ULL << 62)
49789Sahrens 
50789Sahrens #define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE
51789Sahrens 
52789Sahrens /*
53789Sahrens  * We use weighted reference counts to express the various forms of exclusion
54789Sahrens  * between different open modes.  A STANDARD open is 1 point, an EXCLUSIVE open
553444Sek110237  * is DS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE.
56789Sahrens  * This makes the exclusion logic simple: the total refcnt for all opens cannot
573444Sek110237  * exceed DS_REF_MAX.  For example, EXCLUSIVE opens are exclusive because their
583444Sek110237  * weight (DS_REF_MAX) consumes the entire refcnt space.  PRIMARY opens consume
59789Sahrens  * just over half of the refcnt space, so there can't be more than one, but it
60789Sahrens  * can peacefully coexist with any number of STANDARD opens.
61789Sahrens  */
62789Sahrens static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
633444Sek110237 	0,			/* DS_MODE_NONE - invalid		*/
643444Sek110237 	1,			/* DS_MODE_STANDARD - unlimited number	*/
653444Sek110237 	(DS_REF_MAX >> 1) + 1,	/* DS_MODE_PRIMARY - only one of these	*/
663444Sek110237 	DS_REF_MAX		/* DS_MODE_EXCLUSIVE - no other opens	*/
67789Sahrens };
68789Sahrens 
69789Sahrens 
70789Sahrens void
71789Sahrens dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
72789Sahrens {
732082Seschrock 	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
74789Sahrens 	int compressed = BP_GET_PSIZE(bp);
75789Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
76789Sahrens 
77789Sahrens 	dprintf_bp(bp, "born, ds=%p\n", ds);
78789Sahrens 
79789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
80789Sahrens 	/* It could have been compressed away to nothing */
81789Sahrens 	if (BP_IS_HOLE(bp))
82789Sahrens 		return;
83789Sahrens 	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
84789Sahrens 	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
85789Sahrens 	if (ds == NULL) {
86789Sahrens 		/*
87789Sahrens 		 * Account for the meta-objset space in its placeholder
88789Sahrens 		 * dsl_dir.
89789Sahrens 		 */
90789Sahrens 		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
91789Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
92789Sahrens 		    used, compressed, uncompressed, tx);
93789Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
94789Sahrens 		return;
95789Sahrens 	}
96789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
97789Sahrens 	mutex_enter(&ds->ds_lock);
98789Sahrens 	ds->ds_phys->ds_used_bytes += used;
99789Sahrens 	ds->ds_phys->ds_compressed_bytes += compressed;
100789Sahrens 	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
101789Sahrens 	ds->ds_phys->ds_unique_bytes += used;
102789Sahrens 	mutex_exit(&ds->ds_lock);
103789Sahrens 	dsl_dir_diduse_space(ds->ds_dir,
104789Sahrens 	    used, compressed, uncompressed, tx);
105789Sahrens }
106789Sahrens 
107789Sahrens void
108*3547Smaybee dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
109*3547Smaybee     dmu_tx_t *tx)
110789Sahrens {
1112082Seschrock 	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
112789Sahrens 	int compressed = BP_GET_PSIZE(bp);
113789Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
114789Sahrens 
115789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
116*3547Smaybee 	/* No block pointer => nothing to free */
117789Sahrens 	if (BP_IS_HOLE(bp))
118789Sahrens 		return;
119789Sahrens 
120789Sahrens 	ASSERT(used > 0);
121789Sahrens 	if (ds == NULL) {
122*3547Smaybee 		int err;
123789Sahrens 		/*
124789Sahrens 		 * Account for the meta-objset space in its placeholder
125789Sahrens 		 * dataset.
126789Sahrens 		 */
127*3547Smaybee 		err = arc_free(pio, tx->tx_pool->dp_spa,
128*3547Smaybee 		    tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
129*3547Smaybee 		ASSERT(err == 0);
130789Sahrens 
131789Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
132789Sahrens 		    -used, -compressed, -uncompressed, tx);
133789Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
134789Sahrens 		return;
135789Sahrens 	}
136789Sahrens 	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
137789Sahrens 
138789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
139789Sahrens 
140789Sahrens 	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
141*3547Smaybee 		int err;
142*3547Smaybee 
143789Sahrens 		dprintf_bp(bp, "freeing: %s", "");
144*3547Smaybee 		err = arc_free(pio, tx->tx_pool->dp_spa,
145*3547Smaybee 		    tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
146*3547Smaybee 		ASSERT(err == 0);
147789Sahrens 
148789Sahrens 		mutex_enter(&ds->ds_lock);
149789Sahrens 		/* XXX unique_bytes is not accurate for head datasets */
150789Sahrens 		/* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */
151789Sahrens 		ds->ds_phys->ds_unique_bytes -= used;
152789Sahrens 		mutex_exit(&ds->ds_lock);
153789Sahrens 		dsl_dir_diduse_space(ds->ds_dir,
154789Sahrens 		    -used, -compressed, -uncompressed, tx);
155789Sahrens 	} else {
156789Sahrens 		dprintf_bp(bp, "putting on dead list: %s", "");
1571544Seschrock 		VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
158789Sahrens 		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
159789Sahrens 		if (ds->ds_phys->ds_prev_snap_obj != 0) {
160789Sahrens 			ASSERT3U(ds->ds_prev->ds_object, ==,
161789Sahrens 			    ds->ds_phys->ds_prev_snap_obj);
162789Sahrens 			ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
163789Sahrens 			if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
1642082Seschrock 			    ds->ds_object && bp->blk_birth >
165789Sahrens 			    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
166789Sahrens 				dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
167789Sahrens 				mutex_enter(&ds->ds_prev->ds_lock);
168789Sahrens 				ds->ds_prev->ds_phys->ds_unique_bytes +=
169789Sahrens 				    used;
170789Sahrens 				mutex_exit(&ds->ds_prev->ds_lock);
171789Sahrens 			}
172789Sahrens 		}
173789Sahrens 	}
174789Sahrens 	mutex_enter(&ds->ds_lock);
175789Sahrens 	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
176789Sahrens 	ds->ds_phys->ds_used_bytes -= used;
177789Sahrens 	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
178789Sahrens 	ds->ds_phys->ds_compressed_bytes -= compressed;
179789Sahrens 	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
180789Sahrens 	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
181789Sahrens 	mutex_exit(&ds->ds_lock);
182789Sahrens }
183789Sahrens 
1841544Seschrock uint64_t
1851544Seschrock dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
186789Sahrens {
1872885Sahrens 	uint64_t trysnap = 0;
1882885Sahrens 
189789Sahrens 	if (ds == NULL)
1901544Seschrock 		return (0);
191789Sahrens 	/*
192789Sahrens 	 * The snapshot creation could fail, but that would cause an
193789Sahrens 	 * incorrect FALSE return, which would only result in an
194789Sahrens 	 * overestimation of the amount of space that an operation would
195789Sahrens 	 * consume, which is OK.
196789Sahrens 	 *
197789Sahrens 	 * There's also a small window where we could miss a pending
198789Sahrens 	 * snapshot, because we could set the sync task in the quiescing
199789Sahrens 	 * phase.  So this should only be used as a guess.
200789Sahrens 	 */
2012885Sahrens 	if (ds->ds_trysnap_txg >
2022885Sahrens 	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
2032885Sahrens 		trysnap = ds->ds_trysnap_txg;
2042885Sahrens 	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
2051544Seschrock }
2061544Seschrock 
2071544Seschrock int
2081544Seschrock dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
2091544Seschrock {
2101544Seschrock 	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
211789Sahrens }
212789Sahrens 
213789Sahrens /* ARGSUSED */
214789Sahrens static void
215789Sahrens dsl_dataset_evict(dmu_buf_t *db, void *dsv)
216789Sahrens {
217789Sahrens 	dsl_dataset_t *ds = dsv;
218789Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
219789Sahrens 
2203444Sek110237 	/* open_refcount == DS_REF_MAX when deleting */
221789Sahrens 	ASSERT(ds->ds_open_refcount == 0 ||
2223444Sek110237 	    ds->ds_open_refcount == DS_REF_MAX);
223789Sahrens 
224789Sahrens 	dprintf_ds(ds, "evicting %s\n", "");
225789Sahrens 
226789Sahrens 	unique_remove(ds->ds_phys->ds_fsid_guid);
227789Sahrens 
228789Sahrens 	if (ds->ds_user_ptr != NULL)
229789Sahrens 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
230789Sahrens 
231789Sahrens 	if (ds->ds_prev) {
232789Sahrens 		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
233789Sahrens 		ds->ds_prev = NULL;
234789Sahrens 	}
235789Sahrens 
236789Sahrens 	bplist_close(&ds->ds_deadlist);
237789Sahrens 	dsl_dir_close(ds->ds_dir, ds);
238789Sahrens 
239789Sahrens 	if (list_link_active(&ds->ds_synced_link))
240789Sahrens 		list_remove(&dp->dp_synced_objsets, ds);
241789Sahrens 
2422856Snd150628 	mutex_destroy(&ds->ds_lock);
2432856Snd150628 	mutex_destroy(&ds->ds_deadlist.bpl_lock);
2442856Snd150628 
245789Sahrens 	kmem_free(ds, sizeof (dsl_dataset_t));
246789Sahrens }
247789Sahrens 
2481544Seschrock static int
249789Sahrens dsl_dataset_get_snapname(dsl_dataset_t *ds)
250789Sahrens {
251789Sahrens 	dsl_dataset_phys_t *headphys;
252789Sahrens 	int err;
253789Sahrens 	dmu_buf_t *headdbuf;
254789Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
255789Sahrens 	objset_t *mos = dp->dp_meta_objset;
256789Sahrens 
257789Sahrens 	if (ds->ds_snapname[0])
2581544Seschrock 		return (0);
259789Sahrens 	if (ds->ds_phys->ds_next_snap_obj == 0)
2601544Seschrock 		return (0);
261789Sahrens 
2621544Seschrock 	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
2631544Seschrock 	    FTAG, &headdbuf);
2641544Seschrock 	if (err)
2651544Seschrock 		return (err);
266789Sahrens 	headphys = headdbuf->db_data;
267789Sahrens 	err = zap_value_search(dp->dp_meta_objset,
268789Sahrens 	    headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname);
2691544Seschrock 	dmu_buf_rele(headdbuf, FTAG);
2701544Seschrock 	return (err);
271789Sahrens }
272789Sahrens 
2731544Seschrock int
274789Sahrens dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname,
2751544Seschrock     int mode, void *tag, dsl_dataset_t **dsp)
276789Sahrens {
277789Sahrens 	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
278789Sahrens 	objset_t *mos = dp->dp_meta_objset;
279789Sahrens 	dmu_buf_t *dbuf;
280789Sahrens 	dsl_dataset_t *ds;
2811544Seschrock 	int err;
282789Sahrens 
283789Sahrens 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
284789Sahrens 	    dsl_pool_sync_context(dp));
285789Sahrens 
2861544Seschrock 	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
2871544Seschrock 	if (err)
2881544Seschrock 		return (err);
289789Sahrens 	ds = dmu_buf_get_user(dbuf);
290789Sahrens 	if (ds == NULL) {
291789Sahrens 		dsl_dataset_t *winner;
292789Sahrens 
293789Sahrens 		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
294789Sahrens 		ds->ds_dbuf = dbuf;
295789Sahrens 		ds->ds_object = dsobj;
296789Sahrens 		ds->ds_phys = dbuf->db_data;
297789Sahrens 
2982856Snd150628 		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
2992856Snd150628 		mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
3002856Snd150628 		    NULL);
3012856Snd150628 
3021544Seschrock 		err = bplist_open(&ds->ds_deadlist,
303789Sahrens 		    mos, ds->ds_phys->ds_deadlist_obj);
3041544Seschrock 		if (err == 0) {
3051544Seschrock 			err = dsl_dir_open_obj(dp,
3061544Seschrock 			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
3071544Seschrock 		}
3081544Seschrock 		if (err) {
3091544Seschrock 			/*
3101544Seschrock 			 * we don't really need to close the blist if we
3111544Seschrock 			 * just opened it.
3121544Seschrock 			 */
3132856Snd150628 			mutex_destroy(&ds->ds_lock);
3142856Snd150628 			mutex_destroy(&ds->ds_deadlist.bpl_lock);
3151544Seschrock 			kmem_free(ds, sizeof (dsl_dataset_t));
3161544Seschrock 			dmu_buf_rele(dbuf, tag);
3171544Seschrock 			return (err);
3181544Seschrock 		}
319789Sahrens 
320789Sahrens 		if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) {
321789Sahrens 			ds->ds_snapname[0] = '\0';
322789Sahrens 			if (ds->ds_phys->ds_prev_snap_obj) {
3231544Seschrock 				err = dsl_dataset_open_obj(dp,
324789Sahrens 				    ds->ds_phys->ds_prev_snap_obj, NULL,
3251544Seschrock 				    DS_MODE_NONE, ds, &ds->ds_prev);
326789Sahrens 			}
327789Sahrens 		} else {
328789Sahrens 			if (snapname) {
329789Sahrens #ifdef ZFS_DEBUG
330789Sahrens 				dsl_dataset_phys_t *headphys;
3311544Seschrock 				dmu_buf_t *headdbuf;
3321544Seschrock 				err = dmu_bonus_hold(mos,
3331544Seschrock 				    ds->ds_dir->dd_phys->dd_head_dataset_obj,
3341544Seschrock 				    FTAG, &headdbuf);
3351544Seschrock 				if (err == 0) {
3361544Seschrock 					headphys = headdbuf->db_data;
3371544Seschrock 					uint64_t foundobj;
3381544Seschrock 					err = zap_lookup(dp->dp_meta_objset,
3391544Seschrock 					    headphys->ds_snapnames_zapobj,
3401544Seschrock 					    snapname, sizeof (foundobj), 1,
3411544Seschrock 					    &foundobj);
3421544Seschrock 					ASSERT3U(foundobj, ==, dsobj);
3431544Seschrock 					dmu_buf_rele(headdbuf, FTAG);
3441544Seschrock 				}
345789Sahrens #endif
346789Sahrens 				(void) strcat(ds->ds_snapname, snapname);
347789Sahrens 			} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
3481544Seschrock 				err = dsl_dataset_get_snapname(ds);
349789Sahrens 			}
350789Sahrens 		}
351789Sahrens 
3521544Seschrock 		if (err == 0) {
3531544Seschrock 			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
3541544Seschrock 			    dsl_dataset_evict);
3551544Seschrock 		}
3561544Seschrock 		if (err || winner) {
357789Sahrens 			bplist_close(&ds->ds_deadlist);
358789Sahrens 			if (ds->ds_prev) {
359789Sahrens 				dsl_dataset_close(ds->ds_prev,
360789Sahrens 				    DS_MODE_NONE, ds);
361789Sahrens 			}
362789Sahrens 			dsl_dir_close(ds->ds_dir, ds);
3632856Snd150628 			mutex_destroy(&ds->ds_lock);
3642856Snd150628 			mutex_destroy(&ds->ds_deadlist.bpl_lock);
365789Sahrens 			kmem_free(ds, sizeof (dsl_dataset_t));
3661544Seschrock 			if (err) {
3671544Seschrock 				dmu_buf_rele(dbuf, tag);
3681544Seschrock 				return (err);
3691544Seschrock 			}
370789Sahrens 			ds = winner;
371789Sahrens 		} else {
372789Sahrens 			uint64_t new =
373789Sahrens 			    unique_insert(ds->ds_phys->ds_fsid_guid);
374789Sahrens 			if (new != ds->ds_phys->ds_fsid_guid) {
375789Sahrens 				/* XXX it won't necessarily be synced... */
376789Sahrens 				ds->ds_phys->ds_fsid_guid = new;
377789Sahrens 			}
378789Sahrens 		}
379789Sahrens 	}
380789Sahrens 	ASSERT3P(ds->ds_dbuf, ==, dbuf);
381789Sahrens 	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
382789Sahrens 
383789Sahrens 	mutex_enter(&ds->ds_lock);
384789Sahrens 	if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY &&
3852082Seschrock 	    (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) &&
3862082Seschrock 	    !DS_MODE_IS_INCONSISTENT(mode)) ||
3873444Sek110237 	    (ds->ds_open_refcount + weight > DS_REF_MAX)) {
388789Sahrens 		mutex_exit(&ds->ds_lock);
389789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
3901544Seschrock 		return (EBUSY);
391789Sahrens 	}
392789Sahrens 	ds->ds_open_refcount += weight;
393789Sahrens 	mutex_exit(&ds->ds_lock);
394789Sahrens 
3951544Seschrock 	*dsp = ds;
3961544Seschrock 	return (0);
397789Sahrens }
398789Sahrens 
399789Sahrens int
400789Sahrens dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
401789Sahrens     void *tag, dsl_dataset_t **dsp)
402789Sahrens {
403789Sahrens 	dsl_dir_t *dd;
404789Sahrens 	dsl_pool_t *dp;
405789Sahrens 	const char *tail;
406789Sahrens 	uint64_t obj;
407789Sahrens 	dsl_dataset_t *ds = NULL;
408789Sahrens 	int err = 0;
409789Sahrens 
4101544Seschrock 	err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail);
4111544Seschrock 	if (err)
4121544Seschrock 		return (err);
413789Sahrens 
414789Sahrens 	dp = dd->dd_pool;
415789Sahrens 	obj = dd->dd_phys->dd_head_dataset_obj;
416789Sahrens 	rw_enter(&dp->dp_config_rwlock, RW_READER);
417789Sahrens 	if (obj == 0) {
418789Sahrens 		/* A dataset with no associated objset */
419789Sahrens 		err = ENOENT;
420789Sahrens 		goto out;
421789Sahrens 	}
422789Sahrens 
423789Sahrens 	if (tail != NULL) {
424789Sahrens 		objset_t *mos = dp->dp_meta_objset;
425789Sahrens 
4261544Seschrock 		err = dsl_dataset_open_obj(dp, obj, NULL,
4271544Seschrock 		    DS_MODE_NONE, tag, &ds);
4281544Seschrock 		if (err)
4291544Seschrock 			goto out;
430789Sahrens 		obj = ds->ds_phys->ds_snapnames_zapobj;
431789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
432789Sahrens 		ds = NULL;
433789Sahrens 
434789Sahrens 		if (tail[0] != '@') {
435789Sahrens 			err = ENOENT;
436789Sahrens 			goto out;
437789Sahrens 		}
438789Sahrens 		tail++;
439789Sahrens 
440789Sahrens 		/* Look for a snapshot */
441789Sahrens 		if (!DS_MODE_IS_READONLY(mode)) {
442789Sahrens 			err = EROFS;
443789Sahrens 			goto out;
444789Sahrens 		}
445789Sahrens 		dprintf("looking for snapshot '%s'\n", tail);
446789Sahrens 		err = zap_lookup(mos, obj, tail, 8, 1, &obj);
447789Sahrens 		if (err)
448789Sahrens 			goto out;
449789Sahrens 	}
4501544Seschrock 	err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds);
451789Sahrens 
452789Sahrens out:
453789Sahrens 	rw_exit(&dp->dp_config_rwlock);
454789Sahrens 	dsl_dir_close(dd, FTAG);
455789Sahrens 
456789Sahrens 	ASSERT3U((err == 0), ==, (ds != NULL));
457789Sahrens 	/* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */
458789Sahrens 
459789Sahrens 	*dsp = ds;
460789Sahrens 	return (err);
461789Sahrens }
462789Sahrens 
463789Sahrens int
464789Sahrens dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp)
465789Sahrens {
466789Sahrens 	return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp));
467789Sahrens }
468789Sahrens 
469789Sahrens void
470789Sahrens dsl_dataset_name(dsl_dataset_t *ds, char *name)
471789Sahrens {
472789Sahrens 	if (ds == NULL) {
473789Sahrens 		(void) strcpy(name, "mos");
474789Sahrens 	} else {
475789Sahrens 		dsl_dir_name(ds->ds_dir, name);
4761544Seschrock 		VERIFY(0 == dsl_dataset_get_snapname(ds));
477789Sahrens 		if (ds->ds_snapname[0]) {
478789Sahrens 			(void) strcat(name, "@");
479789Sahrens 			if (!MUTEX_HELD(&ds->ds_lock)) {
480789Sahrens 				/*
481789Sahrens 				 * We use a "recursive" mutex so that we
482789Sahrens 				 * can call dprintf_ds() with ds_lock held.
483789Sahrens 				 */
484789Sahrens 				mutex_enter(&ds->ds_lock);
485789Sahrens 				(void) strcat(name, ds->ds_snapname);
486789Sahrens 				mutex_exit(&ds->ds_lock);
487789Sahrens 			} else {
488789Sahrens 				(void) strcat(name, ds->ds_snapname);
489789Sahrens 			}
490789Sahrens 		}
491789Sahrens 	}
492789Sahrens }
493789Sahrens 
494789Sahrens void
495789Sahrens dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag)
496789Sahrens {
497789Sahrens 	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
498789Sahrens 	mutex_enter(&ds->ds_lock);
499789Sahrens 	ASSERT3U(ds->ds_open_refcount, >=, weight);
500789Sahrens 	ds->ds_open_refcount -= weight;
501789Sahrens 	dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n",
502789Sahrens 	    mode, ds->ds_open_refcount);
503789Sahrens 	mutex_exit(&ds->ds_lock);
504789Sahrens 
5051544Seschrock 	dmu_buf_rele(ds->ds_dbuf, tag);
506789Sahrens }
507789Sahrens 
508789Sahrens void
509789Sahrens dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
510789Sahrens {
511789Sahrens 	objset_t *mos = dp->dp_meta_objset;
512789Sahrens 	dmu_buf_t *dbuf;
513789Sahrens 	dsl_dataset_phys_t *dsphys;
514789Sahrens 	dsl_dataset_t *ds;
515789Sahrens 	uint64_t dsobj;
516789Sahrens 	dsl_dir_t *dd;
517789Sahrens 
518789Sahrens 	dsl_dir_create_root(mos, ddobjp, tx);
5191544Seschrock 	VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd));
520789Sahrens 
521928Stabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
522928Stabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
5231544Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
524789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
525789Sahrens 	dsphys = dbuf->db_data;
526789Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
527789Sahrens 	dsphys->ds_fsid_guid = unique_create();
5281544Seschrock 	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
529789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
530789Sahrens 	    sizeof (dsphys->ds_guid));
531789Sahrens 	dsphys->ds_snapnames_zapobj =
532885Sahrens 	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
533789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
534789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
535789Sahrens 	dsphys->ds_deadlist_obj =
536789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
5371544Seschrock 	dmu_buf_rele(dbuf, FTAG);
538789Sahrens 
539789Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
540789Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
541789Sahrens 	dsl_dir_close(dd, FTAG);
542789Sahrens 
5431544Seschrock 	VERIFY(0 ==
5441544Seschrock 	    dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds));
545*3547Smaybee 	(void) dmu_objset_create_impl(dp->dp_spa, ds,
546*3547Smaybee 	    &ds->ds_phys->ds_bp, DMU_OST_ZFS, tx);
547789Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
548789Sahrens }
549789Sahrens 
5502199Sahrens uint64_t
5512199Sahrens dsl_dataset_create_sync(dsl_dir_t *pdd,
552789Sahrens     const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx)
553789Sahrens {
5542199Sahrens 	dsl_pool_t *dp = pdd->dd_pool;
555789Sahrens 	dmu_buf_t *dbuf;
556789Sahrens 	dsl_dataset_phys_t *dsphys;
5572199Sahrens 	uint64_t dsobj, ddobj;
558789Sahrens 	objset_t *mos = dp->dp_meta_objset;
559789Sahrens 	dsl_dir_t *dd;
560789Sahrens 
5612199Sahrens 	ASSERT(clone_parent == NULL || clone_parent->ds_dir->dd_pool == dp);
5622199Sahrens 	ASSERT(clone_parent == NULL ||
5632199Sahrens 	    clone_parent->ds_phys->ds_num_children > 0);
564789Sahrens 	ASSERT(lastname[0] != '@');
565789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
566789Sahrens 
5672199Sahrens 	ddobj = dsl_dir_create_sync(pdd, lastname, tx);
5682199Sahrens 	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));
569789Sahrens 
570928Stabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
571928Stabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
5721544Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
573789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
574789Sahrens 	dsphys = dbuf->db_data;
575789Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
576789Sahrens 	dsphys->ds_fsid_guid = unique_create();
577789Sahrens 	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
578789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
579789Sahrens 	    sizeof (dsphys->ds_guid));
580789Sahrens 	dsphys->ds_snapnames_zapobj =
581885Sahrens 	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
582789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
583789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
584789Sahrens 	dsphys->ds_deadlist_obj =
585789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
586789Sahrens 	if (clone_parent) {
587789Sahrens 		dsphys->ds_prev_snap_obj = clone_parent->ds_object;
588789Sahrens 		dsphys->ds_prev_snap_txg =
589789Sahrens 		    clone_parent->ds_phys->ds_creation_txg;
590789Sahrens 		dsphys->ds_used_bytes =
591789Sahrens 		    clone_parent->ds_phys->ds_used_bytes;
592789Sahrens 		dsphys->ds_compressed_bytes =
593789Sahrens 		    clone_parent->ds_phys->ds_compressed_bytes;
594789Sahrens 		dsphys->ds_uncompressed_bytes =
595789Sahrens 		    clone_parent->ds_phys->ds_uncompressed_bytes;
596789Sahrens 		dsphys->ds_bp = clone_parent->ds_phys->ds_bp;
597789Sahrens 
598789Sahrens 		dmu_buf_will_dirty(clone_parent->ds_dbuf, tx);
599789Sahrens 		clone_parent->ds_phys->ds_num_children++;
600789Sahrens 
601789Sahrens 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
602789Sahrens 		dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object;
603789Sahrens 	}
6041544Seschrock 	dmu_buf_rele(dbuf, FTAG);
605789Sahrens 
606789Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
607789Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
608789Sahrens 	dsl_dir_close(dd, FTAG);
609789Sahrens 
6102199Sahrens 	return (dsobj);
6112199Sahrens }
6122199Sahrens 
6132199Sahrens struct destroyarg {
6142199Sahrens 	dsl_sync_task_group_t *dstg;
6152199Sahrens 	char *snapname;
6162199Sahrens 	void *tag;
6172199Sahrens 	char *failed;
6182199Sahrens };
6192199Sahrens 
6202199Sahrens static int
6212199Sahrens dsl_snapshot_destroy_one(char *name, void *arg)
6222199Sahrens {
6232199Sahrens 	struct destroyarg *da = arg;
6242199Sahrens 	dsl_dataset_t *ds;
6252199Sahrens 	char *cp;
6262199Sahrens 	int err;
6272199Sahrens 
6282199Sahrens 	(void) strcat(name, "@");
6292199Sahrens 	(void) strcat(name, da->snapname);
6302199Sahrens 	err = dsl_dataset_open(name,
6312199Sahrens 	    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
6322199Sahrens 	    da->tag, &ds);
6332199Sahrens 	cp = strchr(name, '@');
6342199Sahrens 	*cp = '\0';
6352199Sahrens 	if (err == ENOENT)
6362199Sahrens 		return (0);
6372199Sahrens 	if (err) {
6382199Sahrens 		(void) strcpy(da->failed, name);
6392199Sahrens 		return (err);
6402199Sahrens 	}
6412199Sahrens 
6422199Sahrens 	dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
6432199Sahrens 	    dsl_dataset_destroy_sync, ds, da->tag, 0);
644789Sahrens 	return (0);
645789Sahrens }
646789Sahrens 
6472199Sahrens /*
6482199Sahrens  * Destroy 'snapname' in all descendants of 'fsname'.
6492199Sahrens  */
6502199Sahrens #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
6512199Sahrens int
6522199Sahrens dsl_snapshots_destroy(char *fsname, char *snapname)
6532199Sahrens {
6542199Sahrens 	int err;
6552199Sahrens 	struct destroyarg da;
6562199Sahrens 	dsl_sync_task_t *dst;
6572199Sahrens 	spa_t *spa;
6582199Sahrens 	char *cp;
6592199Sahrens 
6602199Sahrens 	cp = strchr(fsname, '/');
6612199Sahrens 	if (cp) {
6622199Sahrens 		*cp = '\0';
6632199Sahrens 		err = spa_open(fsname, &spa, FTAG);
6642199Sahrens 		*cp = '/';
6652199Sahrens 	} else {
6662199Sahrens 		err = spa_open(fsname, &spa, FTAG);
6672199Sahrens 	}
6682199Sahrens 	if (err)
6692199Sahrens 		return (err);
6702199Sahrens 	da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
6712199Sahrens 	da.snapname = snapname;
6722199Sahrens 	da.tag = FTAG;
6732199Sahrens 	da.failed = fsname;
6742199Sahrens 
6752199Sahrens 	err = dmu_objset_find(fsname,
6762417Sahrens 	    dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);
6772199Sahrens 
6782199Sahrens 	if (err == 0)
6792199Sahrens 		err = dsl_sync_task_group_wait(da.dstg);
6802199Sahrens 
6812199Sahrens 	for (dst = list_head(&da.dstg->dstg_tasks); dst;
6822199Sahrens 	    dst = list_next(&da.dstg->dstg_tasks, dst)) {
6832199Sahrens 		dsl_dataset_t *ds = dst->dst_arg1;
6842199Sahrens 		if (dst->dst_err) {
6852199Sahrens 			dsl_dataset_name(ds, fsname);
6862199Sahrens 			cp = strchr(fsname, '@');
6872199Sahrens 			*cp = '\0';
6882199Sahrens 		}
6892199Sahrens 		/*
6902199Sahrens 		 * If it was successful, destroy_sync would have
6912199Sahrens 		 * closed the ds
6922199Sahrens 		 */
6932199Sahrens 		if (err)
6942199Sahrens 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
6952199Sahrens 	}
6962199Sahrens 
6972199Sahrens 	dsl_sync_task_group_destroy(da.dstg);
6982199Sahrens 	spa_close(spa, FTAG);
6992199Sahrens 	return (err);
7002199Sahrens }
7012199Sahrens 
702789Sahrens int
703789Sahrens dsl_dataset_destroy(const char *name)
704789Sahrens {
705789Sahrens 	int err;
7062199Sahrens 	dsl_sync_task_group_t *dstg;
7072199Sahrens 	objset_t *os;
7082199Sahrens 	dsl_dataset_t *ds;
709789Sahrens 	dsl_dir_t *dd;
7102199Sahrens 	uint64_t obj;
7112199Sahrens 
7122199Sahrens 	if (strchr(name, '@')) {
7132199Sahrens 		/* Destroying a snapshot is simpler */
7142199Sahrens 		err = dsl_dataset_open(name,
7152199Sahrens 		    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
7162199Sahrens 		    FTAG, &ds);
7172199Sahrens 		if (err)
7182199Sahrens 			return (err);
7192199Sahrens 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
7202199Sahrens 		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
7212199Sahrens 		    ds, FTAG, 0);
7222199Sahrens 		if (err)
7232199Sahrens 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
7242199Sahrens 		return (err);
7252199Sahrens 	}
7262199Sahrens 
7272199Sahrens 	err = dmu_objset_open(name, DMU_OST_ANY,
7282199Sahrens 	    DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os);
7292199Sahrens 	if (err)
7302199Sahrens 		return (err);
7312199Sahrens 	ds = os->os->os_dsl_dataset;
7322199Sahrens 	dd = ds->ds_dir;
733789Sahrens 
7342199Sahrens 	/*
7352199Sahrens 	 * Check for errors and mark this ds as inconsistent, in
7362199Sahrens 	 * case we crash while freeing the objects.
7372199Sahrens 	 */
7382199Sahrens 	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
7392199Sahrens 	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
7402199Sahrens 	if (err) {
7412199Sahrens 		dmu_objset_close(os);
7422199Sahrens 		return (err);
7432199Sahrens 	}
7442199Sahrens 
7452199Sahrens 	/*
7462199Sahrens 	 * remove the objects in open context, so that we won't
7472199Sahrens 	 * have too much to do in syncing context.
7482199Sahrens 	 */
7493025Sahrens 	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
7503025Sahrens 	    ds->ds_phys->ds_prev_snap_txg)) {
7512199Sahrens 		dmu_tx_t *tx = dmu_tx_create(os);
7522199Sahrens 		dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
7532199Sahrens 		dmu_tx_hold_bonus(tx, obj);
7542199Sahrens 		err = dmu_tx_assign(tx, TXG_WAIT);
7552199Sahrens 		if (err) {
7562199Sahrens 			/*
7572199Sahrens 			 * Perhaps there is not enough disk
7582199Sahrens 			 * space.  Just deal with it from
7592199Sahrens 			 * dsl_dataset_destroy_sync().
7602199Sahrens 			 */
7612199Sahrens 			dmu_tx_abort(tx);
7622199Sahrens 			continue;
7632199Sahrens 		}
7642199Sahrens 		VERIFY(0 == dmu_object_free(os, obj, tx));
7652199Sahrens 		dmu_tx_commit(tx);
7662199Sahrens 	}
7672199Sahrens 	/* Make sure it's not dirty before we finish destroying it. */
7682199Sahrens 	txg_wait_synced(dd->dd_pool, 0);
7692199Sahrens 
7702199Sahrens 	dmu_objset_close(os);
7712199Sahrens 	if (err != ESRCH)
7722199Sahrens 		return (err);
7732199Sahrens 
7742199Sahrens 	err = dsl_dataset_open(name,
7752199Sahrens 	    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
7762199Sahrens 	    FTAG, &ds);
7771544Seschrock 	if (err)
7781544Seschrock 		return (err);
779789Sahrens 
7802199Sahrens 	err = dsl_dir_open(name, FTAG, &dd, NULL);
7812199Sahrens 	if (err) {
7822199Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
7832199Sahrens 		return (err);
784789Sahrens 	}
785789Sahrens 
7862199Sahrens 	/*
7872199Sahrens 	 * Blow away the dsl_dir + head dataset.
7882199Sahrens 	 */
7892199Sahrens 	dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
7902199Sahrens 	dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
7912199Sahrens 	    dsl_dataset_destroy_sync, ds, FTAG, 0);
7922199Sahrens 	dsl_sync_task_create(dstg, dsl_dir_destroy_check,
7932199Sahrens 	    dsl_dir_destroy_sync, dd, FTAG, 0);
7942199Sahrens 	err = dsl_sync_task_group_wait(dstg);
7952199Sahrens 	dsl_sync_task_group_destroy(dstg);
7962199Sahrens 	/* if it is successful, *destroy_sync will close the ds+dd */
7972199Sahrens 	if (err) {
7982199Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
7992199Sahrens 		dsl_dir_close(dd, FTAG);
8002199Sahrens 	}
801789Sahrens 	return (err);
802789Sahrens }
803789Sahrens 
804789Sahrens int
8052199Sahrens dsl_dataset_rollback(dsl_dataset_t *ds)
806789Sahrens {
8073444Sek110237 	ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX);
8082199Sahrens 	return (dsl_sync_task_do(ds->ds_dir->dd_pool,
8092199Sahrens 	    dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
8102199Sahrens 	    ds, NULL, 0));
811789Sahrens }
812789Sahrens 
813789Sahrens void *
814789Sahrens dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
815789Sahrens     void *p, dsl_dataset_evict_func_t func)
816789Sahrens {
817789Sahrens 	void *old;
818789Sahrens 
819789Sahrens 	mutex_enter(&ds->ds_lock);
820789Sahrens 	old = ds->ds_user_ptr;
821789Sahrens 	if (old == NULL) {
822789Sahrens 		ds->ds_user_ptr = p;
823789Sahrens 		ds->ds_user_evict_func = func;
824789Sahrens 	}
825789Sahrens 	mutex_exit(&ds->ds_lock);
826789Sahrens 	return (old);
827789Sahrens }
828789Sahrens 
829789Sahrens void *
830789Sahrens dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
831789Sahrens {
832789Sahrens 	return (ds->ds_user_ptr);
833789Sahrens }
834789Sahrens 
835789Sahrens 
836*3547Smaybee blkptr_t *
837*3547Smaybee dsl_dataset_get_blkptr(dsl_dataset_t *ds)
838789Sahrens {
839*3547Smaybee 	return (&ds->ds_phys->ds_bp);
840789Sahrens }
841789Sahrens 
842789Sahrens void
843789Sahrens dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
844789Sahrens {
845789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
846789Sahrens 	/* If it's the meta-objset, set dp_meta_rootbp */
847789Sahrens 	if (ds == NULL) {
848789Sahrens 		tx->tx_pool->dp_meta_rootbp = *bp;
849789Sahrens 	} else {
850789Sahrens 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
851789Sahrens 		ds->ds_phys->ds_bp = *bp;
852789Sahrens 	}
853789Sahrens }
854789Sahrens 
855789Sahrens spa_t *
856789Sahrens dsl_dataset_get_spa(dsl_dataset_t *ds)
857789Sahrens {
858789Sahrens 	return (ds->ds_dir->dd_pool->dp_spa);
859789Sahrens }
860789Sahrens 
861789Sahrens void
862789Sahrens dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
863789Sahrens {
864789Sahrens 	dsl_pool_t *dp;
865789Sahrens 
866789Sahrens 	if (ds == NULL) /* this is the meta-objset */
867789Sahrens 		return;
868789Sahrens 
869789Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
8702885Sahrens 
8712885Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0)
8722885Sahrens 		panic("dirtying snapshot!");
873789Sahrens 
874789Sahrens 	dp = ds->ds_dir->dd_pool;
875789Sahrens 
876789Sahrens 	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
877789Sahrens 		/* up the hold count until we can be written out */
878789Sahrens 		dmu_buf_add_ref(ds->ds_dbuf, ds);
879789Sahrens 	}
880789Sahrens }
881789Sahrens 
/*
 * Argument bundle passed (as the opaque `arg`) to kill_blkptr() by the
 * traverse_dsl_dataset() callers in this file: pointers to the caller's
 * running space totals, plus the zio and tx used when freeing blocks.
 */
882789Sahrens struct killarg {
883789Sahrens 	uint64_t *usedp;	/* += bp_get_dasize() per block */
884789Sahrens 	uint64_t *compressedp;	/* += BP_GET_PSIZE() per block */
885789Sahrens 	uint64_t *uncompressedp;	/* += BP_GET_UCSIZE() per block */
886789Sahrens 	zio_t *zio;	/* zio handed to arc_free() */
887789Sahrens 	dmu_tx_t *tx;	/* its tx_txg is the txg given to arc_free() */
888789Sahrens };
889789Sahrens 
890789Sahrens static int
891789Sahrens kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
892789Sahrens {
893789Sahrens 	struct killarg *ka = arg;
894789Sahrens 	blkptr_t *bp = &bc->bc_blkptr;
895789Sahrens 
896789Sahrens 	ASSERT3U(bc->bc_errno, ==, 0);
897789Sahrens 
898789Sahrens 	/*
899789Sahrens 	 * Since this callback is not called concurrently, no lock is
900789Sahrens 	 * needed on the accounting values.
901789Sahrens 	 */
9022082Seschrock 	*ka->usedp += bp_get_dasize(spa, bp);
903789Sahrens 	*ka->compressedp += BP_GET_PSIZE(bp);
904789Sahrens 	*ka->uncompressedp += BP_GET_UCSIZE(bp);
905789Sahrens 	/* XXX check for EIO? */
906789Sahrens 	(void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL,
907789Sahrens 	    ARC_NOWAIT);
908789Sahrens 	return (0);
909789Sahrens }
910789Sahrens 
911789Sahrens /* ARGSUSED */
9122199Sahrens static int
9132199Sahrens dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
914789Sahrens {
9152199Sahrens 	dsl_dataset_t *ds = arg1;
916789Sahrens 
9172199Sahrens 	/*
9182199Sahrens 	 * There must be a previous snapshot.  I suppose we could roll
9192199Sahrens 	 * it back to being empty (and re-initialize the upper (ZPL)
9202199Sahrens 	 * layer).  But for now there's no way to do this via the user
9212199Sahrens 	 * interface.
9222199Sahrens 	 */
9232199Sahrens 	if (ds->ds_phys->ds_prev_snap_txg == 0)
924789Sahrens 		return (EINVAL);
925789Sahrens 
9262199Sahrens 	/*
9272199Sahrens 	 * This must not be a snapshot.
9282199Sahrens 	 */
9292199Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0)
9302199Sahrens 		return (EINVAL);
931789Sahrens 
932789Sahrens 	/*
933789Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
934789Sahrens 	 * them.  Try again.
935789Sahrens 	 */
9362199Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
937789Sahrens 		return (EAGAIN);
9382199Sahrens 
9392199Sahrens 	return (0);
9402199Sahrens }
941789Sahrens 
9422199Sahrens /* ARGSUSED */
9432199Sahrens static void
9442199Sahrens dsl_dataset_rollback_sync(void *arg1, void *arg2, dmu_tx_t *tx)
9452199Sahrens {
9462199Sahrens 	dsl_dataset_t *ds = arg1;
9472199Sahrens 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
948789Sahrens 
949789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
950789Sahrens 
951789Sahrens 	/* Zero out the deadlist. */
952789Sahrens 	bplist_close(&ds->ds_deadlist);
953789Sahrens 	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
954789Sahrens 	ds->ds_phys->ds_deadlist_obj =
955789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
9561544Seschrock 	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
9571544Seschrock 	    ds->ds_phys->ds_deadlist_obj));
958789Sahrens 
959789Sahrens 	{
960789Sahrens 		/* Free blkptrs that we gave birth to */
961789Sahrens 		zio_t *zio;
962789Sahrens 		uint64_t used = 0, compressed = 0, uncompressed = 0;
963789Sahrens 		struct killarg ka;
964789Sahrens 
965789Sahrens 		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
966789Sahrens 		    ZIO_FLAG_MUSTSUCCEED);
967789Sahrens 		ka.usedp = &used;
968789Sahrens 		ka.compressedp = &compressed;
969789Sahrens 		ka.uncompressedp = &uncompressed;
970789Sahrens 		ka.zio = zio;
971789Sahrens 		ka.tx = tx;
972789Sahrens 		(void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
973789Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
974789Sahrens 		(void) zio_wait(zio);
975789Sahrens 
9762199Sahrens 		dsl_dir_diduse_space(ds->ds_dir,
977789Sahrens 		    -used, -compressed, -uncompressed, tx);
978789Sahrens 	}
979789Sahrens 
9802199Sahrens 	/* Change our contents to that of the prev snapshot */
981789Sahrens 	ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj);
982789Sahrens 	ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
983789Sahrens 	ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes;
984789Sahrens 	ds->ds_phys->ds_compressed_bytes =
985789Sahrens 	    ds->ds_prev->ds_phys->ds_compressed_bytes;
986789Sahrens 	ds->ds_phys->ds_uncompressed_bytes =
987789Sahrens 	    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
9882082Seschrock 	ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
989789Sahrens 	ds->ds_phys->ds_unique_bytes = 0;
990789Sahrens 
9912532Sahrens 	if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
9922532Sahrens 		dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
9932532Sahrens 		ds->ds_prev->ds_phys->ds_unique_bytes = 0;
9942532Sahrens 	}
995789Sahrens }
996789Sahrens 
9971731Sbonwick /* ARGSUSED */
9981731Sbonwick static int
9992199Sahrens dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
10001731Sbonwick {
10012199Sahrens 	dsl_dataset_t *ds = arg1;
10021731Sbonwick 
10031731Sbonwick 	/*
10041731Sbonwick 	 * Can't delete a head dataset if there are snapshots of it.
10051731Sbonwick 	 * (Except if the only snapshots are from the branch we cloned
10061731Sbonwick 	 * from.)
10071731Sbonwick 	 */
10081731Sbonwick 	if (ds->ds_prev != NULL &&
10091731Sbonwick 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
10101731Sbonwick 		return (EINVAL);
10111731Sbonwick 
10121731Sbonwick 	return (0);
10131731Sbonwick }
10141731Sbonwick 
10152199Sahrens /* ARGSUSED */
10162199Sahrens static void
10172199Sahrens dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx)
1018789Sahrens {
10192199Sahrens 	dsl_dataset_t *ds = arg1;
1020789Sahrens 
10212199Sahrens 	/* Mark it as inconsistent on-disk, in case we crash */
10222199Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
10232199Sahrens 	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
10242199Sahrens }
1025789Sahrens 
10262199Sahrens /* ARGSUSED */
10272199Sahrens static int
10282199Sahrens dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
10292199Sahrens {
10302199Sahrens 	dsl_dataset_t *ds = arg1;
1031789Sahrens 
1032789Sahrens 	/* Can't delete a branch point. */
10332199Sahrens 	if (ds->ds_phys->ds_num_children > 1)
10342199Sahrens 		return (EEXIST);
1035789Sahrens 
1036789Sahrens 	/*
1037789Sahrens 	 * Can't delete a head dataset if there are snapshots of it.
1038789Sahrens 	 * (Except if the only snapshots are from the branch we cloned
1039789Sahrens 	 * from.)
1040789Sahrens 	 */
1041789Sahrens 	if (ds->ds_prev != NULL &&
10422199Sahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
1043789Sahrens 		return (EINVAL);
1044789Sahrens 
1045789Sahrens 	/*
1046789Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
1047789Sahrens 	 * them.  Try again.
1048789Sahrens 	 */
10492199Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
1050789Sahrens 		return (EAGAIN);
10512199Sahrens 
10522199Sahrens 	/* XXX we should do some i/o error checking... */
10532199Sahrens 	return (0);
10542199Sahrens }
10552199Sahrens 
/*
 * Sync-task function that destroys dataset arg1 (a snapshot or a head
 * dataset).  `tag` is the tag used for the final dsl_dataset_close() of
 * ds; the callers visible in this file pass the tag they opened it with.
 * The pool's config rwlock must be held as writer (asserted).
 *
 * In order, this:
 *  - detaches ds from its previous snapshot, either dropping one child
 *    reference (clone) or making the previous snapshot point past ds;
 *  - if ds has a next snapshot, splices ds out of the snapshot chain,
 *    merges the two deadlists and frees the deadlist blocks born after
 *    the previous snapshot; otherwise destroys ds's deadlist and frees
 *    every block of ds born after the previous snapshot txg;
 *  - subtracts the freed space from the dsl_dir;
 *  - destroys ds's snapshot-name zap if it has one, then either clears
 *    the dir's head-dataset link or removes ds's name from the head's
 *    snapshot-name zap;
 *  - closes ds and frees its object from the meta-objset.
 */
10562199Sahrens static void
10572199Sahrens dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
10582199Sahrens {
10592199Sahrens 	dsl_dataset_t *ds = arg1;
10602199Sahrens 	uint64_t used = 0, compressed = 0, uncompressed = 0;
10612199Sahrens 	zio_t *zio;
10622199Sahrens 	int err;
10632199Sahrens 	int after_branch_point = FALSE;
10642199Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
10652199Sahrens 	objset_t *mos = dp->dp_meta_objset;
10662199Sahrens 	dsl_dataset_t *ds_prev = NULL;
10672199Sahrens 	uint64_t obj;
10682199Sahrens 
10693444Sek110237 	ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX);
	/* The next three mirror what dsl_dataset_destroy_check() rejects. */
10702199Sahrens 	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
10712199Sahrens 	ASSERT(ds->ds_prev == NULL ||
10722199Sahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
10732199Sahrens 	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
10742199Sahrens 
10752199Sahrens 	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
10762199Sahrens 
	/* Remember our object number; the object is freed at the very end. */
10772199Sahrens 	obj = ds->ds_object;
1078789Sahrens 
	/*
	 * Detach from the previous snapshot, if there is one.  If ds has
	 * no cached ds_prev we open it here (tag FTAG) and close it again
	 * near the end of this function.
	 */
1079789Sahrens 	if (ds->ds_phys->ds_prev_snap_obj != 0) {
1080789Sahrens 		if (ds->ds_prev) {
1081789Sahrens 			ds_prev = ds->ds_prev;
1082789Sahrens 		} else {
10832199Sahrens 			VERIFY(0 == dsl_dataset_open_obj(dp,
1084789Sahrens 			    ds->ds_phys->ds_prev_snap_obj, NULL,
10852199Sahrens 			    DS_MODE_NONE, FTAG, &ds_prev));
1086789Sahrens 		}
		/* TRUE when ds_prev does not name us as its next snapshot. */
1087789Sahrens 		after_branch_point =
1088789Sahrens 		    (ds_prev->ds_phys->ds_next_snap_obj != obj);
1089789Sahrens 
1090789Sahrens 		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
1091789Sahrens 		if (after_branch_point &&
1092789Sahrens 		    ds->ds_phys->ds_next_snap_obj == 0) {
1093789Sahrens 			/* This clone is toast. */
1094789Sahrens 			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
1095789Sahrens 			ds_prev->ds_phys->ds_num_children--;
1096789Sahrens 		} else if (!after_branch_point) {
			/* Make ds_prev skip over us in the snapshot chain. */
1097789Sahrens 			ds_prev->ds_phys->ds_next_snap_obj =
1098789Sahrens 			    ds->ds_phys->ds_next_snap_obj;
1099789Sahrens 		}
1100789Sahrens 	}
1101789Sahrens 
	/*
	 * Root zio passed to every arc_free() issued below (directly or
	 * via kill_blkptr()); it is waited on before space accounting.
	 */
1102789Sahrens 	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
1103789Sahrens 
	/* ds has a next snapshot: splice it out of the snapshot chain. */
1104789Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0) {
11052199Sahrens 		blkptr_t bp;
1106789Sahrens 		dsl_dataset_t *ds_next;
1107789Sahrens 		uint64_t itor = 0;
1108789Sahrens 
1109789Sahrens 		spa_scrub_restart(dp->dp_spa, tx->tx_txg);
1110789Sahrens 
11112199Sahrens 		VERIFY(0 == dsl_dataset_open_obj(dp,
11121544Seschrock 		    ds->ds_phys->ds_next_snap_obj, NULL,
11131544Seschrock 		    DS_MODE_NONE, FTAG, &ds_next));
1114789Sahrens 		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
1115789Sahrens 
		/* ds_next's previous snapshot becomes our previous snapshot. */
1116789Sahrens 		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
1117789Sahrens 		ds_next->ds_phys->ds_prev_snap_obj =
1118789Sahrens 		    ds->ds_phys->ds_prev_snap_obj;
1119789Sahrens 		ds_next->ds_phys->ds_prev_snap_txg =
1120789Sahrens 		    ds->ds_phys->ds_prev_snap_txg;
1121789Sahrens 		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
1122789Sahrens 		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
1123789Sahrens 
1124789Sahrens 		/*
1125789Sahrens 		 * Transfer to our deadlist (which will become next's
1126789Sahrens 		 * new deadlist) any entries from next's current
1127789Sahrens 		 * deadlist which were born before prev, and free the
1128789Sahrens 		 * other entries.
1129789Sahrens 		 *
1130789Sahrens 		 * XXX we're doing this long task with the config lock held
1131789Sahrens 		 */
1132789Sahrens 		while (bplist_iterate(&ds_next->ds_deadlist, &itor,
1133789Sahrens 		    &bp) == 0) {
1134789Sahrens 			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
11351544Seschrock 				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
11361544Seschrock 				    &bp, tx));
				/*
				 * Blocks born after ds_prev's own previous
				 * snapshot are added to ds_prev's unique
				 * byte count.
				 */
1137789Sahrens 				if (ds_prev && !after_branch_point &&
1138789Sahrens 				    bp.blk_birth >
1139789Sahrens 				    ds_prev->ds_phys->ds_prev_snap_txg) {
1140789Sahrens 					ds_prev->ds_phys->ds_unique_bytes +=
11412082Seschrock 					    bp_get_dasize(dp->dp_spa, &bp);
1142789Sahrens 				}
1143789Sahrens 			} else {
				/* Born after prev: account for it and free it. */
11442082Seschrock 				used += bp_get_dasize(dp->dp_spa, &bp);
1145789Sahrens 				compressed += BP_GET_PSIZE(&bp);
1146789Sahrens 				uncompressed += BP_GET_UCSIZE(&bp);
1147789Sahrens 				/* XXX check return value? */
1148789Sahrens 				(void) arc_free(zio, dp->dp_spa, tx->tx_txg,
1149789Sahrens 				    &bp, NULL, NULL, ARC_NOWAIT);
1150789Sahrens 			}
1151789Sahrens 		}
1152789Sahrens 
1153789Sahrens 		/* free next's deadlist */
1154789Sahrens 		bplist_close(&ds_next->ds_deadlist);
1155789Sahrens 		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);
1156789Sahrens 
1157789Sahrens 		/* set next's deadlist to our deadlist */
1158789Sahrens 		ds_next->ds_phys->ds_deadlist_obj =
1159789Sahrens 		    ds->ds_phys->ds_deadlist_obj;
11601544Seschrock 		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
11611544Seschrock 		    ds_next->ds_phys->ds_deadlist_obj));
1162789Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1163789Sahrens 
1164789Sahrens 		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
1165789Sahrens 			/*
1166789Sahrens 			 * Update next's unique to include blocks which
1167789Sahrens 			 * were previously shared by only this snapshot
1168789Sahrens 			 * and it.  Those blocks will be born after the
1169789Sahrens 			 * prev snap and before this snap, and will have
1170789Sahrens 			 * died after the next snap and before the one
1171789Sahrens 			 * after that (ie. be on the snap after next's
1172789Sahrens 			 * deadlist).
1173789Sahrens 			 *
1174789Sahrens 			 * XXX we're doing this long task with the
1175789Sahrens 			 * config lock held
1176789Sahrens 			 */
1177789Sahrens 			dsl_dataset_t *ds_after_next;
1178789Sahrens 
11792199Sahrens 			VERIFY(0 == dsl_dataset_open_obj(dp,
1180789Sahrens 			    ds_next->ds_phys->ds_next_snap_obj, NULL,
11811544Seschrock 			    DS_MODE_NONE, FTAG, &ds_after_next));
1182789Sahrens 			itor = 0;
1183789Sahrens 			while (bplist_iterate(&ds_after_next->ds_deadlist,
1184789Sahrens 			    &itor, &bp) == 0) {
1185789Sahrens 				if (bp.blk_birth >
1186789Sahrens 				    ds->ds_phys->ds_prev_snap_txg &&
1187789Sahrens 				    bp.blk_birth <=
1188789Sahrens 				    ds->ds_phys->ds_creation_txg) {
1189789Sahrens 					ds_next->ds_phys->ds_unique_bytes +=
11902082Seschrock 					    bp_get_dasize(dp->dp_spa, &bp);
1191789Sahrens 				}
1192789Sahrens 			}
1193789Sahrens 
1194789Sahrens 			dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG);
1195789Sahrens 			ASSERT3P(ds_next->ds_prev, ==, NULL);
1196789Sahrens 		} else {
1197789Sahrens 			/*
1198789Sahrens 			 * It would be nice to update the head dataset's
1199789Sahrens 			 * unique.  To do so we would have to traverse
1200789Sahrens 			 * it for blocks born after ds_prev, which is
1201789Sahrens 			 * pretty expensive just to maintain something
1202789Sahrens 			 * for debugging purposes.
1203789Sahrens 			 */
			/*
			 * ds_next holds us as its ds_prev (tag ds_next).
			 * Drop that hold and re-point ds_next->ds_prev at
			 * our predecessor, or at NULL if we have none.
			 */
1204789Sahrens 			ASSERT3P(ds_next->ds_prev, ==, ds);
1205789Sahrens 			dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE,
1206789Sahrens 			    ds_next);
1207789Sahrens 			if (ds_prev) {
12082199Sahrens 				VERIFY(0 == dsl_dataset_open_obj(dp,
12091544Seschrock 				    ds->ds_phys->ds_prev_snap_obj, NULL,
12101544Seschrock 				    DS_MODE_NONE, ds_next, &ds_next->ds_prev));
1211789Sahrens 			} else {
1212789Sahrens 				ds_next->ds_prev = NULL;
1213789Sahrens 			}
1214789Sahrens 		}
1215789Sahrens 		dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG);
1216789Sahrens 
1217789Sahrens 		/*
1218789Sahrens 		 * NB: unique_bytes is not accurate for head objsets
1219789Sahrens 		 * because we don't update it when we delete the most
1220789Sahrens 		 * recent snapshot -- see above comment.
1221789Sahrens 		 */
1222789Sahrens 		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
1223789Sahrens 	} else {
1224789Sahrens 		/*
1225789Sahrens 		 * There's no next snapshot, so this is a head dataset.
1226789Sahrens 		 * Destroy the deadlist.  Unless it's a clone, the
1227789Sahrens 		 * deadlist should be empty.  (If it's a clone, it's
1228789Sahrens 		 * safe to ignore the deadlist contents.)
1229789Sahrens 		 */
1230789Sahrens 		struct killarg ka;
1231789Sahrens 
1232789Sahrens 		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
1233789Sahrens 		bplist_close(&ds->ds_deadlist);
1234789Sahrens 		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
1235789Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1236789Sahrens 
1237789Sahrens 		/*
1238789Sahrens 		 * Free everything that we point to (that's born after
1239789Sahrens 		 * the previous snapshot, if we are a clone)
1240789Sahrens 		 *
1241789Sahrens 		 * XXX we're doing this long task with the config lock held
1242789Sahrens 		 */
1243789Sahrens 		ka.usedp = &used;
1244789Sahrens 		ka.compressedp = &compressed;
1245789Sahrens 		ka.uncompressedp = &uncompressed;
1246789Sahrens 		ka.zio = zio;
1247789Sahrens 		ka.tx = tx;
1248789Sahrens 		err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
1249789Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
1250789Sahrens 		ASSERT3U(err, ==, 0);
1251789Sahrens 	}
1252789Sahrens 
	/* Wait for the frees issued under the root zio. */
1253789Sahrens 	err = zio_wait(zio);
1254789Sahrens 	ASSERT3U(err, ==, 0);
1255789Sahrens 
	/* Subtract the space totalled above from the dsl_dir. */
12562199Sahrens 	dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx);
1257789Sahrens 
1258789Sahrens 	if (ds->ds_phys->ds_snapnames_zapobj) {
1259789Sahrens 		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
1260789Sahrens 		ASSERT(err == 0);
1261789Sahrens 	}
1262789Sahrens 
12632199Sahrens 	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
1264789Sahrens 		/* Erase the link in the dataset */
12652199Sahrens 		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
12662199Sahrens 		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
1267789Sahrens 		/*
1268789Sahrens 		 * dsl_dir_sync_destroy() called us, they'll destroy
1269789Sahrens 		 * the dataset.
1270789Sahrens 		 */
1271789Sahrens 	} else {
1272789Sahrens 		/* remove from snapshot namespace */
1273789Sahrens 		dsl_dataset_t *ds_head;
12742199Sahrens 		VERIFY(0 == dsl_dataset_open_obj(dp,
12752199Sahrens 		    ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL,
12761544Seschrock 		    DS_MODE_NONE, FTAG, &ds_head));
12772207Sahrens 		VERIFY(0 == dsl_dataset_get_snapname(ds));
1278789Sahrens #ifdef ZFS_DEBUG
1279789Sahrens 		{
			/* Debug only: the name must currently map to our object. */
1280789Sahrens 			uint64_t val;
1281789Sahrens 			err = zap_lookup(mos,
1282789Sahrens 			    ds_head->ds_phys->ds_snapnames_zapobj,
12832199Sahrens 			    ds->ds_snapname, 8, 1, &val);
1284789Sahrens 			ASSERT3U(err, ==, 0);
1285789Sahrens 			ASSERT3U(val, ==, obj);
1286789Sahrens 		}
1287789Sahrens #endif
1288789Sahrens 		err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj,
12892199Sahrens 		    ds->ds_snapname, tx);
1290789Sahrens 		ASSERT(err == 0);
1291789Sahrens 		dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG);
1292789Sahrens 	}
1293789Sahrens 
	/* Close ds_prev only if we opened it ourselves above. */
1294789Sahrens 	if (ds_prev && ds->ds_prev != ds_prev)
1295789Sahrens 		dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);
1296789Sahrens 
	/* Close ds with the caller-supplied tag, then free its object. */
12972199Sahrens 	dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag);
12982199Sahrens 	VERIFY(0 == dmu_object_free(mos, obj, tx));
12992199Sahrens }
13002199Sahrens 
13012199Sahrens /* ARGSUSED */
13022199Sahrens int
13032199Sahrens dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
13042199Sahrens {
13052199Sahrens 	objset_t *os = arg1;
13062199Sahrens 	dsl_dataset_t *ds = os->os->os_dsl_dataset;
13072199Sahrens 	const char *snapname = arg2;
13082199Sahrens 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
13092199Sahrens 	int err;
13102199Sahrens 	uint64_t value;
1311789Sahrens 
1312789Sahrens 	/*
13132199Sahrens 	 * We don't allow multiple snapshots of the same txg.  If there
13142199Sahrens 	 * is already one, try again.
13152199Sahrens 	 */
13162199Sahrens 	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
13172199Sahrens 		return (EAGAIN);
13182199Sahrens 
13192199Sahrens 	/*
13202199Sahrens 	 * Check for conflicting name snapshot name.
1321789Sahrens 	 */
13222199Sahrens 	err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
13232199Sahrens 	    snapname, 8, 1, &value);
13242199Sahrens 	if (err == 0)
13252199Sahrens 		return (EEXIST);
13262199Sahrens 	if (err != ENOENT)
13272199Sahrens 		return (err);
1328789Sahrens 
13292199Sahrens 	ds->ds_trysnap_txg = tx->tx_txg;
1330789Sahrens 	return (0);
1331789Sahrens }
1332789Sahrens 
/*
 * Sync-task function that creates a snapshot named arg2 of the dataset
 * behind objset arg1.  The pool's config rwlock must be held as writer
 * (asserted).
 *
 * A new dataset object is allocated in the meta-objset and its phys is
 * filled in from the head dataset's current state; the new snapshot is
 * linked in between the head's previous snapshot and the head; the head
 * gets a fresh, empty deadlist (the snapshot keeps the old one); the
 * name is added to the head's snapshot-name zap; and the head's cached
 * ds_prev hold is switched to the new snapshot.
 */
13332199Sahrens void
13342199Sahrens dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
1335789Sahrens {
13362199Sahrens 	objset_t *os = arg1;
13372199Sahrens 	dsl_dataset_t *ds = os->os->os_dsl_dataset;
13382199Sahrens 	const char *snapname = arg2;
13392199Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
1340789Sahrens 	dmu_buf_t *dbuf;
1341789Sahrens 	dsl_dataset_phys_t *dsphys;
13422199Sahrens 	uint64_t dsobj;
1343789Sahrens 	objset_t *mos = dp->dp_meta_objset;
1344789Sahrens 	int err;
1345789Sahrens 
1346789Sahrens 	spa_scrub_restart(dp->dp_spa, tx->tx_txg);
13472199Sahrens 	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
1348789Sahrens 
	/*
	 * Allocate the snapshot's dataset object and fill in its phys
	 * (stored in the object's bonus buffer) from the head dataset.
	 */
1349928Stabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
1350928Stabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
13511544Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
1352789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
1353789Sahrens 	dsphys = dbuf->db_data;
13542199Sahrens 	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
1355789Sahrens 	dsphys->ds_fsid_guid = unique_create();
1356789Sahrens 	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
1357789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
1358789Sahrens 	    sizeof (dsphys->ds_guid));
	/* Chain position: head's old previous snapshot <- new -> head. */
1359789Sahrens 	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
1360789Sahrens 	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
1361789Sahrens 	dsphys->ds_next_snap_obj = ds->ds_object;
1362789Sahrens 	dsphys->ds_num_children = 1;
1363789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
1364789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
	/* The snapshot takes over the head's current deadlist object. */
1365789Sahrens 	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
1366789Sahrens 	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
1367789Sahrens 	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
1368789Sahrens 	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
13692082Seschrock 	dsphys->ds_flags = ds->ds_phys->ds_flags;
1370789Sahrens 	dsphys->ds_bp = ds->ds_phys->ds_bp;
13711544Seschrock 	dmu_buf_rele(dbuf, FTAG);
1372789Sahrens 
	/*
	 * If the previous snapshot named the head as its next snapshot,
	 * point it at the new snapshot instead.  Otherwise it must be a
	 * branch point (more than one child) and is left untouched.
	 */
13732199Sahrens 	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
13742199Sahrens 	if (ds->ds_prev) {
13752199Sahrens 		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
1376789Sahrens 		    ds->ds_object ||
13772199Sahrens 		    ds->ds_prev->ds_phys->ds_num_children > 1);
13782199Sahrens 		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
13792199Sahrens 			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
1380789Sahrens 			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
13812199Sahrens 			    ds->ds_prev->ds_phys->ds_creation_txg);
13822199Sahrens 			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
1383789Sahrens 		}
1384789Sahrens 	}
1385789Sahrens 
	/*
	 * Update the head: the new snapshot becomes its previous
	 * snapshot, its unique bytes restart at zero, and it gets a
	 * newly created deadlist.
	 */
1386789Sahrens 	bplist_close(&ds->ds_deadlist);
1387789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
1388789Sahrens 	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg);
1389789Sahrens 	ds->ds_phys->ds_prev_snap_obj = dsobj;
1390789Sahrens 	ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg;
1391789Sahrens 	ds->ds_phys->ds_unique_bytes = 0;
1392789Sahrens 	ds->ds_phys->ds_deadlist_obj =
1393789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
13941544Seschrock 	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
13951544Seschrock 	    ds->ds_phys->ds_deadlist_obj));
1396789Sahrens 
	/* Record name -> object number in the head's snapshot-name zap. */
1397789Sahrens 	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
1398789Sahrens 	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
1399789Sahrens 	    snapname, 8, 1, &dsobj, tx);
1400789Sahrens 	ASSERT(err == 0);
1401789Sahrens 
	/*
	 * Swap the cached ds_prev hold (tag ds) from the old previous
	 * snapshot over to the newly created one.
	 */
1402789Sahrens 	if (ds->ds_prev)
1403789Sahrens 		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
14041544Seschrock 	VERIFY(0 == dsl_dataset_open_obj(dp,
14051544Seschrock 	    ds->ds_phys->ds_prev_snap_obj, snapname,
14061544Seschrock 	    DS_MODE_NONE, ds, &ds->ds_prev));
1407789Sahrens }
1408789Sahrens 
1409789Sahrens void
1410*3547Smaybee dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
1411789Sahrens {
1412789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1413789Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
1414789Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
1415789Sahrens 
1416789Sahrens 	dsl_dir_dirty(ds->ds_dir, tx);
1417*3547Smaybee 	dmu_objset_sync(ds->ds_user_ptr, zio, tx);
1418*3547Smaybee 	/* Unneeded? bplist_close(&ds->ds_deadlist); */
1419789Sahrens }
1420789Sahrens 
1421789Sahrens void
14222885Sahrens dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
1423789Sahrens {
14242885Sahrens 	dsl_dir_stats(ds->ds_dir, nv);
1425789Sahrens 
14262885Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
14272885Sahrens 	    ds->ds_phys->ds_creation_time);
14282885Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
14292885Sahrens 	    ds->ds_phys->ds_creation_txg);
14302885Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED,
14312885Sahrens 	    ds->ds_phys->ds_used_bytes);
1432789Sahrens 
1433789Sahrens 	if (ds->ds_phys->ds_next_snap_obj) {
1434789Sahrens 		/*
1435789Sahrens 		 * This is a snapshot; override the dd's space used with
14362885Sahrens 		 * our unique space and compression ratio.
1437789Sahrens 		 */
14382885Sahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
14392885Sahrens 		    ds->ds_phys->ds_unique_bytes);
14402885Sahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
14412885Sahrens 		    ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
14422885Sahrens 		    (ds->ds_phys->ds_uncompressed_bytes * 100 /
14432885Sahrens 		    ds->ds_phys->ds_compressed_bytes));
1444789Sahrens 	}
1445789Sahrens }
1446789Sahrens 
14472885Sahrens void
14482885Sahrens dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
1449789Sahrens {
14502885Sahrens 	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
14512885Sahrens 	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
14522885Sahrens 	if (ds->ds_phys->ds_next_snap_obj) {
14532885Sahrens 		stat->dds_is_snapshot = B_TRUE;
14542885Sahrens 		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
14552885Sahrens 	}
14562885Sahrens 
14572885Sahrens 	/* clone origin is really a dsl_dir thing... */
14582885Sahrens 	if (ds->ds_dir->dd_phys->dd_clone_parent_obj) {
14592885Sahrens 		dsl_dataset_t *ods;
14602885Sahrens 
14612885Sahrens 		rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
14622885Sahrens 		VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool,
14632885Sahrens 		    ds->ds_dir->dd_phys->dd_clone_parent_obj,
14642885Sahrens 		    NULL, DS_MODE_NONE, FTAG, &ods));
14652885Sahrens 		dsl_dataset_name(ods, stat->dds_clone_of);
14662885Sahrens 		dsl_dataset_close(ods, DS_MODE_NONE, FTAG);
14672885Sahrens 		rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
14682885Sahrens 	}
14692885Sahrens }
14702885Sahrens 
14712885Sahrens uint64_t
14722885Sahrens dsl_dataset_fsid_guid(dsl_dataset_t *ds)
14732885Sahrens {
14742885Sahrens 	return (ds->ds_phys->ds_fsid_guid);
14752885Sahrens }
14762885Sahrens 
14772885Sahrens void
14782885Sahrens dsl_dataset_space(dsl_dataset_t *ds,
14792885Sahrens     uint64_t *refdbytesp, uint64_t *availbytesp,
14802885Sahrens     uint64_t *usedobjsp, uint64_t *availobjsp)
14812885Sahrens {
14822885Sahrens 	*refdbytesp = ds->ds_phys->ds_used_bytes;
14832885Sahrens 	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
14842885Sahrens 	*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
14852885Sahrens 	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
1486789Sahrens }
1487789Sahrens 
14882199Sahrens /* ARGSUSED */
1489789Sahrens static int
14902199Sahrens dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
1491789Sahrens {
14922199Sahrens 	dsl_dataset_t *ds = arg1;
14932199Sahrens 	char *newsnapname = arg2;
14942199Sahrens 	dsl_dir_t *dd = ds->ds_dir;
1495789Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
14962199Sahrens 	dsl_dataset_t *hds;
14972199Sahrens 	uint64_t val;
1498789Sahrens 	int err;
1499789Sahrens 
15002199Sahrens 	err = dsl_dataset_open_obj(dd->dd_pool,
15012199Sahrens 	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds);
1502789Sahrens 	if (err)
1503789Sahrens 		return (err);
1504789Sahrens 
15052199Sahrens 	/* new name better not be in use */
15062199Sahrens 	err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj,
15072199Sahrens 	    newsnapname, 8, 1, &val);
15082199Sahrens 	dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
1509789Sahrens 
15102199Sahrens 	if (err == 0)
15112199Sahrens 		err = EEXIST;
15122199Sahrens 	else if (err == ENOENT)
15132199Sahrens 		err = 0;
15142199Sahrens 	return (err);
15152199Sahrens }
1516789Sahrens 
15172199Sahrens static void
15182199Sahrens dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
15192199Sahrens {
15202199Sahrens 	dsl_dataset_t *ds = arg1;
15212199Sahrens 	char *newsnapname = arg2;
15222199Sahrens 	dsl_dir_t *dd = ds->ds_dir;
15232199Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
15242199Sahrens 	dsl_dataset_t *hds;
15252199Sahrens 	int err;
1526789Sahrens 
15272199Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
1528789Sahrens 
15292199Sahrens 	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
15302199Sahrens 	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds));
1531789Sahrens 
15322199Sahrens 	VERIFY(0 == dsl_dataset_get_snapname(ds));
15332199Sahrens 	err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj,
15342199Sahrens 	    ds->ds_snapname, tx);
1535789Sahrens 	ASSERT3U(err, ==, 0);
15362199Sahrens 	mutex_enter(&ds->ds_lock);
15372199Sahrens 	(void) strcpy(ds->ds_snapname, newsnapname);
15382199Sahrens 	mutex_exit(&ds->ds_lock);
15392199Sahrens 	err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
15402199Sahrens 	    ds->ds_snapname, 8, 1, &ds->ds_object, tx);
1541789Sahrens 	ASSERT3U(err, ==, 0);
1542789Sahrens 
15432199Sahrens 	dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
1544789Sahrens }
1545789Sahrens 
1546789Sahrens #pragma weak dmu_objset_rename = dsl_dataset_rename
1547789Sahrens int
15482199Sahrens dsl_dataset_rename(const char *oldname, const char *newname)
1549789Sahrens {
1550789Sahrens 	dsl_dir_t *dd;
15512199Sahrens 	dsl_dataset_t *ds;
1552789Sahrens 	const char *tail;
1553789Sahrens 	int err;
1554789Sahrens 
15552199Sahrens 	err = dsl_dir_open(oldname, FTAG, &dd, &tail);
15561544Seschrock 	if (err)
15571544Seschrock 		return (err);
1558789Sahrens 	if (tail == NULL) {
15592199Sahrens 		err = dsl_dir_rename(dd, newname);
1560789Sahrens 		dsl_dir_close(dd, FTAG);
1561789Sahrens 		return (err);
1562789Sahrens 	}
1563789Sahrens 	if (tail[0] != '@') {
1564789Sahrens 		/* the name ended in a nonexistant component */
1565789Sahrens 		dsl_dir_close(dd, FTAG);
1566789Sahrens 		return (ENOENT);
1567789Sahrens 	}
1568789Sahrens 
15692199Sahrens 	dsl_dir_close(dd, FTAG);
15702199Sahrens 
15712199Sahrens 	/* new name must be snapshot in same filesystem */
15722199Sahrens 	tail = strchr(newname, '@');
15732199Sahrens 	if (tail == NULL)
15742199Sahrens 		return (EINVAL);
15752199Sahrens 	tail++;
15762199Sahrens 	if (strncmp(oldname, newname, tail - newname) != 0)
15772199Sahrens 		return (EXDEV);
1578789Sahrens 
15792199Sahrens 	err = dsl_dataset_open(oldname,
15802199Sahrens 	    DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds);
15812199Sahrens 	if (err)
15822199Sahrens 		return (err);
15832199Sahrens 
15842199Sahrens 	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
15852199Sahrens 	    dsl_dataset_snapshot_rename_check,
15862199Sahrens 	    dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);
15872199Sahrens 
15882199Sahrens 	dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);
15892199Sahrens 
1590789Sahrens 	return (err);
1591789Sahrens }
15922082Seschrock 
/*
 * Values computed by dsl_dataset_promote_check() and consumed by
 * dsl_dataset_promote_sync().
 */
struct promotearg {
	/* space moved between the two dirs via dsl_dir_diduse_space() */
	uint64_t used;
	uint64_t comp;
	uint64_t uncomp;
	/* stored as the pivot snapshot's ds_unique_bytes */
	uint64_t unique;
	/* stored as the pivot snapshot's ds_next_snap_obj */
	uint64_t newnext_obj;
	/* snapnames zapobj that the moved snapshot names are removed from */
	uint64_t snapnames_obj;
};
15972199Sahrens 
15982082Seschrock static int
15992199Sahrens dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
16002082Seschrock {
16012199Sahrens 	dsl_dataset_t *hds = arg1;
16022199Sahrens 	struct promotearg *pa = arg2;
16032199Sahrens 	dsl_dir_t *dd = hds->ds_dir;
16042199Sahrens 	dsl_pool_t *dp = hds->ds_dir->dd_pool;
16052082Seschrock 	dsl_dir_t *pdd = NULL;
16062082Seschrock 	dsl_dataset_t *ds = NULL;
16072082Seschrock 	dsl_dataset_t *pivot_ds = NULL;
16082082Seschrock 	dsl_dataset_t *newnext_ds = NULL;
16092082Seschrock 	int err;
16102082Seschrock 	char *name = NULL;
16112199Sahrens 	uint64_t itor = 0;
16122082Seschrock 	blkptr_t bp;
16132082Seschrock 
16142199Sahrens 	bzero(pa, sizeof (*pa));
16152199Sahrens 
16162082Seschrock 	/* Check that it is a clone */
16172082Seschrock 	if (dd->dd_phys->dd_clone_parent_obj == 0)
16182082Seschrock 		return (EINVAL);
16192082Seschrock 
16202199Sahrens 	/* Since this is so expensive, don't do the preliminary check */
16212199Sahrens 	if (!dmu_tx_is_syncing(tx))
16222199Sahrens 		return (0);
16232199Sahrens 
16242199Sahrens 	if (err = dsl_dataset_open_obj(dp,
16252082Seschrock 	    dd->dd_phys->dd_clone_parent_obj,
16262082Seschrock 	    NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds))
16272082Seschrock 		goto out;
16282082Seschrock 	pdd = pivot_ds->ds_dir;
16292199Sahrens 
16302199Sahrens 	{
16312199Sahrens 		dsl_dataset_t *phds;
16322199Sahrens 		if (err = dsl_dataset_open_obj(dd->dd_pool,
16332199Sahrens 		    pdd->dd_phys->dd_head_dataset_obj,
16342199Sahrens 		    NULL, DS_MODE_NONE, FTAG, &phds))
16352199Sahrens 			goto out;
16362199Sahrens 		pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj;
16372199Sahrens 		dsl_dataset_close(phds, DS_MODE_NONE, FTAG);
16382199Sahrens 	}
16392082Seschrock 
16402082Seschrock 	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) {
16412082Seschrock 		err = EXDEV;
16422082Seschrock 		goto out;
16432082Seschrock 	}
16442082Seschrock 
16452082Seschrock 	/* find pivot point's new next ds */
16462082Seschrock 	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object,
16472082Seschrock 	    NULL, DS_MODE_NONE, FTAG, &newnext_ds));
16482082Seschrock 	while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) {
16492082Seschrock 		dsl_dataset_t *prev;
16502082Seschrock 
16512082Seschrock 		if (err = dsl_dataset_open_obj(dd->dd_pool,
16522199Sahrens 		    newnext_ds->ds_phys->ds_prev_snap_obj,
16532199Sahrens 		    NULL, DS_MODE_NONE, FTAG, &prev))
16542082Seschrock 			goto out;
16552082Seschrock 		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
16562082Seschrock 		newnext_ds = prev;
16572082Seschrock 	}
16582199Sahrens 	pa->newnext_obj = newnext_ds->ds_object;
16592082Seschrock 
16602082Seschrock 	/* compute pivot point's new unique space */
16612082Seschrock 	while ((err = bplist_iterate(&newnext_ds->ds_deadlist,
16622082Seschrock 	    &itor, &bp)) == 0) {
16632082Seschrock 		if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg)
16642199Sahrens 			pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp);
16652082Seschrock 	}
16662082Seschrock 	if (err != ENOENT)
16672082Seschrock 		goto out;
16682082Seschrock 
16692082Seschrock 	/* Walk the snapshots that we are moving */
16702082Seschrock 	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
16712082Seschrock 	ds = pivot_ds;
16722082Seschrock 	/* CONSTCOND */
16732082Seschrock 	while (TRUE) {
16742082Seschrock 		uint64_t val, dlused, dlcomp, dluncomp;
16752082Seschrock 		dsl_dataset_t *prev;
16762082Seschrock 
16772082Seschrock 		/* Check that the snapshot name does not conflict */
16782082Seschrock 		dsl_dataset_name(ds, name);
16792082Seschrock 		err = zap_lookup(dd->dd_pool->dp_meta_objset,
16802082Seschrock 		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
16812082Seschrock 		    8, 1, &val);
16822082Seschrock 		if (err != ENOENT) {
16832082Seschrock 			if (err == 0)
16842082Seschrock 				err = EEXIST;
16852082Seschrock 			goto out;
16862082Seschrock 		}
16872082Seschrock 
16882082Seschrock 		/*
16892082Seschrock 		 * compute space to transfer.  Each snapshot gave birth to:
16902082Seschrock 		 * (my used) - (prev's used) + (deadlist's used)
16912082Seschrock 		 */
16922199Sahrens 		pa->used += ds->ds_phys->ds_used_bytes;
16932199Sahrens 		pa->comp += ds->ds_phys->ds_compressed_bytes;
16942199Sahrens 		pa->uncomp += ds->ds_phys->ds_uncompressed_bytes;
16952082Seschrock 
16962082Seschrock 		/* If we reach the first snapshot, we're done. */
16972082Seschrock 		if (ds->ds_phys->ds_prev_snap_obj == 0)
16982082Seschrock 			break;
16992082Seschrock 
17002082Seschrock 		if (err = bplist_space(&ds->ds_deadlist,
17012082Seschrock 		    &dlused, &dlcomp, &dluncomp))
17022082Seschrock 			goto out;
17032082Seschrock 		if (err = dsl_dataset_open_obj(dd->dd_pool,
17042082Seschrock 		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
17052082Seschrock 		    FTAG, &prev))
17062082Seschrock 			goto out;
17072199Sahrens 		pa->used += dlused - prev->ds_phys->ds_used_bytes;
17082199Sahrens 		pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes;
17092199Sahrens 		pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes;
17102082Seschrock 
17112082Seschrock 		/*
17122082Seschrock 		 * We could be a clone of a clone.  If we reach our
17132082Seschrock 		 * parent's branch point, we're done.
17142082Seschrock 		 */
17152082Seschrock 		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
17162082Seschrock 			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
17172082Seschrock 			break;
17182082Seschrock 		}
17192082Seschrock 		if (ds != pivot_ds)
17202082Seschrock 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
17212082Seschrock 		ds = prev;
17222082Seschrock 	}
17232082Seschrock 
17242082Seschrock 	/* Check that there is enough space here */
17252199Sahrens 	err = dsl_dir_transfer_possible(pdd, dd, pa->used);
17262199Sahrens 
17272199Sahrens out:
17282199Sahrens 	if (ds && ds != pivot_ds)
17292199Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
17302199Sahrens 	if (pivot_ds)
17312199Sahrens 		dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG);
17322199Sahrens 	if (newnext_ds)
17332199Sahrens 		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
17342199Sahrens 	if (name)
17352199Sahrens 		kmem_free(name, MAXPATHLEN);
17362199Sahrens 	return (err);
17372199Sahrens }
17382082Seschrock 
17392199Sahrens static void
17402199Sahrens dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx)
17412199Sahrens {
17422199Sahrens 	dsl_dataset_t *hds = arg1;
17432199Sahrens 	struct promotearg *pa = arg2;
17442199Sahrens 	dsl_dir_t *dd = hds->ds_dir;
17452199Sahrens 	dsl_pool_t *dp = hds->ds_dir->dd_pool;
17462199Sahrens 	dsl_dir_t *pdd = NULL;
17472199Sahrens 	dsl_dataset_t *ds, *pivot_ds;
17482199Sahrens 	char *name;
17492199Sahrens 
17502199Sahrens 	ASSERT(dd->dd_phys->dd_clone_parent_obj != 0);
17512199Sahrens 	ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));
17522199Sahrens 
17532199Sahrens 	VERIFY(0 == dsl_dataset_open_obj(dp,
17542199Sahrens 	    dd->dd_phys->dd_clone_parent_obj,
17552199Sahrens 	    NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds));
17562417Sahrens 	/*
17572417Sahrens 	 * We need to explicitly open pdd, since pivot_ds's pdd will be
17582417Sahrens 	 * changing.
17592417Sahrens 	 */
17602417Sahrens 	VERIFY(0 == dsl_dir_open_obj(dp, pivot_ds->ds_dir->dd_object,
17612417Sahrens 	    NULL, FTAG, &pdd));
17622082Seschrock 
17632082Seschrock 	/* move snapshots to this dir */
17642199Sahrens 	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
17652082Seschrock 	ds = pivot_ds;
17662082Seschrock 	/* CONSTCOND */
17672082Seschrock 	while (TRUE) {
17682082Seschrock 		dsl_dataset_t *prev;
17692082Seschrock 
17702082Seschrock 		/* move snap name entry */
17712082Seschrock 		dsl_dataset_name(ds, name);
17722199Sahrens 		VERIFY(0 == zap_remove(dp->dp_meta_objset,
17732199Sahrens 		    pa->snapnames_obj, ds->ds_snapname, tx));
17742199Sahrens 		VERIFY(0 == zap_add(dp->dp_meta_objset,
17752082Seschrock 		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
17762082Seschrock 		    8, 1, &ds->ds_object, tx));
17772082Seschrock 
17782082Seschrock 		/* change containing dsl_dir */
17792082Seschrock 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
17802082Seschrock 		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object);
17812082Seschrock 		ds->ds_phys->ds_dir_obj = dd->dd_object;
17822082Seschrock 		ASSERT3P(ds->ds_dir, ==, pdd);
17832082Seschrock 		dsl_dir_close(ds->ds_dir, ds);
17842199Sahrens 		VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
17852082Seschrock 		    NULL, ds, &ds->ds_dir));
17862082Seschrock 
17872082Seschrock 		ASSERT3U(dsl_prop_numcb(ds), ==, 0);
17882082Seschrock 
17892082Seschrock 		if (ds->ds_phys->ds_prev_snap_obj == 0)
17902082Seschrock 			break;
17912082Seschrock 
17922199Sahrens 		VERIFY(0 == dsl_dataset_open_obj(dp,
17932082Seschrock 		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
17942082Seschrock 		    FTAG, &prev));
17952082Seschrock 
17962082Seschrock 		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
17972082Seschrock 			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
17982082Seschrock 			break;
17992082Seschrock 		}
18002082Seschrock 		if (ds != pivot_ds)
18012082Seschrock 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
18022082Seschrock 		ds = prev;
18032082Seschrock 	}
18042199Sahrens 	if (ds != pivot_ds)
18052199Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
18062082Seschrock 
18072082Seschrock 	/* change pivot point's next snap */
18082082Seschrock 	dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx);
18092199Sahrens 	pivot_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj;
18102082Seschrock 
18112082Seschrock 	/* change clone_parent-age */
18122082Seschrock 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
18132082Seschrock 	ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object);
18142082Seschrock 	dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj;
18152082Seschrock 	dmu_buf_will_dirty(pdd->dd_dbuf, tx);
18162082Seschrock 	pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object;
18172082Seschrock 
18182082Seschrock 	/* change space accounting */
18192199Sahrens 	dsl_dir_diduse_space(pdd, -pa->used, -pa->comp, -pa->uncomp, tx);
18202199Sahrens 	dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx);
18212199Sahrens 	pivot_ds->ds_phys->ds_unique_bytes = pa->unique;
18222082Seschrock 
18232417Sahrens 	dsl_dir_close(pdd, FTAG);
18242199Sahrens 	dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG);
18252199Sahrens 	kmem_free(name, MAXPATHLEN);
18262082Seschrock }
18272082Seschrock 
18282082Seschrock int
18292082Seschrock dsl_dataset_promote(const char *name)
18302082Seschrock {
18312082Seschrock 	dsl_dataset_t *ds;
18322082Seschrock 	int err;
18332082Seschrock 	dmu_object_info_t doi;
18342199Sahrens 	struct promotearg pa;
18352082Seschrock 
18362082Seschrock 	err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds);
18372082Seschrock 	if (err)
18382082Seschrock 		return (err);
18392082Seschrock 
18402082Seschrock 	err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset,
18412082Seschrock 	    ds->ds_phys->ds_snapnames_zapobj, &doi);
18422082Seschrock 	if (err) {
18432082Seschrock 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
18442082Seschrock 		return (err);
18452082Seschrock 	}
18462082Seschrock 
18472082Seschrock 	/*
18482082Seschrock 	 * Add in 128x the snapnames zapobj size, since we will be moving
18492082Seschrock 	 * a bunch of snapnames to the promoted ds, and dirtying their
18502082Seschrock 	 * bonus buffers.
18512082Seschrock 	 */
18522199Sahrens 	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
18532199Sahrens 	    dsl_dataset_promote_check,
18542199Sahrens 	    dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks);
18552082Seschrock 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
18562082Seschrock 	return (err);
18572082Seschrock }
1858