xref: /onnv-gate/usr/src/uts/common/fs/zfs/dsl_dataset.c (revision 3025:4e5ee8301d84)
1789Sahrens /*
2789Sahrens  * CDDL HEADER START
3789Sahrens  *
4789Sahrens  * The contents of this file are subject to the terms of the
51544Seschrock  * Common Development and Distribution License (the "License").
61544Seschrock  * You may not use this file except in compliance with the License.
7789Sahrens  *
8789Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9789Sahrens  * or http://www.opensolaris.org/os/licensing.
10789Sahrens  * See the License for the specific language governing permissions
11789Sahrens  * and limitations under the License.
12789Sahrens  *
13789Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14789Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15789Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16789Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17789Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18789Sahrens  *
19789Sahrens  * CDDL HEADER END
20789Sahrens  */
21789Sahrens /*
221544Seschrock  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23789Sahrens  * Use is subject to license terms.
24789Sahrens  */
25789Sahrens 
26789Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
27789Sahrens 
28789Sahrens #include <sys/dmu_objset.h>
29789Sahrens #include <sys/dsl_dataset.h>
30789Sahrens #include <sys/dsl_dir.h>
312082Seschrock #include <sys/dsl_prop.h>
322199Sahrens #include <sys/dsl_synctask.h>
33789Sahrens #include <sys/dmu_traverse.h>
34789Sahrens #include <sys/dmu_tx.h>
35789Sahrens #include <sys/arc.h>
36789Sahrens #include <sys/zio.h>
37789Sahrens #include <sys/zap.h>
38789Sahrens #include <sys/unique.h>
39789Sahrens #include <sys/zfs_context.h>
40789Sahrens 
412199Sahrens static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
422199Sahrens static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
432199Sahrens static dsl_checkfunc_t dsl_dataset_rollback_check;
442199Sahrens static dsl_syncfunc_t dsl_dataset_rollback_sync;
452199Sahrens static dsl_checkfunc_t dsl_dataset_destroy_check;
462199Sahrens static dsl_syncfunc_t dsl_dataset_destroy_sync;
471731Sbonwick 
48789Sahrens #define	DOS_REF_MAX	(1ULL << 62)
49789Sahrens 
50789Sahrens #define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE
51789Sahrens 
52789Sahrens /*
53789Sahrens  * We use weighted reference counts to express the various forms of exclusion
54789Sahrens  * between different open modes.  A STANDARD open is 1 point, an EXCLUSIVE open
55789Sahrens  * is DOS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE.
56789Sahrens  * This makes the exclusion logic simple: the total refcnt for all opens cannot
57789Sahrens  * exceed DOS_REF_MAX.  For example, EXCLUSIVE opens are exclusive because their
58789Sahrens  * weight (DOS_REF_MAX) consumes the entire refcnt space.  PRIMARY opens consume
59789Sahrens  * just over half of the refcnt space, so there can't be more than one, but it
60789Sahrens  * can peacefully coexist with any number of STANDARD opens.
61789Sahrens  */
62789Sahrens static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
63789Sahrens 	0,			/* DOS_MODE_NONE - invalid		*/
64789Sahrens 	1,			/* DOS_MODE_STANDARD - unlimited number	*/
65789Sahrens 	(DOS_REF_MAX >> 1) + 1,	/* DOS_MODE_PRIMARY - only one of these	*/
66789Sahrens 	DOS_REF_MAX		/* DOS_MODE_EXCLUSIVE - no other opens	*/
67789Sahrens };
68789Sahrens 
69789Sahrens 
70789Sahrens void
71789Sahrens dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
72789Sahrens {
732082Seschrock 	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
74789Sahrens 	int compressed = BP_GET_PSIZE(bp);
75789Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
76789Sahrens 
77789Sahrens 	dprintf_bp(bp, "born, ds=%p\n", ds);
78789Sahrens 
79789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
80789Sahrens 	/* It could have been compressed away to nothing */
81789Sahrens 	if (BP_IS_HOLE(bp))
82789Sahrens 		return;
83789Sahrens 	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
84789Sahrens 	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
85789Sahrens 	if (ds == NULL) {
86789Sahrens 		/*
87789Sahrens 		 * Account for the meta-objset space in its placeholder
88789Sahrens 		 * dsl_dir.
89789Sahrens 		 */
90789Sahrens 		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
91789Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
92789Sahrens 		    used, compressed, uncompressed, tx);
93789Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
94789Sahrens 		return;
95789Sahrens 	}
96789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
97789Sahrens 	mutex_enter(&ds->ds_lock);
98789Sahrens 	ds->ds_phys->ds_used_bytes += used;
99789Sahrens 	ds->ds_phys->ds_compressed_bytes += compressed;
100789Sahrens 	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
101789Sahrens 	ds->ds_phys->ds_unique_bytes += used;
102789Sahrens 	mutex_exit(&ds->ds_lock);
103789Sahrens 	dsl_dir_diduse_space(ds->ds_dir,
104789Sahrens 	    used, compressed, uncompressed, tx);
105789Sahrens }
106789Sahrens 
107789Sahrens void
108789Sahrens dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
109789Sahrens {
1102082Seschrock 	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
111789Sahrens 	int compressed = BP_GET_PSIZE(bp);
112789Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
113789Sahrens 
114789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
115789Sahrens 	if (BP_IS_HOLE(bp))
116789Sahrens 		return;
117789Sahrens 
118789Sahrens 	ASSERT(used > 0);
119789Sahrens 	if (ds == NULL) {
120789Sahrens 		/*
121789Sahrens 		 * Account for the meta-objset space in its placeholder
122789Sahrens 		 * dataset.
123789Sahrens 		 */
124789Sahrens 		/* XXX this can fail, what do we do when it does? */
125789Sahrens 		(void) arc_free(NULL, tx->tx_pool->dp_spa,
126789Sahrens 		    tx->tx_txg, bp, NULL, NULL, ARC_WAIT);
127789Sahrens 		bzero(bp, sizeof (blkptr_t));
128789Sahrens 
129789Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
130789Sahrens 		    -used, -compressed, -uncompressed, tx);
131789Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
132789Sahrens 		return;
133789Sahrens 	}
134789Sahrens 	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
135789Sahrens 
136789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
137789Sahrens 
138789Sahrens 	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
139789Sahrens 		dprintf_bp(bp, "freeing: %s", "");
140789Sahrens 		/* XXX check return code? */
141789Sahrens 		(void) arc_free(NULL, tx->tx_pool->dp_spa,
142789Sahrens 		    tx->tx_txg, bp, NULL, NULL, ARC_WAIT);
143789Sahrens 
144789Sahrens 		mutex_enter(&ds->ds_lock);
145789Sahrens 		/* XXX unique_bytes is not accurate for head datasets */
146789Sahrens 		/* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */
147789Sahrens 		ds->ds_phys->ds_unique_bytes -= used;
148789Sahrens 		mutex_exit(&ds->ds_lock);
149789Sahrens 		dsl_dir_diduse_space(ds->ds_dir,
150789Sahrens 		    -used, -compressed, -uncompressed, tx);
151789Sahrens 	} else {
152789Sahrens 		dprintf_bp(bp, "putting on dead list: %s", "");
1531544Seschrock 		VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
154789Sahrens 		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
155789Sahrens 		if (ds->ds_phys->ds_prev_snap_obj != 0) {
156789Sahrens 			ASSERT3U(ds->ds_prev->ds_object, ==,
157789Sahrens 			    ds->ds_phys->ds_prev_snap_obj);
158789Sahrens 			ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
159789Sahrens 			if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
1602082Seschrock 			    ds->ds_object && bp->blk_birth >
161789Sahrens 			    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
162789Sahrens 				dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
163789Sahrens 				mutex_enter(&ds->ds_prev->ds_lock);
164789Sahrens 				ds->ds_prev->ds_phys->ds_unique_bytes +=
165789Sahrens 				    used;
166789Sahrens 				mutex_exit(&ds->ds_prev->ds_lock);
167789Sahrens 			}
168789Sahrens 		}
169789Sahrens 	}
170789Sahrens 	bzero(bp, sizeof (blkptr_t));
171789Sahrens 	mutex_enter(&ds->ds_lock);
172789Sahrens 	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
173789Sahrens 	ds->ds_phys->ds_used_bytes -= used;
174789Sahrens 	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
175789Sahrens 	ds->ds_phys->ds_compressed_bytes -= compressed;
176789Sahrens 	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
177789Sahrens 	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
178789Sahrens 	mutex_exit(&ds->ds_lock);
179789Sahrens }
180789Sahrens 
1811544Seschrock uint64_t
1821544Seschrock dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
183789Sahrens {
1842885Sahrens 	uint64_t trysnap = 0;
1852885Sahrens 
186789Sahrens 	if (ds == NULL)
1871544Seschrock 		return (0);
188789Sahrens 	/*
189789Sahrens 	 * The snapshot creation could fail, but that would cause an
190789Sahrens 	 * incorrect FALSE return, which would only result in an
191789Sahrens 	 * overestimation of the amount of space that an operation would
192789Sahrens 	 * consume, which is OK.
193789Sahrens 	 *
194789Sahrens 	 * There's also a small window where we could miss a pending
195789Sahrens 	 * snapshot, because we could set the sync task in the quiescing
196789Sahrens 	 * phase.  So this should only be used as a guess.
197789Sahrens 	 */
1982885Sahrens 	if (ds->ds_trysnap_txg >
1992885Sahrens 	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
2002885Sahrens 		trysnap = ds->ds_trysnap_txg;
2012885Sahrens 	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
2021544Seschrock }
2031544Seschrock 
2041544Seschrock int
2051544Seschrock dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
2061544Seschrock {
2071544Seschrock 	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
208789Sahrens }
209789Sahrens 
210789Sahrens /* ARGSUSED */
211789Sahrens static void
212789Sahrens dsl_dataset_evict(dmu_buf_t *db, void *dsv)
213789Sahrens {
214789Sahrens 	dsl_dataset_t *ds = dsv;
215789Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
216789Sahrens 
217789Sahrens 	/* open_refcount == DOS_REF_MAX when deleting */
218789Sahrens 	ASSERT(ds->ds_open_refcount == 0 ||
219789Sahrens 	    ds->ds_open_refcount == DOS_REF_MAX);
220789Sahrens 
221789Sahrens 	dprintf_ds(ds, "evicting %s\n", "");
222789Sahrens 
223789Sahrens 	unique_remove(ds->ds_phys->ds_fsid_guid);
224789Sahrens 
225789Sahrens 	if (ds->ds_user_ptr != NULL)
226789Sahrens 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
227789Sahrens 
228789Sahrens 	if (ds->ds_prev) {
229789Sahrens 		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
230789Sahrens 		ds->ds_prev = NULL;
231789Sahrens 	}
232789Sahrens 
233789Sahrens 	bplist_close(&ds->ds_deadlist);
234789Sahrens 	dsl_dir_close(ds->ds_dir, ds);
235789Sahrens 
236789Sahrens 	if (list_link_active(&ds->ds_synced_link))
237789Sahrens 		list_remove(&dp->dp_synced_objsets, ds);
238789Sahrens 
2392856Snd150628 	mutex_destroy(&ds->ds_lock);
2402856Snd150628 	mutex_destroy(&ds->ds_deadlist.bpl_lock);
2412856Snd150628 
242789Sahrens 	kmem_free(ds, sizeof (dsl_dataset_t));
243789Sahrens }
244789Sahrens 
2451544Seschrock static int
246789Sahrens dsl_dataset_get_snapname(dsl_dataset_t *ds)
247789Sahrens {
248789Sahrens 	dsl_dataset_phys_t *headphys;
249789Sahrens 	int err;
250789Sahrens 	dmu_buf_t *headdbuf;
251789Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
252789Sahrens 	objset_t *mos = dp->dp_meta_objset;
253789Sahrens 
254789Sahrens 	if (ds->ds_snapname[0])
2551544Seschrock 		return (0);
256789Sahrens 	if (ds->ds_phys->ds_next_snap_obj == 0)
2571544Seschrock 		return (0);
258789Sahrens 
2591544Seschrock 	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
2601544Seschrock 	    FTAG, &headdbuf);
2611544Seschrock 	if (err)
2621544Seschrock 		return (err);
263789Sahrens 	headphys = headdbuf->db_data;
264789Sahrens 	err = zap_value_search(dp->dp_meta_objset,
265789Sahrens 	    headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname);
2661544Seschrock 	dmu_buf_rele(headdbuf, FTAG);
2671544Seschrock 	return (err);
268789Sahrens }
269789Sahrens 
2701544Seschrock int
271789Sahrens dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname,
2721544Seschrock     int mode, void *tag, dsl_dataset_t **dsp)
273789Sahrens {
274789Sahrens 	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
275789Sahrens 	objset_t *mos = dp->dp_meta_objset;
276789Sahrens 	dmu_buf_t *dbuf;
277789Sahrens 	dsl_dataset_t *ds;
2781544Seschrock 	int err;
279789Sahrens 
280789Sahrens 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
281789Sahrens 	    dsl_pool_sync_context(dp));
282789Sahrens 
2831544Seschrock 	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
2841544Seschrock 	if (err)
2851544Seschrock 		return (err);
286789Sahrens 	ds = dmu_buf_get_user(dbuf);
287789Sahrens 	if (ds == NULL) {
288789Sahrens 		dsl_dataset_t *winner;
289789Sahrens 
290789Sahrens 		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
291789Sahrens 		ds->ds_dbuf = dbuf;
292789Sahrens 		ds->ds_object = dsobj;
293789Sahrens 		ds->ds_phys = dbuf->db_data;
294789Sahrens 
2952856Snd150628 		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
2962856Snd150628 		mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
2972856Snd150628 		    NULL);
2982856Snd150628 
2991544Seschrock 		err = bplist_open(&ds->ds_deadlist,
300789Sahrens 		    mos, ds->ds_phys->ds_deadlist_obj);
3011544Seschrock 		if (err == 0) {
3021544Seschrock 			err = dsl_dir_open_obj(dp,
3031544Seschrock 			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
3041544Seschrock 		}
3051544Seschrock 		if (err) {
3061544Seschrock 			/*
3071544Seschrock 			 * we don't really need to close the blist if we
3081544Seschrock 			 * just opened it.
3091544Seschrock 			 */
3102856Snd150628 			mutex_destroy(&ds->ds_lock);
3112856Snd150628 			mutex_destroy(&ds->ds_deadlist.bpl_lock);
3121544Seschrock 			kmem_free(ds, sizeof (dsl_dataset_t));
3131544Seschrock 			dmu_buf_rele(dbuf, tag);
3141544Seschrock 			return (err);
3151544Seschrock 		}
316789Sahrens 
317789Sahrens 		if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) {
318789Sahrens 			ds->ds_snapname[0] = '\0';
319789Sahrens 			if (ds->ds_phys->ds_prev_snap_obj) {
3201544Seschrock 				err = dsl_dataset_open_obj(dp,
321789Sahrens 				    ds->ds_phys->ds_prev_snap_obj, NULL,
3221544Seschrock 				    DS_MODE_NONE, ds, &ds->ds_prev);
323789Sahrens 			}
324789Sahrens 		} else {
325789Sahrens 			if (snapname) {
326789Sahrens #ifdef ZFS_DEBUG
327789Sahrens 				dsl_dataset_phys_t *headphys;
3281544Seschrock 				dmu_buf_t *headdbuf;
3291544Seschrock 				err = dmu_bonus_hold(mos,
3301544Seschrock 				    ds->ds_dir->dd_phys->dd_head_dataset_obj,
3311544Seschrock 				    FTAG, &headdbuf);
3321544Seschrock 				if (err == 0) {
3331544Seschrock 					headphys = headdbuf->db_data;
3341544Seschrock 					uint64_t foundobj;
3351544Seschrock 					err = zap_lookup(dp->dp_meta_objset,
3361544Seschrock 					    headphys->ds_snapnames_zapobj,
3371544Seschrock 					    snapname, sizeof (foundobj), 1,
3381544Seschrock 					    &foundobj);
3391544Seschrock 					ASSERT3U(foundobj, ==, dsobj);
3401544Seschrock 					dmu_buf_rele(headdbuf, FTAG);
3411544Seschrock 				}
342789Sahrens #endif
343789Sahrens 				(void) strcat(ds->ds_snapname, snapname);
344789Sahrens 			} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
3451544Seschrock 				err = dsl_dataset_get_snapname(ds);
346789Sahrens 			}
347789Sahrens 		}
348789Sahrens 
3491544Seschrock 		if (err == 0) {
3501544Seschrock 			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
3511544Seschrock 			    dsl_dataset_evict);
3521544Seschrock 		}
3531544Seschrock 		if (err || winner) {
354789Sahrens 			bplist_close(&ds->ds_deadlist);
355789Sahrens 			if (ds->ds_prev) {
356789Sahrens 				dsl_dataset_close(ds->ds_prev,
357789Sahrens 				    DS_MODE_NONE, ds);
358789Sahrens 			}
359789Sahrens 			dsl_dir_close(ds->ds_dir, ds);
3602856Snd150628 			mutex_destroy(&ds->ds_lock);
3612856Snd150628 			mutex_destroy(&ds->ds_deadlist.bpl_lock);
362789Sahrens 			kmem_free(ds, sizeof (dsl_dataset_t));
3631544Seschrock 			if (err) {
3641544Seschrock 				dmu_buf_rele(dbuf, tag);
3651544Seschrock 				return (err);
3661544Seschrock 			}
367789Sahrens 			ds = winner;
368789Sahrens 		} else {
369789Sahrens 			uint64_t new =
370789Sahrens 			    unique_insert(ds->ds_phys->ds_fsid_guid);
371789Sahrens 			if (new != ds->ds_phys->ds_fsid_guid) {
372789Sahrens 				/* XXX it won't necessarily be synced... */
373789Sahrens 				ds->ds_phys->ds_fsid_guid = new;
374789Sahrens 			}
375789Sahrens 		}
376789Sahrens 	}
377789Sahrens 	ASSERT3P(ds->ds_dbuf, ==, dbuf);
378789Sahrens 	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
379789Sahrens 
380789Sahrens 	mutex_enter(&ds->ds_lock);
381789Sahrens 	if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY &&
3822082Seschrock 	    (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) &&
3832082Seschrock 	    !DS_MODE_IS_INCONSISTENT(mode)) ||
384789Sahrens 	    (ds->ds_open_refcount + weight > DOS_REF_MAX)) {
385789Sahrens 		mutex_exit(&ds->ds_lock);
386789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
3871544Seschrock 		return (EBUSY);
388789Sahrens 	}
389789Sahrens 	ds->ds_open_refcount += weight;
390789Sahrens 	mutex_exit(&ds->ds_lock);
391789Sahrens 
3921544Seschrock 	*dsp = ds;
3931544Seschrock 	return (0);
394789Sahrens }
395789Sahrens 
396789Sahrens int
397789Sahrens dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
398789Sahrens     void *tag, dsl_dataset_t **dsp)
399789Sahrens {
400789Sahrens 	dsl_dir_t *dd;
401789Sahrens 	dsl_pool_t *dp;
402789Sahrens 	const char *tail;
403789Sahrens 	uint64_t obj;
404789Sahrens 	dsl_dataset_t *ds = NULL;
405789Sahrens 	int err = 0;
406789Sahrens 
4071544Seschrock 	err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail);
4081544Seschrock 	if (err)
4091544Seschrock 		return (err);
410789Sahrens 
411789Sahrens 	dp = dd->dd_pool;
412789Sahrens 	obj = dd->dd_phys->dd_head_dataset_obj;
413789Sahrens 	rw_enter(&dp->dp_config_rwlock, RW_READER);
414789Sahrens 	if (obj == 0) {
415789Sahrens 		/* A dataset with no associated objset */
416789Sahrens 		err = ENOENT;
417789Sahrens 		goto out;
418789Sahrens 	}
419789Sahrens 
420789Sahrens 	if (tail != NULL) {
421789Sahrens 		objset_t *mos = dp->dp_meta_objset;
422789Sahrens 
4231544Seschrock 		err = dsl_dataset_open_obj(dp, obj, NULL,
4241544Seschrock 		    DS_MODE_NONE, tag, &ds);
4251544Seschrock 		if (err)
4261544Seschrock 			goto out;
427789Sahrens 		obj = ds->ds_phys->ds_snapnames_zapobj;
428789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
429789Sahrens 		ds = NULL;
430789Sahrens 
431789Sahrens 		if (tail[0] != '@') {
432789Sahrens 			err = ENOENT;
433789Sahrens 			goto out;
434789Sahrens 		}
435789Sahrens 		tail++;
436789Sahrens 
437789Sahrens 		/* Look for a snapshot */
438789Sahrens 		if (!DS_MODE_IS_READONLY(mode)) {
439789Sahrens 			err = EROFS;
440789Sahrens 			goto out;
441789Sahrens 		}
442789Sahrens 		dprintf("looking for snapshot '%s'\n", tail);
443789Sahrens 		err = zap_lookup(mos, obj, tail, 8, 1, &obj);
444789Sahrens 		if (err)
445789Sahrens 			goto out;
446789Sahrens 	}
4471544Seschrock 	err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds);
448789Sahrens 
449789Sahrens out:
450789Sahrens 	rw_exit(&dp->dp_config_rwlock);
451789Sahrens 	dsl_dir_close(dd, FTAG);
452789Sahrens 
453789Sahrens 	ASSERT3U((err == 0), ==, (ds != NULL));
454789Sahrens 	/* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */
455789Sahrens 
456789Sahrens 	*dsp = ds;
457789Sahrens 	return (err);
458789Sahrens }
459789Sahrens 
460789Sahrens int
461789Sahrens dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp)
462789Sahrens {
463789Sahrens 	return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp));
464789Sahrens }
465789Sahrens 
466789Sahrens void
467789Sahrens dsl_dataset_name(dsl_dataset_t *ds, char *name)
468789Sahrens {
469789Sahrens 	if (ds == NULL) {
470789Sahrens 		(void) strcpy(name, "mos");
471789Sahrens 	} else {
472789Sahrens 		dsl_dir_name(ds->ds_dir, name);
4731544Seschrock 		VERIFY(0 == dsl_dataset_get_snapname(ds));
474789Sahrens 		if (ds->ds_snapname[0]) {
475789Sahrens 			(void) strcat(name, "@");
476789Sahrens 			if (!MUTEX_HELD(&ds->ds_lock)) {
477789Sahrens 				/*
478789Sahrens 				 * We use a "recursive" mutex so that we
479789Sahrens 				 * can call dprintf_ds() with ds_lock held.
480789Sahrens 				 */
481789Sahrens 				mutex_enter(&ds->ds_lock);
482789Sahrens 				(void) strcat(name, ds->ds_snapname);
483789Sahrens 				mutex_exit(&ds->ds_lock);
484789Sahrens 			} else {
485789Sahrens 				(void) strcat(name, ds->ds_snapname);
486789Sahrens 			}
487789Sahrens 		}
488789Sahrens 	}
489789Sahrens }
490789Sahrens 
491789Sahrens void
492789Sahrens dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag)
493789Sahrens {
494789Sahrens 	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
495789Sahrens 	mutex_enter(&ds->ds_lock);
496789Sahrens 	ASSERT3U(ds->ds_open_refcount, >=, weight);
497789Sahrens 	ds->ds_open_refcount -= weight;
498789Sahrens 	dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n",
499789Sahrens 	    mode, ds->ds_open_refcount);
500789Sahrens 	mutex_exit(&ds->ds_lock);
501789Sahrens 
5021544Seschrock 	dmu_buf_rele(ds->ds_dbuf, tag);
503789Sahrens }
504789Sahrens 
505789Sahrens void
506789Sahrens dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
507789Sahrens {
508789Sahrens 	objset_t *mos = dp->dp_meta_objset;
509789Sahrens 	dmu_buf_t *dbuf;
510789Sahrens 	dsl_dataset_phys_t *dsphys;
511789Sahrens 	dsl_dataset_t *ds;
512789Sahrens 	uint64_t dsobj;
513789Sahrens 	dsl_dir_t *dd;
514789Sahrens 
515789Sahrens 	dsl_dir_create_root(mos, ddobjp, tx);
5161544Seschrock 	VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd));
517789Sahrens 
518928Stabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
519928Stabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
5201544Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
521789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
522789Sahrens 	dsphys = dbuf->db_data;
523789Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
524789Sahrens 	dsphys->ds_fsid_guid = unique_create();
5251544Seschrock 	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
526789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
527789Sahrens 	    sizeof (dsphys->ds_guid));
528789Sahrens 	dsphys->ds_snapnames_zapobj =
529885Sahrens 	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
530789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
531789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
532789Sahrens 	dsphys->ds_deadlist_obj =
533789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
5341544Seschrock 	dmu_buf_rele(dbuf, FTAG);
535789Sahrens 
536789Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
537789Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
538789Sahrens 	dsl_dir_close(dd, FTAG);
539789Sahrens 
5401544Seschrock 	VERIFY(0 ==
5411544Seschrock 	    dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds));
542789Sahrens 	(void) dmu_objset_create_impl(dp->dp_spa, ds, DMU_OST_ZFS, tx);
543789Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
544789Sahrens }
545789Sahrens 
5462199Sahrens uint64_t
5472199Sahrens dsl_dataset_create_sync(dsl_dir_t *pdd,
548789Sahrens     const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx)
549789Sahrens {
5502199Sahrens 	dsl_pool_t *dp = pdd->dd_pool;
551789Sahrens 	dmu_buf_t *dbuf;
552789Sahrens 	dsl_dataset_phys_t *dsphys;
5532199Sahrens 	uint64_t dsobj, ddobj;
554789Sahrens 	objset_t *mos = dp->dp_meta_objset;
555789Sahrens 	dsl_dir_t *dd;
556789Sahrens 
5572199Sahrens 	ASSERT(clone_parent == NULL || clone_parent->ds_dir->dd_pool == dp);
5582199Sahrens 	ASSERT(clone_parent == NULL ||
5592199Sahrens 	    clone_parent->ds_phys->ds_num_children > 0);
560789Sahrens 	ASSERT(lastname[0] != '@');
561789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
562789Sahrens 
5632199Sahrens 	ddobj = dsl_dir_create_sync(pdd, lastname, tx);
5642199Sahrens 	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));
565789Sahrens 
566928Stabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
567928Stabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
5681544Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
569789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
570789Sahrens 	dsphys = dbuf->db_data;
571789Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
572789Sahrens 	dsphys->ds_fsid_guid = unique_create();
573789Sahrens 	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
574789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
575789Sahrens 	    sizeof (dsphys->ds_guid));
576789Sahrens 	dsphys->ds_snapnames_zapobj =
577885Sahrens 	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
578789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
579789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
580789Sahrens 	dsphys->ds_deadlist_obj =
581789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
582789Sahrens 	if (clone_parent) {
583789Sahrens 		dsphys->ds_prev_snap_obj = clone_parent->ds_object;
584789Sahrens 		dsphys->ds_prev_snap_txg =
585789Sahrens 		    clone_parent->ds_phys->ds_creation_txg;
586789Sahrens 		dsphys->ds_used_bytes =
587789Sahrens 		    clone_parent->ds_phys->ds_used_bytes;
588789Sahrens 		dsphys->ds_compressed_bytes =
589789Sahrens 		    clone_parent->ds_phys->ds_compressed_bytes;
590789Sahrens 		dsphys->ds_uncompressed_bytes =
591789Sahrens 		    clone_parent->ds_phys->ds_uncompressed_bytes;
592789Sahrens 		dsphys->ds_bp = clone_parent->ds_phys->ds_bp;
593789Sahrens 
594789Sahrens 		dmu_buf_will_dirty(clone_parent->ds_dbuf, tx);
595789Sahrens 		clone_parent->ds_phys->ds_num_children++;
596789Sahrens 
597789Sahrens 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
598789Sahrens 		dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object;
599789Sahrens 	}
6001544Seschrock 	dmu_buf_rele(dbuf, FTAG);
601789Sahrens 
602789Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
603789Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
604789Sahrens 	dsl_dir_close(dd, FTAG);
605789Sahrens 
6062199Sahrens 	return (dsobj);
6072199Sahrens }
6082199Sahrens 
6092199Sahrens struct destroyarg {
6102199Sahrens 	dsl_sync_task_group_t *dstg;
6112199Sahrens 	char *snapname;
6122199Sahrens 	void *tag;
6132199Sahrens 	char *failed;
6142199Sahrens };
6152199Sahrens 
6162199Sahrens static int
6172199Sahrens dsl_snapshot_destroy_one(char *name, void *arg)
6182199Sahrens {
6192199Sahrens 	struct destroyarg *da = arg;
6202199Sahrens 	dsl_dataset_t *ds;
6212199Sahrens 	char *cp;
6222199Sahrens 	int err;
6232199Sahrens 
6242199Sahrens 	(void) strcat(name, "@");
6252199Sahrens 	(void) strcat(name, da->snapname);
6262199Sahrens 	err = dsl_dataset_open(name,
6272199Sahrens 	    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
6282199Sahrens 	    da->tag, &ds);
6292199Sahrens 	cp = strchr(name, '@');
6302199Sahrens 	*cp = '\0';
6312199Sahrens 	if (err == ENOENT)
6322199Sahrens 		return (0);
6332199Sahrens 	if (err) {
6342199Sahrens 		(void) strcpy(da->failed, name);
6352199Sahrens 		return (err);
6362199Sahrens 	}
6372199Sahrens 
6382199Sahrens 	dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
6392199Sahrens 	    dsl_dataset_destroy_sync, ds, da->tag, 0);
640789Sahrens 	return (0);
641789Sahrens }
642789Sahrens 
6432199Sahrens /*
6442199Sahrens  * Destroy 'snapname' in all descendants of 'fsname'.
6452199Sahrens  */
6462199Sahrens #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
6472199Sahrens int
6482199Sahrens dsl_snapshots_destroy(char *fsname, char *snapname)
6492199Sahrens {
6502199Sahrens 	int err;
6512199Sahrens 	struct destroyarg da;
6522199Sahrens 	dsl_sync_task_t *dst;
6532199Sahrens 	spa_t *spa;
6542199Sahrens 	char *cp;
6552199Sahrens 
6562199Sahrens 	cp = strchr(fsname, '/');
6572199Sahrens 	if (cp) {
6582199Sahrens 		*cp = '\0';
6592199Sahrens 		err = spa_open(fsname, &spa, FTAG);
6602199Sahrens 		*cp = '/';
6612199Sahrens 	} else {
6622199Sahrens 		err = spa_open(fsname, &spa, FTAG);
6632199Sahrens 	}
6642199Sahrens 	if (err)
6652199Sahrens 		return (err);
6662199Sahrens 	da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
6672199Sahrens 	da.snapname = snapname;
6682199Sahrens 	da.tag = FTAG;
6692199Sahrens 	da.failed = fsname;
6702199Sahrens 
6712199Sahrens 	err = dmu_objset_find(fsname,
6722417Sahrens 	    dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);
6732199Sahrens 
6742199Sahrens 	if (err == 0)
6752199Sahrens 		err = dsl_sync_task_group_wait(da.dstg);
6762199Sahrens 
6772199Sahrens 	for (dst = list_head(&da.dstg->dstg_tasks); dst;
6782199Sahrens 	    dst = list_next(&da.dstg->dstg_tasks, dst)) {
6792199Sahrens 		dsl_dataset_t *ds = dst->dst_arg1;
6802199Sahrens 		if (dst->dst_err) {
6812199Sahrens 			dsl_dataset_name(ds, fsname);
6822199Sahrens 			cp = strchr(fsname, '@');
6832199Sahrens 			*cp = '\0';
6842199Sahrens 		}
6852199Sahrens 		/*
6862199Sahrens 		 * If it was successful, destroy_sync would have
6872199Sahrens 		 * closed the ds
6882199Sahrens 		 */
6892199Sahrens 		if (err)
6902199Sahrens 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
6912199Sahrens 	}
6922199Sahrens 
6932199Sahrens 	dsl_sync_task_group_destroy(da.dstg);
6942199Sahrens 	spa_close(spa, FTAG);
6952199Sahrens 	return (err);
6962199Sahrens }
6972199Sahrens 
698789Sahrens int
699789Sahrens dsl_dataset_destroy(const char *name)
700789Sahrens {
701789Sahrens 	int err;
7022199Sahrens 	dsl_sync_task_group_t *dstg;
7032199Sahrens 	objset_t *os;
7042199Sahrens 	dsl_dataset_t *ds;
705789Sahrens 	dsl_dir_t *dd;
7062199Sahrens 	uint64_t obj;
7072199Sahrens 
7082199Sahrens 	if (strchr(name, '@')) {
7092199Sahrens 		/* Destroying a snapshot is simpler */
7102199Sahrens 		err = dsl_dataset_open(name,
7112199Sahrens 		    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
7122199Sahrens 		    FTAG, &ds);
7132199Sahrens 		if (err)
7142199Sahrens 			return (err);
7152199Sahrens 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
7162199Sahrens 		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
7172199Sahrens 		    ds, FTAG, 0);
7182199Sahrens 		if (err)
7192199Sahrens 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
7202199Sahrens 		return (err);
7212199Sahrens 	}
7222199Sahrens 
7232199Sahrens 	err = dmu_objset_open(name, DMU_OST_ANY,
7242199Sahrens 	    DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os);
7252199Sahrens 	if (err)
7262199Sahrens 		return (err);
7272199Sahrens 	ds = os->os->os_dsl_dataset;
7282199Sahrens 	dd = ds->ds_dir;
729789Sahrens 
7302199Sahrens 	/*
7312199Sahrens 	 * Check for errors and mark this ds as inconsistent, in
7322199Sahrens 	 * case we crash while freeing the objects.
7332199Sahrens 	 */
7342199Sahrens 	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
7352199Sahrens 	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
7362199Sahrens 	if (err) {
7372199Sahrens 		dmu_objset_close(os);
7382199Sahrens 		return (err);
7392199Sahrens 	}
7402199Sahrens 
7412199Sahrens 	/*
7422199Sahrens 	 * remove the objects in open context, so that we won't
7432199Sahrens 	 * have too much to do in syncing context.
7442199Sahrens 	 */
745*3025Sahrens 	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
746*3025Sahrens 	    ds->ds_phys->ds_prev_snap_txg)) {
7472199Sahrens 		dmu_tx_t *tx = dmu_tx_create(os);
7482199Sahrens 		dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
7492199Sahrens 		dmu_tx_hold_bonus(tx, obj);
7502199Sahrens 		err = dmu_tx_assign(tx, TXG_WAIT);
7512199Sahrens 		if (err) {
7522199Sahrens 			/*
7532199Sahrens 			 * Perhaps there is not enough disk
7542199Sahrens 			 * space.  Just deal with it from
7552199Sahrens 			 * dsl_dataset_destroy_sync().
7562199Sahrens 			 */
7572199Sahrens 			dmu_tx_abort(tx);
7582199Sahrens 			continue;
7592199Sahrens 		}
7602199Sahrens 		VERIFY(0 == dmu_object_free(os, obj, tx));
7612199Sahrens 		dmu_tx_commit(tx);
7622199Sahrens 	}
7632199Sahrens 	/* Make sure it's not dirty before we finish destroying it. */
7642199Sahrens 	txg_wait_synced(dd->dd_pool, 0);
7652199Sahrens 
7662199Sahrens 	dmu_objset_close(os);
7672199Sahrens 	if (err != ESRCH)
7682199Sahrens 		return (err);
7692199Sahrens 
7702199Sahrens 	err = dsl_dataset_open(name,
7712199Sahrens 	    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
7722199Sahrens 	    FTAG, &ds);
7731544Seschrock 	if (err)
7741544Seschrock 		return (err);
775789Sahrens 
7762199Sahrens 	err = dsl_dir_open(name, FTAG, &dd, NULL);
7772199Sahrens 	if (err) {
7782199Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
7792199Sahrens 		return (err);
780789Sahrens 	}
781789Sahrens 
7822199Sahrens 	/*
7832199Sahrens 	 * Blow away the dsl_dir + head dataset.
7842199Sahrens 	 */
7852199Sahrens 	dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
7862199Sahrens 	dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
7872199Sahrens 	    dsl_dataset_destroy_sync, ds, FTAG, 0);
7882199Sahrens 	dsl_sync_task_create(dstg, dsl_dir_destroy_check,
7892199Sahrens 	    dsl_dir_destroy_sync, dd, FTAG, 0);
7902199Sahrens 	err = dsl_sync_task_group_wait(dstg);
7912199Sahrens 	dsl_sync_task_group_destroy(dstg);
7922199Sahrens 	/* if it is successful, *destroy_sync will close the ds+dd */
7932199Sahrens 	if (err) {
7942199Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
7952199Sahrens 		dsl_dir_close(dd, FTAG);
7962199Sahrens 	}
797789Sahrens 	return (err);
798789Sahrens }
799789Sahrens 
800789Sahrens int
8012199Sahrens dsl_dataset_rollback(dsl_dataset_t *ds)
802789Sahrens {
8032199Sahrens 	ASSERT3U(ds->ds_open_refcount, ==, DOS_REF_MAX);
8042199Sahrens 	return (dsl_sync_task_do(ds->ds_dir->dd_pool,
8052199Sahrens 	    dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
8062199Sahrens 	    ds, NULL, 0));
807789Sahrens }
808789Sahrens 
809789Sahrens void *
810789Sahrens dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
811789Sahrens     void *p, dsl_dataset_evict_func_t func)
812789Sahrens {
813789Sahrens 	void *old;
814789Sahrens 
815789Sahrens 	mutex_enter(&ds->ds_lock);
816789Sahrens 	old = ds->ds_user_ptr;
817789Sahrens 	if (old == NULL) {
818789Sahrens 		ds->ds_user_ptr = p;
819789Sahrens 		ds->ds_user_evict_func = func;
820789Sahrens 	}
821789Sahrens 	mutex_exit(&ds->ds_lock);
822789Sahrens 	return (old);
823789Sahrens }
824789Sahrens 
825789Sahrens void *
826789Sahrens dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
827789Sahrens {
828789Sahrens 	return (ds->ds_user_ptr);
829789Sahrens }
830789Sahrens 
831789Sahrens 
832789Sahrens void
833789Sahrens dsl_dataset_get_blkptr(dsl_dataset_t *ds, blkptr_t *bp)
834789Sahrens {
835789Sahrens 	*bp = ds->ds_phys->ds_bp;
836789Sahrens }
837789Sahrens 
838789Sahrens void
839789Sahrens dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
840789Sahrens {
841789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
842789Sahrens 	/* If it's the meta-objset, set dp_meta_rootbp */
843789Sahrens 	if (ds == NULL) {
844789Sahrens 		tx->tx_pool->dp_meta_rootbp = *bp;
845789Sahrens 	} else {
846789Sahrens 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
847789Sahrens 		ds->ds_phys->ds_bp = *bp;
848789Sahrens 	}
849789Sahrens }
850789Sahrens 
851789Sahrens spa_t *
852789Sahrens dsl_dataset_get_spa(dsl_dataset_t *ds)
853789Sahrens {
854789Sahrens 	return (ds->ds_dir->dd_pool->dp_spa);
855789Sahrens }
856789Sahrens 
857789Sahrens void
858789Sahrens dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
859789Sahrens {
860789Sahrens 	dsl_pool_t *dp;
861789Sahrens 
862789Sahrens 	if (ds == NULL) /* this is the meta-objset */
863789Sahrens 		return;
864789Sahrens 
865789Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
8662885Sahrens 
8672885Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0)
8682885Sahrens 		panic("dirtying snapshot!");
869789Sahrens 
870789Sahrens 	dp = ds->ds_dir->dd_pool;
871789Sahrens 
872789Sahrens 	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
873789Sahrens 		/* up the hold count until we can be written out */
874789Sahrens 		dmu_buf_add_ref(ds->ds_dbuf, ds);
875789Sahrens 	}
876789Sahrens }
877789Sahrens 
878789Sahrens struct killarg {
879789Sahrens 	uint64_t *usedp;
880789Sahrens 	uint64_t *compressedp;
881789Sahrens 	uint64_t *uncompressedp;
882789Sahrens 	zio_t *zio;
883789Sahrens 	dmu_tx_t *tx;
884789Sahrens };
885789Sahrens 
886789Sahrens static int
887789Sahrens kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
888789Sahrens {
889789Sahrens 	struct killarg *ka = arg;
890789Sahrens 	blkptr_t *bp = &bc->bc_blkptr;
891789Sahrens 
892789Sahrens 	ASSERT3U(bc->bc_errno, ==, 0);
893789Sahrens 
894789Sahrens 	/*
895789Sahrens 	 * Since this callback is not called concurrently, no lock is
896789Sahrens 	 * needed on the accounting values.
897789Sahrens 	 */
8982082Seschrock 	*ka->usedp += bp_get_dasize(spa, bp);
899789Sahrens 	*ka->compressedp += BP_GET_PSIZE(bp);
900789Sahrens 	*ka->uncompressedp += BP_GET_UCSIZE(bp);
901789Sahrens 	/* XXX check for EIO? */
902789Sahrens 	(void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL,
903789Sahrens 	    ARC_NOWAIT);
904789Sahrens 	return (0);
905789Sahrens }
906789Sahrens 
907789Sahrens /* ARGSUSED */
9082199Sahrens static int
9092199Sahrens dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
910789Sahrens {
9112199Sahrens 	dsl_dataset_t *ds = arg1;
912789Sahrens 
9132199Sahrens 	/*
9142199Sahrens 	 * There must be a previous snapshot.  I suppose we could roll
9152199Sahrens 	 * it back to being empty (and re-initialize the upper (ZPL)
9162199Sahrens 	 * layer).  But for now there's no way to do this via the user
9172199Sahrens 	 * interface.
9182199Sahrens 	 */
9192199Sahrens 	if (ds->ds_phys->ds_prev_snap_txg == 0)
920789Sahrens 		return (EINVAL);
921789Sahrens 
9222199Sahrens 	/*
9232199Sahrens 	 * This must not be a snapshot.
9242199Sahrens 	 */
9252199Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0)
9262199Sahrens 		return (EINVAL);
927789Sahrens 
928789Sahrens 	/*
929789Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
930789Sahrens 	 * them.  Try again.
931789Sahrens 	 */
9322199Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
933789Sahrens 		return (EAGAIN);
9342199Sahrens 
9352199Sahrens 	return (0);
9362199Sahrens }
937789Sahrens 
9382199Sahrens /* ARGSUSED */
9392199Sahrens static void
9402199Sahrens dsl_dataset_rollback_sync(void *arg1, void *arg2, dmu_tx_t *tx)
9412199Sahrens {
9422199Sahrens 	dsl_dataset_t *ds = arg1;
9432199Sahrens 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
944789Sahrens 
945789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
946789Sahrens 
947789Sahrens 	/* Zero out the deadlist. */
948789Sahrens 	bplist_close(&ds->ds_deadlist);
949789Sahrens 	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
950789Sahrens 	ds->ds_phys->ds_deadlist_obj =
951789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
9521544Seschrock 	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
9531544Seschrock 	    ds->ds_phys->ds_deadlist_obj));
954789Sahrens 
955789Sahrens 	{
956789Sahrens 		/* Free blkptrs that we gave birth to */
957789Sahrens 		zio_t *zio;
958789Sahrens 		uint64_t used = 0, compressed = 0, uncompressed = 0;
959789Sahrens 		struct killarg ka;
960789Sahrens 
961789Sahrens 		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
962789Sahrens 		    ZIO_FLAG_MUSTSUCCEED);
963789Sahrens 		ka.usedp = &used;
964789Sahrens 		ka.compressedp = &compressed;
965789Sahrens 		ka.uncompressedp = &uncompressed;
966789Sahrens 		ka.zio = zio;
967789Sahrens 		ka.tx = tx;
968789Sahrens 		(void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
969789Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
970789Sahrens 		(void) zio_wait(zio);
971789Sahrens 
9722199Sahrens 		dsl_dir_diduse_space(ds->ds_dir,
973789Sahrens 		    -used, -compressed, -uncompressed, tx);
974789Sahrens 	}
975789Sahrens 
9762199Sahrens 	/* Change our contents to that of the prev snapshot */
977789Sahrens 	ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj);
978789Sahrens 	ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
979789Sahrens 	ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes;
980789Sahrens 	ds->ds_phys->ds_compressed_bytes =
981789Sahrens 	    ds->ds_prev->ds_phys->ds_compressed_bytes;
982789Sahrens 	ds->ds_phys->ds_uncompressed_bytes =
983789Sahrens 	    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
9842082Seschrock 	ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
985789Sahrens 	ds->ds_phys->ds_unique_bytes = 0;
986789Sahrens 
9872532Sahrens 	if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
9882532Sahrens 		dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
9892532Sahrens 		ds->ds_prev->ds_phys->ds_unique_bytes = 0;
9902532Sahrens 	}
991789Sahrens }
992789Sahrens 
9931731Sbonwick /* ARGSUSED */
9941731Sbonwick static int
9952199Sahrens dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
9961731Sbonwick {
9972199Sahrens 	dsl_dataset_t *ds = arg1;
9981731Sbonwick 
9991731Sbonwick 	/*
10001731Sbonwick 	 * Can't delete a head dataset if there are snapshots of it.
10011731Sbonwick 	 * (Except if the only snapshots are from the branch we cloned
10021731Sbonwick 	 * from.)
10031731Sbonwick 	 */
10041731Sbonwick 	if (ds->ds_prev != NULL &&
10051731Sbonwick 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
10061731Sbonwick 		return (EINVAL);
10071731Sbonwick 
10081731Sbonwick 	return (0);
10091731Sbonwick }
10101731Sbonwick 
10112199Sahrens /* ARGSUSED */
10122199Sahrens static void
10132199Sahrens dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx)
1014789Sahrens {
10152199Sahrens 	dsl_dataset_t *ds = arg1;
1016789Sahrens 
10172199Sahrens 	/* Mark it as inconsistent on-disk, in case we crash */
10182199Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
10192199Sahrens 	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
10202199Sahrens }
1021789Sahrens 
10222199Sahrens /* ARGSUSED */
10232199Sahrens static int
10242199Sahrens dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
10252199Sahrens {
10262199Sahrens 	dsl_dataset_t *ds = arg1;
1027789Sahrens 
1028789Sahrens 	/* Can't delete a branch point. */
10292199Sahrens 	if (ds->ds_phys->ds_num_children > 1)
10302199Sahrens 		return (EEXIST);
1031789Sahrens 
1032789Sahrens 	/*
1033789Sahrens 	 * Can't delete a head dataset if there are snapshots of it.
1034789Sahrens 	 * (Except if the only snapshots are from the branch we cloned
1035789Sahrens 	 * from.)
1036789Sahrens 	 */
1037789Sahrens 	if (ds->ds_prev != NULL &&
10382199Sahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
1039789Sahrens 		return (EINVAL);
1040789Sahrens 
1041789Sahrens 	/*
1042789Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
1043789Sahrens 	 * them.  Try again.
1044789Sahrens 	 */
10452199Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
1046789Sahrens 		return (EAGAIN);
10472199Sahrens 
10482199Sahrens 	/* XXX we should do some i/o error checking... */
10492199Sahrens 	return (0);
10502199Sahrens }
10512199Sahrens 
/*
 * Sync-task sync function that destroys the dataset 'arg1'.
 *
 * In order, it:
 *  - fixes up the previous snapshot (if any): either drops its child
 *    count (destroying a clone head) or splices this dataset out of the
 *    prev->next link;
 *  - if this dataset has a next snapshot, splices it out of the
 *    next->prev link, merges/frees deadlist entries and hands our
 *    deadlist object to the next dataset; otherwise (head dataset)
 *    destroys the deadlist and frees the blocks found by
 *    traverse_dsl_dataset() via kill_blkptr();
 *  - returns the freed space to the dsl_dir;
 *  - destroys the snapshot-names zap (if present) and either clears the
 *    dsl_dir's head-dataset link or removes this snapshot's name from
 *    the head dataset's snapshot-names zap;
 *  - closes the dataset using 'tag' and frees its object in the MOS.
 *
 * The dataset must be held with ds_open_refcount == DOS_REF_MAX and the
 * pool's config rwlock must be write-held (both asserted).
 */
10522199Sahrens static void
10532199Sahrens dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
10542199Sahrens {
10552199Sahrens 	dsl_dataset_t *ds = arg1;
10562199Sahrens 	uint64_t used = 0, compressed = 0, uncompressed = 0;
10572199Sahrens 	zio_t *zio;
10582199Sahrens 	int err;
10592199Sahrens 	int after_branch_point = FALSE;
10602199Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
10612199Sahrens 	objset_t *mos = dp->dp_meta_objset;
10622199Sahrens 	dsl_dataset_t *ds_prev = NULL;
10632199Sahrens 	uint64_t obj;
10642199Sahrens 
10652199Sahrens 	ASSERT3U(ds->ds_open_refcount, ==, DOS_REF_MAX);
10662199Sahrens 	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
10672199Sahrens 	ASSERT(ds->ds_prev == NULL ||
10682199Sahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
10692199Sahrens 	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
10702199Sahrens 
10712199Sahrens 	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
10722199Sahrens 
	/* Remember our object number; it is freed after ds is closed. */
10732199Sahrens 	obj = ds->ds_object;
1074789Sahrens 
	/*
	 * Fix up the previous snapshot.  Use the already-open ds_prev
	 * when there is one; otherwise open it here (that hold is
	 * dropped near the end of this function).
	 */
1075789Sahrens 	if (ds->ds_phys->ds_prev_snap_obj != 0) {
1076789Sahrens 		if (ds->ds_prev) {
1077789Sahrens 			ds_prev = ds->ds_prev;
1078789Sahrens 		} else {
10792199Sahrens 			VERIFY(0 == dsl_dataset_open_obj(dp,
1080789Sahrens 			    ds->ds_phys->ds_prev_snap_obj, NULL,
10812199Sahrens 			    DS_MODE_NONE, FTAG, &ds_prev));
1082789Sahrens 		}
		/* prev's next link does not point at us */
1083789Sahrens 		after_branch_point =
1084789Sahrens 		    (ds_prev->ds_phys->ds_next_snap_obj != obj);
1085789Sahrens 
1086789Sahrens 		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
1087789Sahrens 		if (after_branch_point &&
1088789Sahrens 		    ds->ds_phys->ds_next_snap_obj == 0) {
1089789Sahrens 			/* This clone is toast. */
1090789Sahrens 			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
1091789Sahrens 			ds_prev->ds_phys->ds_num_children--;
1092789Sahrens 		} else if (!after_branch_point) {
1093789Sahrens 			ds_prev->ds_phys->ds_next_snap_obj =
1094789Sahrens 			    ds->ds_phys->ds_next_snap_obj;
1095789Sahrens 		}
1096789Sahrens 	}
1097789Sahrens 
	/*
	 * Root zio passed to every arc_free() issued below (directly or
	 * through kill_blkptr()); it is waited on after both branches.
	 */
1098789Sahrens 	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
1099789Sahrens 
1100789Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0) {
11012199Sahrens 		blkptr_t bp;
1102789Sahrens 		dsl_dataset_t *ds_next;
1103789Sahrens 		uint64_t itor = 0;
1104789Sahrens 
1105789Sahrens 		spa_scrub_restart(dp->dp_spa, tx->tx_txg);
1106789Sahrens 
11072199Sahrens 		VERIFY(0 == dsl_dataset_open_obj(dp,
11081544Seschrock 		    ds->ds_phys->ds_next_snap_obj, NULL,
11091544Seschrock 		    DS_MODE_NONE, FTAG, &ds_next));
1110789Sahrens 		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
1111789Sahrens 
		/* Point next's prev link (obj and txg) past us. */
1112789Sahrens 		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
1113789Sahrens 		ds_next->ds_phys->ds_prev_snap_obj =
1114789Sahrens 		    ds->ds_phys->ds_prev_snap_obj;
1115789Sahrens 		ds_next->ds_phys->ds_prev_snap_txg =
1116789Sahrens 		    ds->ds_phys->ds_prev_snap_txg;
1117789Sahrens 		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
1118789Sahrens 		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
1119789Sahrens 
1120789Sahrens 		/*
1121789Sahrens 		 * Transfer to our deadlist (which will become next's
1122789Sahrens 		 * new deadlist) any entries from next's current
1123789Sahrens 		 * deadlist which were born before prev, and free the
1124789Sahrens 		 * other entries.
1125789Sahrens 		 *
1126789Sahrens 		 * XXX we're doing this long task with the config lock held
1127789Sahrens 		 */
1128789Sahrens 		while (bplist_iterate(&ds_next->ds_deadlist, &itor,
1129789Sahrens 		    &bp) == 0) {
1130789Sahrens 			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
11311544Seschrock 				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
11321544Seschrock 				    &bp, tx));
1133789Sahrens 				if (ds_prev && !after_branch_point &&
1134789Sahrens 				    bp.blk_birth >
1135789Sahrens 				    ds_prev->ds_phys->ds_prev_snap_txg) {
1136789Sahrens 					ds_prev->ds_phys->ds_unique_bytes +=
11372082Seschrock 					    bp_get_dasize(dp->dp_spa, &bp);
1138789Sahrens 				}
1139789Sahrens 			} else {
11402082Seschrock 				used += bp_get_dasize(dp->dp_spa, &bp);
1141789Sahrens 				compressed += BP_GET_PSIZE(&bp);
1142789Sahrens 				uncompressed += BP_GET_UCSIZE(&bp);
1143789Sahrens 				/* XXX check return value? */
1144789Sahrens 				(void) arc_free(zio, dp->dp_spa, tx->tx_txg,
1145789Sahrens 				    &bp, NULL, NULL, ARC_NOWAIT);
1146789Sahrens 			}
1147789Sahrens 		}
1148789Sahrens 
1149789Sahrens 		/* free next's deadlist */
1150789Sahrens 		bplist_close(&ds_next->ds_deadlist);
1151789Sahrens 		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);
1152789Sahrens 
1153789Sahrens 		/* set next's deadlist to our deadlist */
1154789Sahrens 		ds_next->ds_phys->ds_deadlist_obj =
1155789Sahrens 		    ds->ds_phys->ds_deadlist_obj;
11561544Seschrock 		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
11571544Seschrock 		    ds_next->ds_phys->ds_deadlist_obj));
1158789Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1159789Sahrens 
1160789Sahrens 		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
1161789Sahrens 			/*
1162789Sahrens 			 * Update next's unique to include blocks which
1163789Sahrens 			 * were previously shared by only this snapshot
1164789Sahrens 			 * and it.  Those blocks will be born after the
1165789Sahrens 			 * prev snap and before this snap, and will have
1166789Sahrens 			 * died after the next snap and before the one
1167789Sahrens 			 * after that (ie. be on the snap after next's
1168789Sahrens 			 * deadlist).
1169789Sahrens 			 *
1170789Sahrens 			 * XXX we're doing this long task with the
1171789Sahrens 			 * config lock held
1172789Sahrens 			 */
1173789Sahrens 			dsl_dataset_t *ds_after_next;
1174789Sahrens 
11752199Sahrens 			VERIFY(0 == dsl_dataset_open_obj(dp,
1176789Sahrens 			    ds_next->ds_phys->ds_next_snap_obj, NULL,
11771544Seschrock 			    DS_MODE_NONE, FTAG, &ds_after_next));
1178789Sahrens 			itor = 0;
1179789Sahrens 			while (bplist_iterate(&ds_after_next->ds_deadlist,
1180789Sahrens 			    &itor, &bp) == 0) {
1181789Sahrens 				if (bp.blk_birth >
1182789Sahrens 				    ds->ds_phys->ds_prev_snap_txg &&
1183789Sahrens 				    bp.blk_birth <=
1184789Sahrens 				    ds->ds_phys->ds_creation_txg) {
1185789Sahrens 					ds_next->ds_phys->ds_unique_bytes +=
11862082Seschrock 					    bp_get_dasize(dp->dp_spa, &bp);
1187789Sahrens 				}
1188789Sahrens 			}
1189789Sahrens 
1190789Sahrens 			dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG);
1191789Sahrens 			ASSERT3P(ds_next->ds_prev, ==, NULL);
1192789Sahrens 		} else {
1193789Sahrens 			/*
1194789Sahrens 			 * It would be nice to update the head dataset's
1195789Sahrens 			 * unique.  To do so we would have to traverse
1196789Sahrens 			 * it for blocks born after ds_prev, which is
1197789Sahrens 			 * pretty expensive just to maintain something
1198789Sahrens 			 * for debugging purposes.
1199789Sahrens 			 */
			/*
			 * ds_next holds us open as its ds_prev (with
			 * ds_next as the tag); drop that hold and
			 * re-point ds_next->ds_prev at our prev, if any.
			 */
1200789Sahrens 			ASSERT3P(ds_next->ds_prev, ==, ds);
1201789Sahrens 			dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE,
1202789Sahrens 			    ds_next);
1203789Sahrens 			if (ds_prev) {
12042199Sahrens 				VERIFY(0 == dsl_dataset_open_obj(dp,
12051544Seschrock 				    ds->ds_phys->ds_prev_snap_obj, NULL,
12061544Seschrock 				    DS_MODE_NONE, ds_next, &ds_next->ds_prev));
1207789Sahrens 			} else {
1208789Sahrens 				ds_next->ds_prev = NULL;
1209789Sahrens 			}
1210789Sahrens 		}
1211789Sahrens 		dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG);
1212789Sahrens 
1213789Sahrens 		/*
1214789Sahrens 		 * NB: unique_bytes is not accurate for head objsets
1215789Sahrens 		 * because we don't update it when we delete the most
1216789Sahrens 		 * recent snapshot -- see above comment.
1217789Sahrens 		 */
1218789Sahrens 		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
1219789Sahrens 	} else {
1220789Sahrens 		/*
1221789Sahrens 		 * There's no next snapshot, so this is a head dataset.
1222789Sahrens 		 * Destroy the deadlist.  Unless it's a clone, the
1223789Sahrens 		 * deadlist should be empty.  (If it's a clone, it's
1224789Sahrens 		 * safe to ignore the deadlist contents.)
1225789Sahrens 		 */
1226789Sahrens 		struct killarg ka;
1227789Sahrens 
1228789Sahrens 		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
1229789Sahrens 		bplist_close(&ds->ds_deadlist);
1230789Sahrens 		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
1231789Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1232789Sahrens 
1233789Sahrens 		/*
1234789Sahrens 		 * Free everything that we point to (that's born after
1235789Sahrens 		 * the previous snapshot, if we are a clone)
1236789Sahrens 		 *
1237789Sahrens 		 * XXX we're doing this long task with the config lock held
1238789Sahrens 		 */
1239789Sahrens 		ka.usedp = &used;
1240789Sahrens 		ka.compressedp = &compressed;
1241789Sahrens 		ka.uncompressedp = &uncompressed;
1242789Sahrens 		ka.zio = zio;
1243789Sahrens 		ka.tx = tx;
1244789Sahrens 		err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
1245789Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
1246789Sahrens 		ASSERT3U(err, ==, 0);
1247789Sahrens 	}
1248789Sahrens 
1249789Sahrens 	err = zio_wait(zio);
1250789Sahrens 	ASSERT3U(err, ==, 0);
1251789Sahrens 
	/* Give the space accumulated in used/compressed/uncompressed back. */
12522199Sahrens 	dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx);
1253789Sahrens 
1254789Sahrens 	if (ds->ds_phys->ds_snapnames_zapobj) {
1255789Sahrens 		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
1256789Sahrens 		ASSERT(err == 0);
1257789Sahrens 	}
1258789Sahrens 
12592199Sahrens 	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
1260789Sahrens 		/* Erase the link in the dataset */
12612199Sahrens 		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
12622199Sahrens 		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
1263789Sahrens 		/*
1264789Sahrens 		 * dsl_dir_sync_destroy() called us, they'll destroy
1265789Sahrens 		 * the dataset.
1266789Sahrens 		 */
1267789Sahrens 	} else {
1268789Sahrens 		/* remove from snapshot namespace */
1269789Sahrens 		dsl_dataset_t *ds_head;
12702199Sahrens 		VERIFY(0 == dsl_dataset_open_obj(dp,
12712199Sahrens 		    ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL,
12721544Seschrock 		    DS_MODE_NONE, FTAG, &ds_head));
12732207Sahrens 		VERIFY(0 == dsl_dataset_get_snapname(ds));
1274789Sahrens #ifdef ZFS_DEBUG
1275789Sahrens 		{
			/* debug only: the name must currently map to us */
1276789Sahrens 			uint64_t val;
1277789Sahrens 			err = zap_lookup(mos,
1278789Sahrens 			    ds_head->ds_phys->ds_snapnames_zapobj,
12792199Sahrens 			    ds->ds_snapname, 8, 1, &val);
1280789Sahrens 			ASSERT3U(err, ==, 0);
1281789Sahrens 			ASSERT3U(val, ==, obj);
1282789Sahrens 		}
1283789Sahrens #endif
1284789Sahrens 		err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj,
12852199Sahrens 		    ds->ds_snapname, tx);
1286789Sahrens 		ASSERT(err == 0);
1287789Sahrens 		dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG);
1288789Sahrens 	}
1289789Sahrens 
	/* Drop the hold on prev only if it was opened by this function. */
1290789Sahrens 	if (ds_prev && ds->ds_prev != ds_prev)
1291789Sahrens 		dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);
1292789Sahrens 
	/* Close with the caller-supplied tag, then free the MOS object. */
12932199Sahrens 	dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag);
12942199Sahrens 	VERIFY(0 == dmu_object_free(mos, obj, tx));
12952199Sahrens }
12962199Sahrens 
12972199Sahrens /* ARGSUSED */
12982199Sahrens int
12992199Sahrens dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
13002199Sahrens {
13012199Sahrens 	objset_t *os = arg1;
13022199Sahrens 	dsl_dataset_t *ds = os->os->os_dsl_dataset;
13032199Sahrens 	const char *snapname = arg2;
13042199Sahrens 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
13052199Sahrens 	int err;
13062199Sahrens 	uint64_t value;
1307789Sahrens 
1308789Sahrens 	/*
13092199Sahrens 	 * We don't allow multiple snapshots of the same txg.  If there
13102199Sahrens 	 * is already one, try again.
13112199Sahrens 	 */
13122199Sahrens 	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
13132199Sahrens 		return (EAGAIN);
13142199Sahrens 
13152199Sahrens 	/*
13162199Sahrens 	 * Check for conflicting name snapshot name.
1317789Sahrens 	 */
13182199Sahrens 	err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
13192199Sahrens 	    snapname, 8, 1, &value);
13202199Sahrens 	if (err == 0)
13212199Sahrens 		return (EEXIST);
13222199Sahrens 	if (err != ENOENT)
13232199Sahrens 		return (err);
1324789Sahrens 
13252199Sahrens 	ds->ds_trysnap_txg = tx->tx_txg;
1326789Sahrens 	return (0);
1327789Sahrens }
1328789Sahrens 
/*
 * Sync-task sync function that takes a snapshot.  'arg1' is the objset
 * being snapshotted and 'arg2' is the snapshot name.
 *
 * Allocates a new DMU_OT_DSL_DATASET object in the MOS and fills in its
 * dsl_dataset_phys_t from the head dataset (prev links, deadlist
 * object, space accounting, flags and block pointer), with the head as
 * its next snapshot.  The head's previous snapshot (when its next link
 * pointed at the head) is re-linked to the new object.  The head then
 * gets the new object as its previous snapshot, a zeroed
 * ds_unique_bytes and a newly created deadlist, and the name is added
 * to the head's snapshot-names zap.  Finally ds->ds_prev is re-opened
 * on the new snapshot.
 *
 * The pool's config rwlock must be write-held (asserted).
 */
13292199Sahrens void
13302199Sahrens dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
1331789Sahrens {
13322199Sahrens 	objset_t *os = arg1;
13332199Sahrens 	dsl_dataset_t *ds = os->os->os_dsl_dataset;
13342199Sahrens 	const char *snapname = arg2;
13352199Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
1336789Sahrens 	dmu_buf_t *dbuf;
1337789Sahrens 	dsl_dataset_phys_t *dsphys;
13382199Sahrens 	uint64_t dsobj;
1339789Sahrens 	objset_t *mos = dp->dp_meta_objset;
1340789Sahrens 	int err;
1341789Sahrens 
1342789Sahrens 	spa_scrub_restart(dp->dp_spa, tx->tx_txg);
13432199Sahrens 	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
1344789Sahrens 
	/* Allocate the snapshot's dataset object and fill in its phys. */
1345928Stabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
1346928Stabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
13471544Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
1348789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
1349789Sahrens 	dsphys = dbuf->db_data;
13502199Sahrens 	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
1351789Sahrens 	dsphys->ds_fsid_guid = unique_create();
1352789Sahrens 	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
1353789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
1354789Sahrens 	    sizeof (dsphys->ds_guid));
1355789Sahrens 	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
1356789Sahrens 	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
1357789Sahrens 	dsphys->ds_next_snap_obj = ds->ds_object;
1358789Sahrens 	dsphys->ds_num_children = 1;
1359789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
1360789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
	/* the snapshot takes over the head's current deadlist object */
1361789Sahrens 	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
1362789Sahrens 	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
1363789Sahrens 	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
1364789Sahrens 	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
13652082Seschrock 	dsphys->ds_flags = ds->ds_phys->ds_flags;
1366789Sahrens 	dsphys->ds_bp = ds->ds_phys->ds_bp;
13671544Seschrock 	dmu_buf_rele(dbuf, FTAG);
1368789Sahrens 
	/*
	 * If the head's previous snapshot linked forward to the head,
	 * make it link to the new snapshot instead.
	 */
13692199Sahrens 	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
13702199Sahrens 	if (ds->ds_prev) {
13712199Sahrens 		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
1372789Sahrens 		    ds->ds_object ||
13732199Sahrens 		    ds->ds_prev->ds_phys->ds_num_children > 1);
13742199Sahrens 		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
13752199Sahrens 			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
1376789Sahrens 			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
13772199Sahrens 			    ds->ds_prev->ds_phys->ds_creation_txg);
13782199Sahrens 			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
1379789Sahrens 		}
1380789Sahrens 	}
1381789Sahrens 
	/*
	 * Point the head at the new snapshot and give it a newly
	 * created deadlist (the old deadlist object now belongs to the
	 * snapshot, see above).
	 */
1382789Sahrens 	bplist_close(&ds->ds_deadlist);
1383789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
1384789Sahrens 	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg);
1385789Sahrens 	ds->ds_phys->ds_prev_snap_obj = dsobj;
1386789Sahrens 	ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg;
1387789Sahrens 	ds->ds_phys->ds_unique_bytes = 0;
1388789Sahrens 	ds->ds_phys->ds_deadlist_obj =
1389789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
13901544Seschrock 	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
13911544Seschrock 	    ds->ds_phys->ds_deadlist_obj));
1392789Sahrens 
	/* Record name -> object in the head's snapshot-names zap. */
1393789Sahrens 	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
1394789Sahrens 	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
1395789Sahrens 	    snapname, 8, 1, &dsobj, tx);
1396789Sahrens 	ASSERT(err == 0);
1397789Sahrens 
	/* Swap the in-core ds_prev hold over to the new snapshot. */
1398789Sahrens 	if (ds->ds_prev)
1399789Sahrens 		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
14001544Seschrock 	VERIFY(0 == dsl_dataset_open_obj(dp,
14011544Seschrock 	    ds->ds_phys->ds_prev_snap_obj, snapname,
14021544Seschrock 	    DS_MODE_NONE, ds, &ds->ds_prev));
1403789Sahrens }
1404789Sahrens 
1405789Sahrens void
1406789Sahrens dsl_dataset_sync(dsl_dataset_t *ds, dmu_tx_t *tx)
1407789Sahrens {
1408789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1409789Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
1410789Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
1411789Sahrens 
1412789Sahrens 	dmu_objset_sync(ds->ds_user_ptr, tx);
1413789Sahrens 	dsl_dir_dirty(ds->ds_dir, tx);
1414789Sahrens 	bplist_close(&ds->ds_deadlist);
1415789Sahrens 
14161544Seschrock 	dmu_buf_rele(ds->ds_dbuf, ds);
1417789Sahrens }
1418789Sahrens 
1419789Sahrens void
14202885Sahrens dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
1421789Sahrens {
14222885Sahrens 	dsl_dir_stats(ds->ds_dir, nv);
1423789Sahrens 
14242885Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
14252885Sahrens 	    ds->ds_phys->ds_creation_time);
14262885Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
14272885Sahrens 	    ds->ds_phys->ds_creation_txg);
14282885Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED,
14292885Sahrens 	    ds->ds_phys->ds_used_bytes);
1430789Sahrens 
1431789Sahrens 	if (ds->ds_phys->ds_next_snap_obj) {
1432789Sahrens 		/*
1433789Sahrens 		 * This is a snapshot; override the dd's space used with
14342885Sahrens 		 * our unique space and compression ratio.
1435789Sahrens 		 */
14362885Sahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
14372885Sahrens 		    ds->ds_phys->ds_unique_bytes);
14382885Sahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
14392885Sahrens 		    ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
14402885Sahrens 		    (ds->ds_phys->ds_uncompressed_bytes * 100 /
14412885Sahrens 		    ds->ds_phys->ds_compressed_bytes));
1442789Sahrens 	}
1443789Sahrens }
1444789Sahrens 
14452885Sahrens void
14462885Sahrens dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
1447789Sahrens {
14482885Sahrens 	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
14492885Sahrens 	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
14502885Sahrens 	if (ds->ds_phys->ds_next_snap_obj) {
14512885Sahrens 		stat->dds_is_snapshot = B_TRUE;
14522885Sahrens 		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
14532885Sahrens 	}
14542885Sahrens 
14552885Sahrens 	/* clone origin is really a dsl_dir thing... */
14562885Sahrens 	if (ds->ds_dir->dd_phys->dd_clone_parent_obj) {
14572885Sahrens 		dsl_dataset_t *ods;
14582885Sahrens 
14592885Sahrens 		rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
14602885Sahrens 		VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool,
14612885Sahrens 		    ds->ds_dir->dd_phys->dd_clone_parent_obj,
14622885Sahrens 		    NULL, DS_MODE_NONE, FTAG, &ods));
14632885Sahrens 		dsl_dataset_name(ods, stat->dds_clone_of);
14642885Sahrens 		dsl_dataset_close(ods, DS_MODE_NONE, FTAG);
14652885Sahrens 		rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
14662885Sahrens 	}
14672885Sahrens }
14682885Sahrens 
14692885Sahrens uint64_t
14702885Sahrens dsl_dataset_fsid_guid(dsl_dataset_t *ds)
14712885Sahrens {
14722885Sahrens 	return (ds->ds_phys->ds_fsid_guid);
14732885Sahrens }
14742885Sahrens 
14752885Sahrens void
14762885Sahrens dsl_dataset_space(dsl_dataset_t *ds,
14772885Sahrens     uint64_t *refdbytesp, uint64_t *availbytesp,
14782885Sahrens     uint64_t *usedobjsp, uint64_t *availobjsp)
14792885Sahrens {
14802885Sahrens 	*refdbytesp = ds->ds_phys->ds_used_bytes;
14812885Sahrens 	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
14822885Sahrens 	*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
14832885Sahrens 	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
1484789Sahrens }
1485789Sahrens 
14862199Sahrens /* ARGSUSED */
1487789Sahrens static int
14882199Sahrens dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
1489789Sahrens {
14902199Sahrens 	dsl_dataset_t *ds = arg1;
14912199Sahrens 	char *newsnapname = arg2;
14922199Sahrens 	dsl_dir_t *dd = ds->ds_dir;
1493789Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
14942199Sahrens 	dsl_dataset_t *hds;
14952199Sahrens 	uint64_t val;
1496789Sahrens 	int err;
1497789Sahrens 
14982199Sahrens 	err = dsl_dataset_open_obj(dd->dd_pool,
14992199Sahrens 	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds);
1500789Sahrens 	if (err)
1501789Sahrens 		return (err);
1502789Sahrens 
15032199Sahrens 	/* new name better not be in use */
15042199Sahrens 	err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj,
15052199Sahrens 	    newsnapname, 8, 1, &val);
15062199Sahrens 	dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
1507789Sahrens 
15082199Sahrens 	if (err == 0)
15092199Sahrens 		err = EEXIST;
15102199Sahrens 	else if (err == ENOENT)
15112199Sahrens 		err = 0;
15122199Sahrens 	return (err);
15132199Sahrens }
1514789Sahrens 
15152199Sahrens static void
15162199Sahrens dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
15172199Sahrens {
15182199Sahrens 	dsl_dataset_t *ds = arg1;
15192199Sahrens 	char *newsnapname = arg2;
15202199Sahrens 	dsl_dir_t *dd = ds->ds_dir;
15212199Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
15222199Sahrens 	dsl_dataset_t *hds;
15232199Sahrens 	int err;
1524789Sahrens 
15252199Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
1526789Sahrens 
15272199Sahrens 	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
15282199Sahrens 	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds));
1529789Sahrens 
15302199Sahrens 	VERIFY(0 == dsl_dataset_get_snapname(ds));
15312199Sahrens 	err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj,
15322199Sahrens 	    ds->ds_snapname, tx);
1533789Sahrens 	ASSERT3U(err, ==, 0);
15342199Sahrens 	mutex_enter(&ds->ds_lock);
15352199Sahrens 	(void) strcpy(ds->ds_snapname, newsnapname);
15362199Sahrens 	mutex_exit(&ds->ds_lock);
15372199Sahrens 	err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
15382199Sahrens 	    ds->ds_snapname, 8, 1, &ds->ds_object, tx);
1539789Sahrens 	ASSERT3U(err, ==, 0);
1540789Sahrens 
15412199Sahrens 	dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
1542789Sahrens }
1543789Sahrens 
1544789Sahrens #pragma weak dmu_objset_rename = dsl_dataset_rename
1545789Sahrens int
15462199Sahrens dsl_dataset_rename(const char *oldname, const char *newname)
1547789Sahrens {
1548789Sahrens 	dsl_dir_t *dd;
15492199Sahrens 	dsl_dataset_t *ds;
1550789Sahrens 	const char *tail;
1551789Sahrens 	int err;
1552789Sahrens 
15532199Sahrens 	err = dsl_dir_open(oldname, FTAG, &dd, &tail);
15541544Seschrock 	if (err)
15551544Seschrock 		return (err);
1556789Sahrens 	if (tail == NULL) {
15572199Sahrens 		err = dsl_dir_rename(dd, newname);
1558789Sahrens 		dsl_dir_close(dd, FTAG);
1559789Sahrens 		return (err);
1560789Sahrens 	}
1561789Sahrens 	if (tail[0] != '@') {
1562789Sahrens 		/* the name ended in a nonexistant component */
1563789Sahrens 		dsl_dir_close(dd, FTAG);
1564789Sahrens 		return (ENOENT);
1565789Sahrens 	}
1566789Sahrens 
15672199Sahrens 	dsl_dir_close(dd, FTAG);
15682199Sahrens 
15692199Sahrens 	/* new name must be snapshot in same filesystem */
15702199Sahrens 	tail = strchr(newname, '@');
15712199Sahrens 	if (tail == NULL)
15722199Sahrens 		return (EINVAL);
15732199Sahrens 	tail++;
15742199Sahrens 	if (strncmp(oldname, newname, tail - newname) != 0)
15752199Sahrens 		return (EXDEV);
1576789Sahrens 
15772199Sahrens 	err = dsl_dataset_open(oldname,
15782199Sahrens 	    DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds);
15792199Sahrens 	if (err)
15802199Sahrens 		return (err);
15812199Sahrens 
15822199Sahrens 	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
15832199Sahrens 	    dsl_dataset_snapshot_rename_check,
15842199Sahrens 	    dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);
15852199Sahrens 
15862199Sahrens 	dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);
15872199Sahrens 
1588789Sahrens 	return (err);
1589789Sahrens }
15902082Seschrock 
/*
 * State handed from dsl_dataset_promote_check() to
 * dsl_dataset_promote_sync().  The check function zeroes it and, in
 * syncing context, fills it in.
 */
struct promotearg {
	uint64_t used;		/* used bytes to move between the dsl_dirs */
	uint64_t comp;		/* compressed bytes to move */
	uint64_t uncomp;	/* uncompressed bytes to move */
	uint64_t unique;	/* new ds_unique_bytes for the pivot snapshot */
	uint64_t newnext_obj;	/* new ds_next_snap_obj for the pivot */
	uint64_t snapnames_obj;	/* snapnames zap of the origin's head ds */
};
15952199Sahrens 
/*
 * Sync-task check function for promoting a clone.
 *
 * arg1 is the head dataset of the clone being promoted; arg2 is a
 * struct promotearg, which is zeroed here and (in syncing context)
 * filled in for dsl_dataset_promote_sync().
 *
 * Returns EINVAL if the dataset's dsl_dir has no clone parent.  When tx
 * is not syncing, nothing else is checked and 0 is returned with *pa
 * still zeroed.  In syncing context, returns EXDEV if DS_FLAG_NOPROMOTE
 * is set, EEXIST if a snapshot that would be moved has a name already
 * present in the clone's snapnames zap, any error from the opens, the
 * deadlist walks or the zap lookup, and otherwise the result of
 * dsl_dir_transfer_possible().
 *
 * Every dataset hold taken here is dropped before returning; all
 * failure paths funnel through the "out" label.
 */
15962082Seschrock static int
15972199Sahrens dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
15982082Seschrock {
15992199Sahrens 	dsl_dataset_t *hds = arg1;
16002199Sahrens 	struct promotearg *pa = arg2;
16012199Sahrens 	dsl_dir_t *dd = hds->ds_dir;
16022199Sahrens 	dsl_pool_t *dp = hds->ds_dir->dd_pool;
16032082Seschrock 	dsl_dir_t *pdd = NULL;
16042082Seschrock 	dsl_dataset_t *ds = NULL;
16052082Seschrock 	dsl_dataset_t *pivot_ds = NULL;
16062082Seschrock 	dsl_dataset_t *newnext_ds = NULL;
16072082Seschrock 	int err;
16082082Seschrock 	char *name = NULL;
16092199Sahrens 	uint64_t itor = 0;
16102082Seschrock 	blkptr_t bp;
16112082Seschrock 
16122199Sahrens 	bzero(pa, sizeof (*pa));
16132199Sahrens 
16142082Seschrock 	/* Check that it is a clone */
16152082Seschrock 	if (dd->dd_phys->dd_clone_parent_obj == 0)
16162082Seschrock 		return (EINVAL);
16172082Seschrock 
16182199Sahrens 	/* Since this is so expensive, don't do the preliminary check */
16192199Sahrens 	if (!dmu_tx_is_syncing(tx))
16202199Sahrens 		return (0);
16212199Sahrens 
	/*
	 * The pivot is the snapshot this clone was created from
	 * (dd_clone_parent_obj); pdd is the dsl_dir it currently lives in.
	 * The "if (err = ...)" form below is an intentional assignment.
	 */
16222199Sahrens 	if (err = dsl_dataset_open_obj(dp,
16232082Seschrock 	    dd->dd_phys->dd_clone_parent_obj,
16242082Seschrock 	    NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds))
16252082Seschrock 		goto out;
16262082Seschrock 	pdd = pivot_ds->ds_dir;
16272199Sahrens 
	/*
	 * Record the snapnames zap of pdd's head dataset; the sync
	 * function removes the moved snapshots' names from it.
	 */
16282199Sahrens 	{
16292199Sahrens 		dsl_dataset_t *phds;
16302199Sahrens 		if (err = dsl_dataset_open_obj(dd->dd_pool,
16312199Sahrens 		    pdd->dd_phys->dd_head_dataset_obj,
16322199Sahrens 		    NULL, DS_MODE_NONE, FTAG, &phds))
16332199Sahrens 			goto out;
16342199Sahrens 		pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj;
16352199Sahrens 		dsl_dataset_close(phds, DS_MODE_NONE, FTAG);
16362199Sahrens 	}
16372082Seschrock 
16382082Seschrock 	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) {
16392082Seschrock 		err = EXDEV;
16402082Seschrock 		goto out;
16412082Seschrock 	}
16422082Seschrock 
16432082Seschrock 	/* find pivot point's new next ds */
	/*
	 * Start from the clone's head and follow ds_prev_snap_obj until
	 * reaching the dataset whose previous snapshot is the pivot.
	 * Exactly one hold (newnext_ds) is carried along the walk.
	 */
16442082Seschrock 	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object,
16452082Seschrock 	    NULL, DS_MODE_NONE, FTAG, &newnext_ds));
16462082Seschrock 	while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) {
16472082Seschrock 		dsl_dataset_t *prev;
16482082Seschrock 
16492082Seschrock 		if (err = dsl_dataset_open_obj(dd->dd_pool,
16502199Sahrens 		    newnext_ds->ds_phys->ds_prev_snap_obj,
16512199Sahrens 		    NULL, DS_MODE_NONE, FTAG, &prev))
16522082Seschrock 			goto out;
16532082Seschrock 		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
16542082Seschrock 		newnext_ds = prev;
16552082Seschrock 	}
16562199Sahrens 	pa->newnext_obj = newnext_ds->ds_object;
16572082Seschrock 
16582082Seschrock 	/* compute pivot point's new unique space */
	/*
	 * Sum the sizes of the blocks on newnext_ds's deadlist that were
	 * born after the pivot's ds_prev_snap_txg.  The loop is expected
	 * to end with ENOENT; any other error aborts the check.
	 */
16592082Seschrock 	while ((err = bplist_iterate(&newnext_ds->ds_deadlist,
16602082Seschrock 	    &itor, &bp)) == 0) {
16612082Seschrock 		if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg)
16622199Sahrens 			pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp);
16632082Seschrock 	}
16642082Seschrock 	if (err != ENOENT)
16652082Seschrock 		goto out;
16662082Seschrock 
16672082Seschrock 	/* Walk the snapshots that we are moving */
	/*
	 * NOTE(review): "name" is only ever written (by
	 * dsl_dataset_name() below) and is never read in this function.
	 * The call may be relied on for a side effect such as loading
	 * ds->ds_snapname, which the lookup below uses -- confirm before
	 * removing the buffer or the call.
	 */
16682082Seschrock 	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
16692082Seschrock 	ds = pivot_ds;
16702082Seschrock 	/* CONSTCOND */
16712082Seschrock 	while (TRUE) {
16722082Seschrock 		uint64_t val, dlused, dlcomp, dluncomp;
16732082Seschrock 		dsl_dataset_t *prev;
16742082Seschrock 
16752082Seschrock 		/* Check that the snapshot name does not conflict */
16762082Seschrock 		dsl_dataset_name(ds, name);
16772082Seschrock 		err = zap_lookup(dd->dd_pool->dp_meta_objset,
16782082Seschrock 		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
16792082Seschrock 		    8, 1, &val);
		/* Only ENOENT (name not present in the clone) lets us go on. */
16802082Seschrock 		if (err != ENOENT) {
16812082Seschrock 			if (err == 0)
16822082Seschrock 				err = EEXIST;
16832082Seschrock 			goto out;
16842082Seschrock 		}
16852082Seschrock 
16862082Seschrock 		/*
16872082Seschrock 		 * compute space to transfer.  Each snapshot gave birth to:
16882082Seschrock 		 * (my used) - (prev's used) + (deadlist's used)
16892082Seschrock 		 */
16902199Sahrens 		pa->used += ds->ds_phys->ds_used_bytes;
16912199Sahrens 		pa->comp += ds->ds_phys->ds_compressed_bytes;
16922199Sahrens 		pa->uncomp += ds->ds_phys->ds_uncompressed_bytes;
16932082Seschrock 
16942082Seschrock 		/* If we reach the first snapshot, we're done. */
16952082Seschrock 		if (ds->ds_phys->ds_prev_snap_obj == 0)
16962082Seschrock 			break;
16972082Seschrock 
16982082Seschrock 		if (err = bplist_space(&ds->ds_deadlist,
16992082Seschrock 		    &dlused, &dlcomp, &dluncomp))
17002082Seschrock 			goto out;
17012082Seschrock 		if (err = dsl_dataset_open_obj(dd->dd_pool,
17022082Seschrock 		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
17032082Seschrock 		    FTAG, &prev))
17042082Seschrock 			goto out;
		/*
		 * The remaining two terms of the formula above.  The
		 * accumulators are uint64_t, so the arithmetic is
		 * modular and an intermediate term that is "negative"
		 * does not disturb the final sum.
		 */
17052199Sahrens 		pa->used += dlused - prev->ds_phys->ds_used_bytes;
17062199Sahrens 		pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes;
17072199Sahrens 		pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes;
17082082Seschrock 
17092082Seschrock 		/*
17102082Seschrock 		 * We could be a clone of a clone.  If we reach our
17112082Seschrock 		 * parent's branch point, we're done.
17122082Seschrock 		 */
17132082Seschrock 		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
17142082Seschrock 			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
17152082Seschrock 			break;
17162082Seschrock 		}
		/*
		 * Step back to prev.  pivot_ds is never closed here; its
		 * hold is dropped once, at "out".
		 */
17172082Seschrock 		if (ds != pivot_ds)
17182082Seschrock 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
17192082Seschrock 		ds = prev;
17202082Seschrock 	}
17212082Seschrock 
17222082Seschrock 	/* Check that there is enough space here */
17232199Sahrens 	err = dsl_dir_transfer_possible(pdd, dd, pa->used);
17242199Sahrens 
	/*
	 * Common exit: release whatever is still held.  The pointers
	 * start out NULL, so each guard also covers failures that occur
	 * before the corresponding open or allocation.
	 */
17252199Sahrens out:
17262199Sahrens 	if (ds && ds != pivot_ds)
17272199Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
17282199Sahrens 	if (pivot_ds)
17292199Sahrens 		dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG);
17302199Sahrens 	if (newnext_ds)
17312199Sahrens 		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
17322199Sahrens 	if (name)
17332199Sahrens 		kmem_free(name, MAXPATHLEN);
17342199Sahrens 	return (err);
17352199Sahrens }
17362082Seschrock 
/*
 * Sync function for promoting a clone.
 *
 * arg1 is the head dataset of the clone being promoted and arg2 the
 * struct promotearg filled in by dsl_dataset_promote_check().
 *
 * Starting at the pivot (the snapshot the clone was created from) and
 * walking back through ds_prev_snap_obj, each snapshot is moved into the
 * clone's dsl_dir: its name entry moves from pa->snapnames_obj to the
 * clone's snapnames zap, and ds_dir_obj / ds_dir are switched over.
 * Afterwards the pivot's ds_next_snap_obj, the clone-parent links of both
 * dsl_dirs, the space accounting and the pivot's ds_unique_bytes are
 * updated.
 *
 * Failures are not expected in this function: every fallible call is
 * wrapped in VERIFY.
 */
17372199Sahrens static void
17382199Sahrens dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx)
17392199Sahrens {
17402199Sahrens 	dsl_dataset_t *hds = arg1;
17412199Sahrens 	struct promotearg *pa = arg2;
17422199Sahrens 	dsl_dir_t *dd = hds->ds_dir;
17432199Sahrens 	dsl_pool_t *dp = hds->ds_dir->dd_pool;
17442199Sahrens 	dsl_dir_t *pdd = NULL;
17452199Sahrens 	dsl_dataset_t *ds, *pivot_ds;
17462199Sahrens 	char *name;
17472199Sahrens 
17482199Sahrens 	ASSERT(dd->dd_phys->dd_clone_parent_obj != 0);
17492199Sahrens 	ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));
17502199Sahrens 
17512199Sahrens 	VERIFY(0 == dsl_dataset_open_obj(dp,
17522199Sahrens 	    dd->dd_phys->dd_clone_parent_obj,
17532199Sahrens 	    NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds));
17542417Sahrens 	/*
17552417Sahrens 	 * We need to explicitly open pdd, since pivot_ds's pdd will be
17562417Sahrens 	 * changing.
17572417Sahrens 	 */
17582417Sahrens 	VERIFY(0 == dsl_dir_open_obj(dp, pivot_ds->ds_dir->dd_object,
17592417Sahrens 	    NULL, FTAG, &pdd));
17602082Seschrock 
17612082Seschrock 	/* move snapshots to this dir */
	/*
	 * NOTE(review): as in the check function, "name" is only written
	 * by dsl_dataset_name() and never read here.  The call may be
	 * relied on for a side effect such as loading ds->ds_snapname --
	 * confirm before removing it.
	 */
17622199Sahrens 	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
17632082Seschrock 	ds = pivot_ds;
17642082Seschrock 	/* CONSTCOND */
17652082Seschrock 	while (TRUE) {
17662082Seschrock 		dsl_dataset_t *prev;
17672082Seschrock 
17682082Seschrock 		/* move snap name entry */
17692082Seschrock 		dsl_dataset_name(ds, name);
17702199Sahrens 		VERIFY(0 == zap_remove(dp->dp_meta_objset,
17712199Sahrens 		    pa->snapnames_obj, ds->ds_snapname, tx));
17722199Sahrens 		VERIFY(0 == zap_add(dp->dp_meta_objset,
17732082Seschrock 		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
17742082Seschrock 		    8, 1, &ds->ds_object, tx));
17752082Seschrock 
17762082Seschrock 		/* change containing dsl_dir */
		/*
		 * Update the on-disk pointer first, then swap the
		 * in-core hold: the hold on pdd tagged with ds is
		 * dropped and replaced by one on dd with the same tag.
		 */
17772082Seschrock 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
17782082Seschrock 		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object);
17792082Seschrock 		ds->ds_phys->ds_dir_obj = dd->dd_object;
17802082Seschrock 		ASSERT3P(ds->ds_dir, ==, pdd);
17812082Seschrock 		dsl_dir_close(ds->ds_dir, ds);
17822199Sahrens 		VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
17832082Seschrock 		    NULL, ds, &ds->ds_dir));
17842082Seschrock 
17852082Seschrock 		ASSERT3U(dsl_prop_numcb(ds), ==, 0);
17862082Seschrock 
		/* Stop at the first snapshot ... */
17872082Seschrock 		if (ds->ds_phys->ds_prev_snap_obj == 0)
17882082Seschrock 			break;
17892082Seschrock 
17902199Sahrens 		VERIFY(0 == dsl_dataset_open_obj(dp,
17912082Seschrock 		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
17922082Seschrock 		    FTAG, &prev));
17932082Seschrock 
		/*
		 * ... or when prev's next snapshot is not ds, the same
		 * stopping rule the check function uses for a clone of
		 * a clone.
		 */
17942082Seschrock 		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
17952082Seschrock 			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
17962082Seschrock 			break;
17972082Seschrock 		}
17982082Seschrock 		if (ds != pivot_ds)
17992082Seschrock 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
18002082Seschrock 		ds = prev;
18012082Seschrock 	}
	/* Drop the hold on the last snapshot visited; pivot_ds is kept. */
18022199Sahrens 	if (ds != pivot_ds)
18032199Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
18042082Seschrock 
18052082Seschrock 	/* change pivot point's next snap */
18062082Seschrock 	dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx);
18072199Sahrens 	pivot_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj;
18082082Seschrock 
18092082Seschrock 	/* change clone_parent-age */
	/*
	 * dd takes over whatever clone parent pdd had, and pdd becomes a
	 * clone of the pivot snapshot.  The order matters: pdd's old
	 * value is read before it is overwritten.
	 */
18102082Seschrock 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
18112082Seschrock 	ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object);
18122082Seschrock 	dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj;
18132082Seschrock 	dmu_buf_will_dirty(pdd->dd_dbuf, tx);
18142082Seschrock 	pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object;
18152082Seschrock 
18162082Seschrock 	/* change space accounting */
	/*
	 * Move the totals computed by the check function from pdd to dd.
	 * pivot_ds's dbuf was already marked dirty above, so
	 * ds_unique_bytes can be stored directly.
	 */
18172199Sahrens 	dsl_dir_diduse_space(pdd, -pa->used, -pa->comp, -pa->uncomp, tx);
18182199Sahrens 	dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx);
18192199Sahrens 	pivot_ds->ds_phys->ds_unique_bytes = pa->unique;
18202082Seschrock 
18212417Sahrens 	dsl_dir_close(pdd, FTAG);
18222199Sahrens 	dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG);
18232199Sahrens 	kmem_free(name, MAXPATHLEN);
18242082Seschrock }
18252082Seschrock 
18262082Seschrock int
18272082Seschrock dsl_dataset_promote(const char *name)
18282082Seschrock {
18292082Seschrock 	dsl_dataset_t *ds;
18302082Seschrock 	int err;
18312082Seschrock 	dmu_object_info_t doi;
18322199Sahrens 	struct promotearg pa;
18332082Seschrock 
18342082Seschrock 	err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds);
18352082Seschrock 	if (err)
18362082Seschrock 		return (err);
18372082Seschrock 
18382082Seschrock 	err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset,
18392082Seschrock 	    ds->ds_phys->ds_snapnames_zapobj, &doi);
18402082Seschrock 	if (err) {
18412082Seschrock 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
18422082Seschrock 		return (err);
18432082Seschrock 	}
18442082Seschrock 
18452082Seschrock 	/*
18462082Seschrock 	 * Add in 128x the snapnames zapobj size, since we will be moving
18472082Seschrock 	 * a bunch of snapnames to the promoted ds, and dirtying their
18482082Seschrock 	 * bonus buffers.
18492082Seschrock 	 */
18502199Sahrens 	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
18512199Sahrens 	    dsl_dataset_promote_check,
18522199Sahrens 	    dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks);
18532082Seschrock 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
18542082Seschrock 	return (err);
18552082Seschrock }
1856