xref: /onnv-gate/usr/src/uts/common/fs/zfs/dsl_dataset.c (revision 4007:c6f5c6753018)
1789Sahrens /*
2789Sahrens  * CDDL HEADER START
3789Sahrens  *
4789Sahrens  * The contents of this file are subject to the terms of the
51544Seschrock  * Common Development and Distribution License (the "License").
61544Seschrock  * You may not use this file except in compliance with the License.
7789Sahrens  *
8789Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9789Sahrens  * or http://www.opensolaris.org/os/licensing.
10789Sahrens  * See the License for the specific language governing permissions
11789Sahrens  * and limitations under the License.
12789Sahrens  *
13789Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14789Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15789Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16789Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17789Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18789Sahrens  *
19789Sahrens  * CDDL HEADER END
20789Sahrens  */
21789Sahrens /*
223444Sek110237  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23789Sahrens  * Use is subject to license terms.
24789Sahrens  */
25789Sahrens 
26789Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
27789Sahrens 
28789Sahrens #include <sys/dmu_objset.h>
29789Sahrens #include <sys/dsl_dataset.h>
30789Sahrens #include <sys/dsl_dir.h>
312082Seschrock #include <sys/dsl_prop.h>
322199Sahrens #include <sys/dsl_synctask.h>
33789Sahrens #include <sys/dmu_traverse.h>
34789Sahrens #include <sys/dmu_tx.h>
35789Sahrens #include <sys/arc.h>
36789Sahrens #include <sys/zio.h>
37789Sahrens #include <sys/zap.h>
38789Sahrens #include <sys/unique.h>
39789Sahrens #include <sys/zfs_context.h>
40*4007Smmusante #include <sys/zfs_ioctl.h>
41789Sahrens 
422199Sahrens static dsl_checkfunc_t dsl_dataset_destroy_begin_check;
432199Sahrens static dsl_syncfunc_t dsl_dataset_destroy_begin_sync;
442199Sahrens static dsl_checkfunc_t dsl_dataset_rollback_check;
452199Sahrens static dsl_syncfunc_t dsl_dataset_rollback_sync;
462199Sahrens static dsl_checkfunc_t dsl_dataset_destroy_check;
472199Sahrens static dsl_syncfunc_t dsl_dataset_destroy_sync;
481731Sbonwick 
/* Largest total open weight a single dataset may carry at once. */
#define	DS_REF_MAX	(1ULL << 62)

/* Block size handed to bplist_create() for a new dataset's deadlist. */
#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE
52789Sahrens 
53789Sahrens /*
54789Sahrens  * We use weighted reference counts to express the various forms of exclusion
55789Sahrens  * between different open modes.  A STANDARD open is 1 point, an EXCLUSIVE open
563444Sek110237  * is DS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE.
57789Sahrens  * This makes the exclusion logic simple: the total refcnt for all opens cannot
583444Sek110237  * exceed DS_REF_MAX.  For example, EXCLUSIVE opens are exclusive because their
593444Sek110237  * weight (DS_REF_MAX) consumes the entire refcnt space.  PRIMARY opens consume
60789Sahrens  * just over half of the refcnt space, so there can't be more than one, but it
61789Sahrens  * can peacefully coexist with any number of STANDARD opens.
62789Sahrens  */
63789Sahrens static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
643444Sek110237 	0,			/* DS_MODE_NONE - invalid		*/
653444Sek110237 	1,			/* DS_MODE_STANDARD - unlimited number	*/
663444Sek110237 	(DS_REF_MAX >> 1) + 1,	/* DS_MODE_PRIMARY - only one of these	*/
673444Sek110237 	DS_REF_MAX		/* DS_MODE_EXCLUSIVE - no other opens	*/
68789Sahrens };
69789Sahrens 
70789Sahrens 
71789Sahrens void
72789Sahrens dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
73789Sahrens {
742082Seschrock 	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
75789Sahrens 	int compressed = BP_GET_PSIZE(bp);
76789Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
77789Sahrens 
78789Sahrens 	dprintf_bp(bp, "born, ds=%p\n", ds);
79789Sahrens 
80789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
81789Sahrens 	/* It could have been compressed away to nothing */
82789Sahrens 	if (BP_IS_HOLE(bp))
83789Sahrens 		return;
84789Sahrens 	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
85789Sahrens 	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
86789Sahrens 	if (ds == NULL) {
87789Sahrens 		/*
88789Sahrens 		 * Account for the meta-objset space in its placeholder
89789Sahrens 		 * dsl_dir.
90789Sahrens 		 */
91789Sahrens 		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
92789Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
93789Sahrens 		    used, compressed, uncompressed, tx);
94789Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
95789Sahrens 		return;
96789Sahrens 	}
97789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
98789Sahrens 	mutex_enter(&ds->ds_lock);
99789Sahrens 	ds->ds_phys->ds_used_bytes += used;
100789Sahrens 	ds->ds_phys->ds_compressed_bytes += compressed;
101789Sahrens 	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
102789Sahrens 	ds->ds_phys->ds_unique_bytes += used;
103789Sahrens 	mutex_exit(&ds->ds_lock);
104789Sahrens 	dsl_dir_diduse_space(ds->ds_dir,
105789Sahrens 	    used, compressed, uncompressed, tx);
106789Sahrens }
107789Sahrens 
108789Sahrens void
1093547Smaybee dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, zio_t *pio,
1103547Smaybee     dmu_tx_t *tx)
111789Sahrens {
1122082Seschrock 	int used = bp_get_dasize(tx->tx_pool->dp_spa, bp);
113789Sahrens 	int compressed = BP_GET_PSIZE(bp);
114789Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
115789Sahrens 
116789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1173547Smaybee 	/* No block pointer => nothing to free */
118789Sahrens 	if (BP_IS_HOLE(bp))
119789Sahrens 		return;
120789Sahrens 
121789Sahrens 	ASSERT(used > 0);
122789Sahrens 	if (ds == NULL) {
1233547Smaybee 		int err;
124789Sahrens 		/*
125789Sahrens 		 * Account for the meta-objset space in its placeholder
126789Sahrens 		 * dataset.
127789Sahrens 		 */
1283547Smaybee 		err = arc_free(pio, tx->tx_pool->dp_spa,
1293547Smaybee 		    tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
1303547Smaybee 		ASSERT(err == 0);
131789Sahrens 
132789Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
133789Sahrens 		    -used, -compressed, -uncompressed, tx);
134789Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
135789Sahrens 		return;
136789Sahrens 	}
137789Sahrens 	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
138789Sahrens 
139789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
140789Sahrens 
141789Sahrens 	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
1423547Smaybee 		int err;
1433547Smaybee 
144789Sahrens 		dprintf_bp(bp, "freeing: %s", "");
1453547Smaybee 		err = arc_free(pio, tx->tx_pool->dp_spa,
1463547Smaybee 		    tx->tx_txg, bp, NULL, NULL, pio ? ARC_NOWAIT: ARC_WAIT);
1473547Smaybee 		ASSERT(err == 0);
148789Sahrens 
149789Sahrens 		mutex_enter(&ds->ds_lock);
150789Sahrens 		/* XXX unique_bytes is not accurate for head datasets */
151789Sahrens 		/* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */
152789Sahrens 		ds->ds_phys->ds_unique_bytes -= used;
153789Sahrens 		mutex_exit(&ds->ds_lock);
154789Sahrens 		dsl_dir_diduse_space(ds->ds_dir,
155789Sahrens 		    -used, -compressed, -uncompressed, tx);
156789Sahrens 	} else {
157789Sahrens 		dprintf_bp(bp, "putting on dead list: %s", "");
1581544Seschrock 		VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
159789Sahrens 		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
160789Sahrens 		if (ds->ds_phys->ds_prev_snap_obj != 0) {
161789Sahrens 			ASSERT3U(ds->ds_prev->ds_object, ==,
162789Sahrens 			    ds->ds_phys->ds_prev_snap_obj);
163789Sahrens 			ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
164789Sahrens 			if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
1652082Seschrock 			    ds->ds_object && bp->blk_birth >
166789Sahrens 			    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
167789Sahrens 				dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
168789Sahrens 				mutex_enter(&ds->ds_prev->ds_lock);
169789Sahrens 				ds->ds_prev->ds_phys->ds_unique_bytes +=
170789Sahrens 				    used;
171789Sahrens 				mutex_exit(&ds->ds_prev->ds_lock);
172789Sahrens 			}
173789Sahrens 		}
174789Sahrens 	}
175789Sahrens 	mutex_enter(&ds->ds_lock);
176789Sahrens 	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
177789Sahrens 	ds->ds_phys->ds_used_bytes -= used;
178789Sahrens 	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
179789Sahrens 	ds->ds_phys->ds_compressed_bytes -= compressed;
180789Sahrens 	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
181789Sahrens 	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
182789Sahrens 	mutex_exit(&ds->ds_lock);
183789Sahrens }
184789Sahrens 
1851544Seschrock uint64_t
1861544Seschrock dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
187789Sahrens {
1882885Sahrens 	uint64_t trysnap = 0;
1892885Sahrens 
190789Sahrens 	if (ds == NULL)
1911544Seschrock 		return (0);
192789Sahrens 	/*
193789Sahrens 	 * The snapshot creation could fail, but that would cause an
194789Sahrens 	 * incorrect FALSE return, which would only result in an
195789Sahrens 	 * overestimation of the amount of space that an operation would
196789Sahrens 	 * consume, which is OK.
197789Sahrens 	 *
198789Sahrens 	 * There's also a small window where we could miss a pending
199789Sahrens 	 * snapshot, because we could set the sync task in the quiescing
200789Sahrens 	 * phase.  So this should only be used as a guess.
201789Sahrens 	 */
2022885Sahrens 	if (ds->ds_trysnap_txg >
2032885Sahrens 	    spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
2042885Sahrens 		trysnap = ds->ds_trysnap_txg;
2052885Sahrens 	return (MAX(ds->ds_phys->ds_prev_snap_txg, trysnap));
2061544Seschrock }
2071544Seschrock 
2081544Seschrock int
2091544Seschrock dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
2101544Seschrock {
2111544Seschrock 	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
212789Sahrens }
213789Sahrens 
214789Sahrens /* ARGSUSED */
215789Sahrens static void
216789Sahrens dsl_dataset_evict(dmu_buf_t *db, void *dsv)
217789Sahrens {
218789Sahrens 	dsl_dataset_t *ds = dsv;
219789Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
220789Sahrens 
2213444Sek110237 	/* open_refcount == DS_REF_MAX when deleting */
222789Sahrens 	ASSERT(ds->ds_open_refcount == 0 ||
2233444Sek110237 	    ds->ds_open_refcount == DS_REF_MAX);
224789Sahrens 
225789Sahrens 	dprintf_ds(ds, "evicting %s\n", "");
226789Sahrens 
227789Sahrens 	unique_remove(ds->ds_phys->ds_fsid_guid);
228789Sahrens 
229789Sahrens 	if (ds->ds_user_ptr != NULL)
230789Sahrens 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
231789Sahrens 
232789Sahrens 	if (ds->ds_prev) {
233789Sahrens 		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
234789Sahrens 		ds->ds_prev = NULL;
235789Sahrens 	}
236789Sahrens 
237789Sahrens 	bplist_close(&ds->ds_deadlist);
238789Sahrens 	dsl_dir_close(ds->ds_dir, ds);
239789Sahrens 
240789Sahrens 	if (list_link_active(&ds->ds_synced_link))
241789Sahrens 		list_remove(&dp->dp_synced_objsets, ds);
242789Sahrens 
2432856Snd150628 	mutex_destroy(&ds->ds_lock);
2442856Snd150628 	mutex_destroy(&ds->ds_deadlist.bpl_lock);
2452856Snd150628 
246789Sahrens 	kmem_free(ds, sizeof (dsl_dataset_t));
247789Sahrens }
248789Sahrens 
2491544Seschrock static int
250789Sahrens dsl_dataset_get_snapname(dsl_dataset_t *ds)
251789Sahrens {
252789Sahrens 	dsl_dataset_phys_t *headphys;
253789Sahrens 	int err;
254789Sahrens 	dmu_buf_t *headdbuf;
255789Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
256789Sahrens 	objset_t *mos = dp->dp_meta_objset;
257789Sahrens 
258789Sahrens 	if (ds->ds_snapname[0])
2591544Seschrock 		return (0);
260789Sahrens 	if (ds->ds_phys->ds_next_snap_obj == 0)
2611544Seschrock 		return (0);
262789Sahrens 
2631544Seschrock 	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
2641544Seschrock 	    FTAG, &headdbuf);
2651544Seschrock 	if (err)
2661544Seschrock 		return (err);
267789Sahrens 	headphys = headdbuf->db_data;
268789Sahrens 	err = zap_value_search(dp->dp_meta_objset,
269789Sahrens 	    headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname);
2701544Seschrock 	dmu_buf_rele(headdbuf, FTAG);
2711544Seschrock 	return (err);
272789Sahrens }
273789Sahrens 
2741544Seschrock int
275789Sahrens dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname,
2761544Seschrock     int mode, void *tag, dsl_dataset_t **dsp)
277789Sahrens {
278789Sahrens 	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
279789Sahrens 	objset_t *mos = dp->dp_meta_objset;
280789Sahrens 	dmu_buf_t *dbuf;
281789Sahrens 	dsl_dataset_t *ds;
2821544Seschrock 	int err;
283789Sahrens 
284789Sahrens 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
285789Sahrens 	    dsl_pool_sync_context(dp));
286789Sahrens 
2871544Seschrock 	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
2881544Seschrock 	if (err)
2891544Seschrock 		return (err);
290789Sahrens 	ds = dmu_buf_get_user(dbuf);
291789Sahrens 	if (ds == NULL) {
292789Sahrens 		dsl_dataset_t *winner;
293789Sahrens 
294789Sahrens 		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
295789Sahrens 		ds->ds_dbuf = dbuf;
296789Sahrens 		ds->ds_object = dsobj;
297789Sahrens 		ds->ds_phys = dbuf->db_data;
298789Sahrens 
2992856Snd150628 		mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
3002856Snd150628 		mutex_init(&ds->ds_deadlist.bpl_lock, NULL, MUTEX_DEFAULT,
3012856Snd150628 		    NULL);
3022856Snd150628 
3031544Seschrock 		err = bplist_open(&ds->ds_deadlist,
304789Sahrens 		    mos, ds->ds_phys->ds_deadlist_obj);
3051544Seschrock 		if (err == 0) {
3061544Seschrock 			err = dsl_dir_open_obj(dp,
3071544Seschrock 			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
3081544Seschrock 		}
3091544Seschrock 		if (err) {
3101544Seschrock 			/*
3111544Seschrock 			 * we don't really need to close the blist if we
3121544Seschrock 			 * just opened it.
3131544Seschrock 			 */
3142856Snd150628 			mutex_destroy(&ds->ds_lock);
3152856Snd150628 			mutex_destroy(&ds->ds_deadlist.bpl_lock);
3161544Seschrock 			kmem_free(ds, sizeof (dsl_dataset_t));
3171544Seschrock 			dmu_buf_rele(dbuf, tag);
3181544Seschrock 			return (err);
3191544Seschrock 		}
320789Sahrens 
321789Sahrens 		if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) {
322789Sahrens 			ds->ds_snapname[0] = '\0';
323789Sahrens 			if (ds->ds_phys->ds_prev_snap_obj) {
3241544Seschrock 				err = dsl_dataset_open_obj(dp,
325789Sahrens 				    ds->ds_phys->ds_prev_snap_obj, NULL,
3261544Seschrock 				    DS_MODE_NONE, ds, &ds->ds_prev);
327789Sahrens 			}
328789Sahrens 		} else {
329789Sahrens 			if (snapname) {
330789Sahrens #ifdef ZFS_DEBUG
331789Sahrens 				dsl_dataset_phys_t *headphys;
3321544Seschrock 				dmu_buf_t *headdbuf;
3331544Seschrock 				err = dmu_bonus_hold(mos,
3341544Seschrock 				    ds->ds_dir->dd_phys->dd_head_dataset_obj,
3351544Seschrock 				    FTAG, &headdbuf);
3361544Seschrock 				if (err == 0) {
3371544Seschrock 					headphys = headdbuf->db_data;
3381544Seschrock 					uint64_t foundobj;
3391544Seschrock 					err = zap_lookup(dp->dp_meta_objset,
3401544Seschrock 					    headphys->ds_snapnames_zapobj,
3411544Seschrock 					    snapname, sizeof (foundobj), 1,
3421544Seschrock 					    &foundobj);
3431544Seschrock 					ASSERT3U(foundobj, ==, dsobj);
3441544Seschrock 					dmu_buf_rele(headdbuf, FTAG);
3451544Seschrock 				}
346789Sahrens #endif
347789Sahrens 				(void) strcat(ds->ds_snapname, snapname);
348789Sahrens 			} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
3491544Seschrock 				err = dsl_dataset_get_snapname(ds);
350789Sahrens 			}
351789Sahrens 		}
352789Sahrens 
3531544Seschrock 		if (err == 0) {
3541544Seschrock 			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
3551544Seschrock 			    dsl_dataset_evict);
3561544Seschrock 		}
3571544Seschrock 		if (err || winner) {
358789Sahrens 			bplist_close(&ds->ds_deadlist);
359789Sahrens 			if (ds->ds_prev) {
360789Sahrens 				dsl_dataset_close(ds->ds_prev,
361789Sahrens 				    DS_MODE_NONE, ds);
362789Sahrens 			}
363789Sahrens 			dsl_dir_close(ds->ds_dir, ds);
3642856Snd150628 			mutex_destroy(&ds->ds_lock);
3652856Snd150628 			mutex_destroy(&ds->ds_deadlist.bpl_lock);
366789Sahrens 			kmem_free(ds, sizeof (dsl_dataset_t));
3671544Seschrock 			if (err) {
3681544Seschrock 				dmu_buf_rele(dbuf, tag);
3691544Seschrock 				return (err);
3701544Seschrock 			}
371789Sahrens 			ds = winner;
372789Sahrens 		} else {
373789Sahrens 			uint64_t new =
374789Sahrens 			    unique_insert(ds->ds_phys->ds_fsid_guid);
375789Sahrens 			if (new != ds->ds_phys->ds_fsid_guid) {
376789Sahrens 				/* XXX it won't necessarily be synced... */
377789Sahrens 				ds->ds_phys->ds_fsid_guid = new;
378789Sahrens 			}
379789Sahrens 		}
380789Sahrens 	}
381789Sahrens 	ASSERT3P(ds->ds_dbuf, ==, dbuf);
382789Sahrens 	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
383789Sahrens 
384789Sahrens 	mutex_enter(&ds->ds_lock);
385789Sahrens 	if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY &&
3862082Seschrock 	    (ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT) &&
3872082Seschrock 	    !DS_MODE_IS_INCONSISTENT(mode)) ||
3883444Sek110237 	    (ds->ds_open_refcount + weight > DS_REF_MAX)) {
389789Sahrens 		mutex_exit(&ds->ds_lock);
390789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
3911544Seschrock 		return (EBUSY);
392789Sahrens 	}
393789Sahrens 	ds->ds_open_refcount += weight;
394789Sahrens 	mutex_exit(&ds->ds_lock);
395789Sahrens 
3961544Seschrock 	*dsp = ds;
3971544Seschrock 	return (0);
398789Sahrens }
399789Sahrens 
400789Sahrens int
401789Sahrens dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
402789Sahrens     void *tag, dsl_dataset_t **dsp)
403789Sahrens {
404789Sahrens 	dsl_dir_t *dd;
405789Sahrens 	dsl_pool_t *dp;
406789Sahrens 	const char *tail;
407789Sahrens 	uint64_t obj;
408789Sahrens 	dsl_dataset_t *ds = NULL;
409789Sahrens 	int err = 0;
410789Sahrens 
4111544Seschrock 	err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail);
4121544Seschrock 	if (err)
4131544Seschrock 		return (err);
414789Sahrens 
415789Sahrens 	dp = dd->dd_pool;
416789Sahrens 	obj = dd->dd_phys->dd_head_dataset_obj;
417789Sahrens 	rw_enter(&dp->dp_config_rwlock, RW_READER);
418789Sahrens 	if (obj == 0) {
419789Sahrens 		/* A dataset with no associated objset */
420789Sahrens 		err = ENOENT;
421789Sahrens 		goto out;
422789Sahrens 	}
423789Sahrens 
424789Sahrens 	if (tail != NULL) {
425789Sahrens 		objset_t *mos = dp->dp_meta_objset;
426789Sahrens 
4271544Seschrock 		err = dsl_dataset_open_obj(dp, obj, NULL,
4281544Seschrock 		    DS_MODE_NONE, tag, &ds);
4291544Seschrock 		if (err)
4301544Seschrock 			goto out;
431789Sahrens 		obj = ds->ds_phys->ds_snapnames_zapobj;
432789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
433789Sahrens 		ds = NULL;
434789Sahrens 
435789Sahrens 		if (tail[0] != '@') {
436789Sahrens 			err = ENOENT;
437789Sahrens 			goto out;
438789Sahrens 		}
439789Sahrens 		tail++;
440789Sahrens 
441789Sahrens 		/* Look for a snapshot */
442789Sahrens 		if (!DS_MODE_IS_READONLY(mode)) {
443789Sahrens 			err = EROFS;
444789Sahrens 			goto out;
445789Sahrens 		}
446789Sahrens 		dprintf("looking for snapshot '%s'\n", tail);
447789Sahrens 		err = zap_lookup(mos, obj, tail, 8, 1, &obj);
448789Sahrens 		if (err)
449789Sahrens 			goto out;
450789Sahrens 	}
4511544Seschrock 	err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds);
452789Sahrens 
453789Sahrens out:
454789Sahrens 	rw_exit(&dp->dp_config_rwlock);
455789Sahrens 	dsl_dir_close(dd, FTAG);
456789Sahrens 
457789Sahrens 	ASSERT3U((err == 0), ==, (ds != NULL));
458789Sahrens 	/* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */
459789Sahrens 
460789Sahrens 	*dsp = ds;
461789Sahrens 	return (err);
462789Sahrens }
463789Sahrens 
464789Sahrens int
465789Sahrens dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp)
466789Sahrens {
467789Sahrens 	return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp));
468789Sahrens }
469789Sahrens 
470789Sahrens void
471789Sahrens dsl_dataset_name(dsl_dataset_t *ds, char *name)
472789Sahrens {
473789Sahrens 	if (ds == NULL) {
474789Sahrens 		(void) strcpy(name, "mos");
475789Sahrens 	} else {
476789Sahrens 		dsl_dir_name(ds->ds_dir, name);
4771544Seschrock 		VERIFY(0 == dsl_dataset_get_snapname(ds));
478789Sahrens 		if (ds->ds_snapname[0]) {
479789Sahrens 			(void) strcat(name, "@");
480789Sahrens 			if (!MUTEX_HELD(&ds->ds_lock)) {
481789Sahrens 				/*
482789Sahrens 				 * We use a "recursive" mutex so that we
483789Sahrens 				 * can call dprintf_ds() with ds_lock held.
484789Sahrens 				 */
485789Sahrens 				mutex_enter(&ds->ds_lock);
486789Sahrens 				(void) strcat(name, ds->ds_snapname);
487789Sahrens 				mutex_exit(&ds->ds_lock);
488789Sahrens 			} else {
489789Sahrens 				(void) strcat(name, ds->ds_snapname);
490789Sahrens 			}
491789Sahrens 		}
492789Sahrens 	}
493789Sahrens }
494789Sahrens 
4953978Smmusante static int
4963978Smmusante dsl_dataset_namelen(dsl_dataset_t *ds)
4973978Smmusante {
4983978Smmusante 	int result;
4993978Smmusante 
5003978Smmusante 	if (ds == NULL) {
5013978Smmusante 		result = 3;	/* "mos" */
5023978Smmusante 	} else {
5033978Smmusante 		result = dsl_dir_namelen(ds->ds_dir);
5043978Smmusante 		VERIFY(0 == dsl_dataset_get_snapname(ds));
5053978Smmusante 		if (ds->ds_snapname[0]) {
5063978Smmusante 			++result;	/* adding one for the @-sign */
5073978Smmusante 			if (!MUTEX_HELD(&ds->ds_lock)) {
5083978Smmusante 				/* see dsl_datset_name */
5093978Smmusante 				mutex_enter(&ds->ds_lock);
5103978Smmusante 				result += strlen(ds->ds_snapname);
5113978Smmusante 				mutex_exit(&ds->ds_lock);
5123978Smmusante 			} else {
5133978Smmusante 				result += strlen(ds->ds_snapname);
5143978Smmusante 			}
5153978Smmusante 		}
5163978Smmusante 	}
5173978Smmusante 
5183978Smmusante 	return (result);
5193978Smmusante }
5203978Smmusante 
521789Sahrens void
522789Sahrens dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag)
523789Sahrens {
524789Sahrens 	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
525789Sahrens 	mutex_enter(&ds->ds_lock);
526789Sahrens 	ASSERT3U(ds->ds_open_refcount, >=, weight);
527789Sahrens 	ds->ds_open_refcount -= weight;
528789Sahrens 	dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n",
529789Sahrens 	    mode, ds->ds_open_refcount);
530789Sahrens 	mutex_exit(&ds->ds_lock);
531789Sahrens 
5321544Seschrock 	dmu_buf_rele(ds->ds_dbuf, tag);
533789Sahrens }
534789Sahrens 
535789Sahrens void
536789Sahrens dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
537789Sahrens {
538789Sahrens 	objset_t *mos = dp->dp_meta_objset;
539789Sahrens 	dmu_buf_t *dbuf;
540789Sahrens 	dsl_dataset_phys_t *dsphys;
541789Sahrens 	dsl_dataset_t *ds;
542789Sahrens 	uint64_t dsobj;
543789Sahrens 	dsl_dir_t *dd;
544789Sahrens 
545789Sahrens 	dsl_dir_create_root(mos, ddobjp, tx);
5461544Seschrock 	VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd));
547789Sahrens 
548928Stabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
549928Stabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
5501544Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
551789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
552789Sahrens 	dsphys = dbuf->db_data;
553789Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
554789Sahrens 	dsphys->ds_fsid_guid = unique_create();
5551544Seschrock 	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
556789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
557789Sahrens 	    sizeof (dsphys->ds_guid));
558789Sahrens 	dsphys->ds_snapnames_zapobj =
559885Sahrens 	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
560789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
561789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
562789Sahrens 	dsphys->ds_deadlist_obj =
563789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
5641544Seschrock 	dmu_buf_rele(dbuf, FTAG);
565789Sahrens 
566789Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
567789Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
568789Sahrens 	dsl_dir_close(dd, FTAG);
569789Sahrens 
5701544Seschrock 	VERIFY(0 ==
5711544Seschrock 	    dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds));
5723547Smaybee 	(void) dmu_objset_create_impl(dp->dp_spa, ds,
5733547Smaybee 	    &ds->ds_phys->ds_bp, DMU_OST_ZFS, tx);
574789Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
575789Sahrens }
576789Sahrens 
5772199Sahrens uint64_t
5782199Sahrens dsl_dataset_create_sync(dsl_dir_t *pdd,
579789Sahrens     const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx)
580789Sahrens {
5812199Sahrens 	dsl_pool_t *dp = pdd->dd_pool;
582789Sahrens 	dmu_buf_t *dbuf;
583789Sahrens 	dsl_dataset_phys_t *dsphys;
5842199Sahrens 	uint64_t dsobj, ddobj;
585789Sahrens 	objset_t *mos = dp->dp_meta_objset;
586789Sahrens 	dsl_dir_t *dd;
587789Sahrens 
5882199Sahrens 	ASSERT(clone_parent == NULL || clone_parent->ds_dir->dd_pool == dp);
5892199Sahrens 	ASSERT(clone_parent == NULL ||
5902199Sahrens 	    clone_parent->ds_phys->ds_num_children > 0);
591789Sahrens 	ASSERT(lastname[0] != '@');
592789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
593789Sahrens 
5942199Sahrens 	ddobj = dsl_dir_create_sync(pdd, lastname, tx);
5952199Sahrens 	VERIFY(0 == dsl_dir_open_obj(dp, ddobj, lastname, FTAG, &dd));
596789Sahrens 
597928Stabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
598928Stabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
5991544Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
600789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
601789Sahrens 	dsphys = dbuf->db_data;
602789Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
603789Sahrens 	dsphys->ds_fsid_guid = unique_create();
604789Sahrens 	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
605789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
606789Sahrens 	    sizeof (dsphys->ds_guid));
607789Sahrens 	dsphys->ds_snapnames_zapobj =
608885Sahrens 	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
609789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
610789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
611789Sahrens 	dsphys->ds_deadlist_obj =
612789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
613789Sahrens 	if (clone_parent) {
614789Sahrens 		dsphys->ds_prev_snap_obj = clone_parent->ds_object;
615789Sahrens 		dsphys->ds_prev_snap_txg =
616789Sahrens 		    clone_parent->ds_phys->ds_creation_txg;
617789Sahrens 		dsphys->ds_used_bytes =
618789Sahrens 		    clone_parent->ds_phys->ds_used_bytes;
619789Sahrens 		dsphys->ds_compressed_bytes =
620789Sahrens 		    clone_parent->ds_phys->ds_compressed_bytes;
621789Sahrens 		dsphys->ds_uncompressed_bytes =
622789Sahrens 		    clone_parent->ds_phys->ds_uncompressed_bytes;
623789Sahrens 		dsphys->ds_bp = clone_parent->ds_phys->ds_bp;
624789Sahrens 
625789Sahrens 		dmu_buf_will_dirty(clone_parent->ds_dbuf, tx);
626789Sahrens 		clone_parent->ds_phys->ds_num_children++;
627789Sahrens 
628789Sahrens 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
629789Sahrens 		dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object;
630789Sahrens 	}
6311544Seschrock 	dmu_buf_rele(dbuf, FTAG);
632789Sahrens 
633789Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
634789Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
635789Sahrens 	dsl_dir_close(dd, FTAG);
636789Sahrens 
6372199Sahrens 	return (dsobj);
6382199Sahrens }
6392199Sahrens 
6402199Sahrens struct destroyarg {
6412199Sahrens 	dsl_sync_task_group_t *dstg;
6422199Sahrens 	char *snapname;
6432199Sahrens 	char *failed;
6442199Sahrens };
6452199Sahrens 
6462199Sahrens static int
6472199Sahrens dsl_snapshot_destroy_one(char *name, void *arg)
6482199Sahrens {
6492199Sahrens 	struct destroyarg *da = arg;
6502199Sahrens 	dsl_dataset_t *ds;
6512199Sahrens 	char *cp;
6522199Sahrens 	int err;
6532199Sahrens 
6542199Sahrens 	(void) strcat(name, "@");
6552199Sahrens 	(void) strcat(name, da->snapname);
6562199Sahrens 	err = dsl_dataset_open(name,
6572199Sahrens 	    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
658*4007Smmusante 	    da->dstg, &ds);
6592199Sahrens 	cp = strchr(name, '@');
6602199Sahrens 	*cp = '\0';
6612199Sahrens 	if (err == ENOENT)
6622199Sahrens 		return (0);
6632199Sahrens 	if (err) {
6642199Sahrens 		(void) strcpy(da->failed, name);
6652199Sahrens 		return (err);
6662199Sahrens 	}
6672199Sahrens 
6682199Sahrens 	dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
669*4007Smmusante 	    dsl_dataset_destroy_sync, ds, da->dstg, 0);
670789Sahrens 	return (0);
671789Sahrens }
672789Sahrens 
6732199Sahrens /*
6742199Sahrens  * Destroy 'snapname' in all descendants of 'fsname'.
6752199Sahrens  */
6762199Sahrens #pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
6772199Sahrens int
6782199Sahrens dsl_snapshots_destroy(char *fsname, char *snapname)
6792199Sahrens {
6802199Sahrens 	int err;
6812199Sahrens 	struct destroyarg da;
6822199Sahrens 	dsl_sync_task_t *dst;
6832199Sahrens 	spa_t *spa;
6842199Sahrens 	char *cp;
6852199Sahrens 
6862199Sahrens 	cp = strchr(fsname, '/');
6872199Sahrens 	if (cp) {
6882199Sahrens 		*cp = '\0';
6892199Sahrens 		err = spa_open(fsname, &spa, FTAG);
6902199Sahrens 		*cp = '/';
6912199Sahrens 	} else {
6922199Sahrens 		err = spa_open(fsname, &spa, FTAG);
6932199Sahrens 	}
6942199Sahrens 	if (err)
6952199Sahrens 		return (err);
6962199Sahrens 	da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
6972199Sahrens 	da.snapname = snapname;
6982199Sahrens 	da.failed = fsname;
6992199Sahrens 
7002199Sahrens 	err = dmu_objset_find(fsname,
7012417Sahrens 	    dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);
7022199Sahrens 
7032199Sahrens 	if (err == 0)
7042199Sahrens 		err = dsl_sync_task_group_wait(da.dstg);
7052199Sahrens 
7062199Sahrens 	for (dst = list_head(&da.dstg->dstg_tasks); dst;
7072199Sahrens 	    dst = list_next(&da.dstg->dstg_tasks, dst)) {
7082199Sahrens 		dsl_dataset_t *ds = dst->dst_arg1;
7092199Sahrens 		if (dst->dst_err) {
7102199Sahrens 			dsl_dataset_name(ds, fsname);
7112199Sahrens 			cp = strchr(fsname, '@');
7122199Sahrens 			*cp = '\0';
7132199Sahrens 		}
7142199Sahrens 		/*
7152199Sahrens 		 * If it was successful, destroy_sync would have
7162199Sahrens 		 * closed the ds
7172199Sahrens 		 */
7182199Sahrens 		if (err)
719*4007Smmusante 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, da.dstg);
7202199Sahrens 	}
7212199Sahrens 
7222199Sahrens 	dsl_sync_task_group_destroy(da.dstg);
7232199Sahrens 	spa_close(spa, FTAG);
7242199Sahrens 	return (err);
7252199Sahrens }
7262199Sahrens 
/*
 * Destroy the dataset or snapshot named by 'name'.
 *
 * A name containing '@' is treated as a snapshot and is destroyed with a
 * single sync task.  Otherwise the head dataset is destroyed in stages:
 * it is first marked inconsistent on disk, then its objects are freed in
 * open context, and finally the head dataset and its dsl_dir are
 * destroyed together in one sync task group.
 *
 * Returns 0 on success, or the error from whichever step failed.  On
 * success the *_destroy_sync functions close the holds taken here; on
 * failure this function closes them itself.
 */
727789Sahrens int
728789Sahrens dsl_dataset_destroy(const char *name)
729789Sahrens {
730789Sahrens 	int err;
7312199Sahrens 	dsl_sync_task_group_t *dstg;
7322199Sahrens 	objset_t *os;
7332199Sahrens 	dsl_dataset_t *ds;
734789Sahrens 	dsl_dir_t *dd;
7352199Sahrens 	uint64_t obj;
7362199Sahrens 
7372199Sahrens 	if (strchr(name, '@')) {
7382199Sahrens 		/* Destroying a snapshot is simpler */
7392199Sahrens 		err = dsl_dataset_open(name,
7402199Sahrens 		    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
7412199Sahrens 		    FTAG, &ds);
7422199Sahrens 		if (err)
7432199Sahrens 			return (err);
		/* FTAG is handed to destroy_sync, which uses it to close ds. */
7442199Sahrens 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
7452199Sahrens 		    dsl_dataset_destroy_check, dsl_dataset_destroy_sync,
7462199Sahrens 		    ds, FTAG, 0);
7472199Sahrens 		if (err)
7482199Sahrens 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
7492199Sahrens 		return (err);
7502199Sahrens 	}
7512199Sahrens 
7522199Sahrens 	err = dmu_objset_open(name, DMU_OST_ANY,
7532199Sahrens 	    DS_MODE_EXCLUSIVE | DS_MODE_INCONSISTENT, &os);
7542199Sahrens 	if (err)
7552199Sahrens 		return (err);
7562199Sahrens 	ds = os->os->os_dsl_dataset;
7572199Sahrens 	dd = ds->ds_dir;
758789Sahrens 
7592199Sahrens 	/*
7602199Sahrens 	 * Check for errors and mark this ds as inconsistent, in
7612199Sahrens 	 * case we crash while freeing the objects.
7622199Sahrens 	 */
7632199Sahrens 	err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
7642199Sahrens 	    dsl_dataset_destroy_begin_sync, ds, NULL, 0);
7652199Sahrens 	if (err) {
7662199Sahrens 		dmu_objset_close(os);
7672199Sahrens 		return (err);
7682199Sahrens 	}
7692199Sahrens 
7702199Sahrens 	/*
7712199Sahrens 	 * remove the objects in open context, so that we won't
7722199Sahrens 	 * have too much to do in syncing context.
7732199Sahrens 	 */
	/*
	 * Note: the 'continue' below still executes the for-increment, so
	 * a dmu_tx_assign() failure is overwritten by the result of
	 * dmu_object_next() and the walk proceeds to the next object.
	 * The loop ends only when dmu_object_next() returns nonzero.
	 */
7743025Sahrens 	for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
7753025Sahrens 	    ds->ds_phys->ds_prev_snap_txg)) {
7762199Sahrens 		dmu_tx_t *tx = dmu_tx_create(os);
7772199Sahrens 		dmu_tx_hold_free(tx, obj, 0, DMU_OBJECT_END);
7782199Sahrens 		dmu_tx_hold_bonus(tx, obj);
7792199Sahrens 		err = dmu_tx_assign(tx, TXG_WAIT);
7802199Sahrens 		if (err) {
7812199Sahrens 			/*
7822199Sahrens 			 * Perhaps there is not enough disk
7832199Sahrens 			 * space.  Just deal with it from
7842199Sahrens 			 * dsl_dataset_destroy_sync().
7852199Sahrens 			 */
7862199Sahrens 			dmu_tx_abort(tx);
7872199Sahrens 			continue;
7882199Sahrens 		}
7892199Sahrens 		VERIFY(0 == dmu_object_free(os, obj, tx));
7902199Sahrens 		dmu_tx_commit(tx);
7912199Sahrens 	}
7922199Sahrens 	/* Make sure it's not dirty before we finish destroying it. */
7932199Sahrens 	txg_wait_synced(dd->dd_pool, 0);
7942199Sahrens 
7952199Sahrens 	dmu_objset_close(os);
	/*
	 * ESRCH is the only loop-terminating error treated as success
	 * (presumably "no more objects" -- confirm against
	 * dmu_object_next()); anything else aborts the destroy here.
	 */
7962199Sahrens 	if (err != ESRCH)
7972199Sahrens 		return (err);
7982199Sahrens 
	/* Re-open the (now inconsistent) dataset and its dir for the final step. */
7992199Sahrens 	err = dsl_dataset_open(name,
8002199Sahrens 	    DS_MODE_EXCLUSIVE | DS_MODE_READONLY | DS_MODE_INCONSISTENT,
8012199Sahrens 	    FTAG, &ds);
8021544Seschrock 	if (err)
8031544Seschrock 		return (err);
804789Sahrens 
8052199Sahrens 	err = dsl_dir_open(name, FTAG, &dd, NULL);
8062199Sahrens 	if (err) {
8072199Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
8082199Sahrens 		return (err);
809789Sahrens 	}
810789Sahrens 
8112199Sahrens 	/*
8122199Sahrens 	 * Blow away the dsl_dir + head dataset.
8132199Sahrens 	 */
8142199Sahrens 	dstg = dsl_sync_task_group_create(ds->ds_dir->dd_pool);
8152199Sahrens 	dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
8162199Sahrens 	    dsl_dataset_destroy_sync, ds, FTAG, 0);
8172199Sahrens 	dsl_sync_task_create(dstg, dsl_dir_destroy_check,
8182199Sahrens 	    dsl_dir_destroy_sync, dd, FTAG, 0);
8192199Sahrens 	err = dsl_sync_task_group_wait(dstg);
8202199Sahrens 	dsl_sync_task_group_destroy(dstg);
8212199Sahrens 	/* if it is successful, *destroy_sync will close the ds+dd */
8222199Sahrens 	if (err) {
8232199Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
8242199Sahrens 		dsl_dir_close(dd, FTAG);
8252199Sahrens 	}
826789Sahrens 	return (err);
827789Sahrens }
828789Sahrens 
829789Sahrens int
8302199Sahrens dsl_dataset_rollback(dsl_dataset_t *ds)
831789Sahrens {
8323444Sek110237 	ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX);
8332199Sahrens 	return (dsl_sync_task_do(ds->ds_dir->dd_pool,
8342199Sahrens 	    dsl_dataset_rollback_check, dsl_dataset_rollback_sync,
8352199Sahrens 	    ds, NULL, 0));
836789Sahrens }
837789Sahrens 
838789Sahrens void *
839789Sahrens dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
840789Sahrens     void *p, dsl_dataset_evict_func_t func)
841789Sahrens {
842789Sahrens 	void *old;
843789Sahrens 
844789Sahrens 	mutex_enter(&ds->ds_lock);
845789Sahrens 	old = ds->ds_user_ptr;
846789Sahrens 	if (old == NULL) {
847789Sahrens 		ds->ds_user_ptr = p;
848789Sahrens 		ds->ds_user_evict_func = func;
849789Sahrens 	}
850789Sahrens 	mutex_exit(&ds->ds_lock);
851789Sahrens 	return (old);
852789Sahrens }
853789Sahrens 
854789Sahrens void *
855789Sahrens dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
856789Sahrens {
857789Sahrens 	return (ds->ds_user_ptr);
858789Sahrens }
859789Sahrens 
860789Sahrens 
8613547Smaybee blkptr_t *
8623547Smaybee dsl_dataset_get_blkptr(dsl_dataset_t *ds)
863789Sahrens {
8643547Smaybee 	return (&ds->ds_phys->ds_bp);
865789Sahrens }
866789Sahrens 
867789Sahrens void
868789Sahrens dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
869789Sahrens {
870789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
871789Sahrens 	/* If it's the meta-objset, set dp_meta_rootbp */
872789Sahrens 	if (ds == NULL) {
873789Sahrens 		tx->tx_pool->dp_meta_rootbp = *bp;
874789Sahrens 	} else {
875789Sahrens 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
876789Sahrens 		ds->ds_phys->ds_bp = *bp;
877789Sahrens 	}
878789Sahrens }
879789Sahrens 
880789Sahrens spa_t *
881789Sahrens dsl_dataset_get_spa(dsl_dataset_t *ds)
882789Sahrens {
883789Sahrens 	return (ds->ds_dir->dd_pool->dp_spa);
884789Sahrens }
885789Sahrens 
886789Sahrens void
887789Sahrens dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
888789Sahrens {
889789Sahrens 	dsl_pool_t *dp;
890789Sahrens 
891789Sahrens 	if (ds == NULL) /* this is the meta-objset */
892789Sahrens 		return;
893789Sahrens 
894789Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
8952885Sahrens 
8962885Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0)
8972885Sahrens 		panic("dirtying snapshot!");
898789Sahrens 
899789Sahrens 	dp = ds->ds_dir->dd_pool;
900789Sahrens 
901789Sahrens 	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
902789Sahrens 		/* up the hold count until we can be written out */
903789Sahrens 		dmu_buf_add_ref(ds->ds_dbuf, ds);
904789Sahrens 	}
905789Sahrens }
906789Sahrens 
/*
 * State handed to kill_blkptr() through traverse_dsl_dataset(): the
 * caller's running space totals, plus the zio and tx that the block
 * frees are issued with.
 */
907789Sahrens struct killarg {
908789Sahrens 	uint64_t *usedp;		/* += bp_get_dasize() per block */
909789Sahrens 	uint64_t *compressedp;		/* += BP_GET_PSIZE() per block */
910789Sahrens 	uint64_t *uncompressedp;	/* += BP_GET_UCSIZE() per block */
911789Sahrens 	zio_t *zio;			/* zio passed to arc_free() */
912789Sahrens 	dmu_tx_t *tx;			/* supplies the txg for arc_free() */
913789Sahrens };
914789Sahrens 
915789Sahrens static int
916789Sahrens kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
917789Sahrens {
918789Sahrens 	struct killarg *ka = arg;
919789Sahrens 	blkptr_t *bp = &bc->bc_blkptr;
920789Sahrens 
921789Sahrens 	ASSERT3U(bc->bc_errno, ==, 0);
922789Sahrens 
923789Sahrens 	/*
924789Sahrens 	 * Since this callback is not called concurrently, no lock is
925789Sahrens 	 * needed on the accounting values.
926789Sahrens 	 */
9272082Seschrock 	*ka->usedp += bp_get_dasize(spa, bp);
928789Sahrens 	*ka->compressedp += BP_GET_PSIZE(bp);
929789Sahrens 	*ka->uncompressedp += BP_GET_UCSIZE(bp);
930789Sahrens 	/* XXX check for EIO? */
931789Sahrens 	(void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL,
932789Sahrens 	    ARC_NOWAIT);
933789Sahrens 	return (0);
934789Sahrens }
935789Sahrens 
936789Sahrens /* ARGSUSED */
9372199Sahrens static int
9382199Sahrens dsl_dataset_rollback_check(void *arg1, void *arg2, dmu_tx_t *tx)
939789Sahrens {
9402199Sahrens 	dsl_dataset_t *ds = arg1;
941789Sahrens 
9422199Sahrens 	/*
9432199Sahrens 	 * There must be a previous snapshot.  I suppose we could roll
9442199Sahrens 	 * it back to being empty (and re-initialize the upper (ZPL)
9452199Sahrens 	 * layer).  But for now there's no way to do this via the user
9462199Sahrens 	 * interface.
9472199Sahrens 	 */
9482199Sahrens 	if (ds->ds_phys->ds_prev_snap_txg == 0)
949789Sahrens 		return (EINVAL);
950789Sahrens 
9512199Sahrens 	/*
9522199Sahrens 	 * This must not be a snapshot.
9532199Sahrens 	 */
9542199Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0)
9552199Sahrens 		return (EINVAL);
956789Sahrens 
957789Sahrens 	/*
958789Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
959789Sahrens 	 * them.  Try again.
960789Sahrens 	 */
9612199Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
962789Sahrens 		return (EAGAIN);
9632199Sahrens 
9642199Sahrens 	return (0);
9652199Sahrens }
966789Sahrens 
/*
 * Sync half of the rollback sync task: make 'ds' (arg1) look like its
 * previous snapshot.  arg2 is unused.
 *
 * The deadlist is replaced with a fresh, empty one; the blocks visited
 * by traverse_dsl_dataset() starting from ds_prev_snap_txg are freed and
 * their space is subtracted from the dsl_dir; then the block pointer,
 * space accounting and flags are copied from ds->ds_prev.
 */
9672199Sahrens /* ARGSUSED */
9682199Sahrens static void
9692199Sahrens dsl_dataset_rollback_sync(void *arg1, void *arg2, dmu_tx_t *tx)
9702199Sahrens {
9712199Sahrens 	dsl_dataset_t *ds = arg1;
9722199Sahrens 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
973789Sahrens 
974789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
975789Sahrens 
976789Sahrens 	/* Zero out the deadlist. */
977789Sahrens 	bplist_close(&ds->ds_deadlist);
978789Sahrens 	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
979789Sahrens 	ds->ds_phys->ds_deadlist_obj =
980789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
9811544Seschrock 	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
9821544Seschrock 	    ds->ds_phys->ds_deadlist_obj));
983789Sahrens 
984789Sahrens 	{
985789Sahrens 		/* Free blkptrs that we gave birth to */
986789Sahrens 		zio_t *zio;
987789Sahrens 		uint64_t used = 0, compressed = 0, uncompressed = 0;
988789Sahrens 		struct killarg ka;
989789Sahrens 
990789Sahrens 		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
991789Sahrens 		    ZIO_FLAG_MUSTSUCCEED);
992789Sahrens 		ka.usedp = &used;
993789Sahrens 		ka.compressedp = &compressed;
994789Sahrens 		ka.uncompressedp = &uncompressed;
995789Sahrens 		ka.zio = zio;
996789Sahrens 		ka.tx = tx;
		/* NOTE(review): traversal and zio errors are discarded here. */
997789Sahrens 		(void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
998789Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
999789Sahrens 		(void) zio_wait(zio);
1000789Sahrens 
		/* Give the space of the freed blocks back to the dsl_dir. */
10012199Sahrens 		dsl_dir_diduse_space(ds->ds_dir,
1002789Sahrens 		    -used, -compressed, -uncompressed, tx);
1003789Sahrens 	}
1004789Sahrens 
10052199Sahrens 	/* Change our contents to that of the prev snapshot */
1006789Sahrens 	ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj);
1007789Sahrens 	ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
1008789Sahrens 	ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes;
1009789Sahrens 	ds->ds_phys->ds_compressed_bytes =
1010789Sahrens 	    ds->ds_prev->ds_phys->ds_compressed_bytes;
1011789Sahrens 	ds->ds_phys->ds_uncompressed_bytes =
1012789Sahrens 	    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
10132082Seschrock 	ds->ds_phys->ds_flags = ds->ds_prev->ds_phys->ds_flags;
1014789Sahrens 	ds->ds_phys->ds_unique_bytes = 0;
1015789Sahrens 
	/*
	 * Only reset the previous snapshot's unique bytes when this
	 * dataset is that snapshot's direct successor.
	 */
10162532Sahrens 	if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
10172532Sahrens 		dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
10182532Sahrens 		ds->ds_prev->ds_phys->ds_unique_bytes = 0;
10192532Sahrens 	}
1020789Sahrens }
1021789Sahrens 
10221731Sbonwick /* ARGSUSED */
10231731Sbonwick static int
10242199Sahrens dsl_dataset_destroy_begin_check(void *arg1, void *arg2, dmu_tx_t *tx)
10251731Sbonwick {
10262199Sahrens 	dsl_dataset_t *ds = arg1;
10271731Sbonwick 
10281731Sbonwick 	/*
10291731Sbonwick 	 * Can't delete a head dataset if there are snapshots of it.
10301731Sbonwick 	 * (Except if the only snapshots are from the branch we cloned
10311731Sbonwick 	 * from.)
10321731Sbonwick 	 */
10331731Sbonwick 	if (ds->ds_prev != NULL &&
10341731Sbonwick 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
10351731Sbonwick 		return (EINVAL);
10361731Sbonwick 
10371731Sbonwick 	return (0);
10381731Sbonwick }
10391731Sbonwick 
10402199Sahrens /* ARGSUSED */
10412199Sahrens static void
10422199Sahrens dsl_dataset_destroy_begin_sync(void *arg1, void *arg2, dmu_tx_t *tx)
1043789Sahrens {
10442199Sahrens 	dsl_dataset_t *ds = arg1;
1045789Sahrens 
10462199Sahrens 	/* Mark it as inconsistent on-disk, in case we crash */
10472199Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
10482199Sahrens 	ds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
10492199Sahrens }
1050789Sahrens 
10512199Sahrens /* ARGSUSED */
10522199Sahrens static int
10532199Sahrens dsl_dataset_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
10542199Sahrens {
10552199Sahrens 	dsl_dataset_t *ds = arg1;
1056789Sahrens 
1057789Sahrens 	/* Can't delete a branch point. */
10582199Sahrens 	if (ds->ds_phys->ds_num_children > 1)
10592199Sahrens 		return (EEXIST);
1060789Sahrens 
1061789Sahrens 	/*
1062789Sahrens 	 * Can't delete a head dataset if there are snapshots of it.
1063789Sahrens 	 * (Except if the only snapshots are from the branch we cloned
1064789Sahrens 	 * from.)
1065789Sahrens 	 */
1066789Sahrens 	if (ds->ds_prev != NULL &&
10672199Sahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object)
1068789Sahrens 		return (EINVAL);
1069789Sahrens 
1070789Sahrens 	/*
1071789Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
1072789Sahrens 	 * them.  Try again.
1073789Sahrens 	 */
10742199Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg)
1075789Sahrens 		return (EAGAIN);
10762199Sahrens 
10772199Sahrens 	/* XXX we should do some i/o error checking... */
10782199Sahrens 	return (0);
10792199Sahrens }
10802199Sahrens 
/*
 * Sync half of the destroy sync task.  arg1 is the dataset, which must
 * be held with ds_open_refcount == DS_REF_MAX (asserted); 'tag' is the
 * tag used to close that hold at the end of this function.  The pool's
 * config rwlock must be held for writing (asserted).
 *
 * In order, this function:
 *  - unlinks ds from its previous snapshot, or drops that snapshot's
 *    child count when ds is a clone past its branch point;
 *  - if ds has a next snapshot, merges deadlists with it, freeing the
 *    entries not born before the previous snapshot; otherwise destroys
 *    the deadlist and frees every block traversed from
 *    ds_prev_snap_txg;
 *  - returns the freed space to the dsl_dir;
 *  - destroys ds's snapshot-name ZAP if it has one;
 *  - clears the dsl_dir's head link if ds is the head, else removes
 *    ds's name from the head dataset's snapshot-name ZAP;
 *  - closes ds and frees its object in the MOS.
 */
10812199Sahrens static void
10822199Sahrens dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
10832199Sahrens {
10842199Sahrens 	dsl_dataset_t *ds = arg1;
10852199Sahrens 	uint64_t used = 0, compressed = 0, uncompressed = 0;
10862199Sahrens 	zio_t *zio;
10872199Sahrens 	int err;
10882199Sahrens 	int after_branch_point = FALSE;
10892199Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
10902199Sahrens 	objset_t *mos = dp->dp_meta_objset;
10912199Sahrens 	dsl_dataset_t *ds_prev = NULL;
10922199Sahrens 	uint64_t obj;
10932199Sahrens 
10943444Sek110237 	ASSERT3U(ds->ds_open_refcount, ==, DS_REF_MAX);
10952199Sahrens 	ASSERT3U(ds->ds_phys->ds_num_children, <=, 1);
10962199Sahrens 	ASSERT(ds->ds_prev == NULL ||
10972199Sahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj != ds->ds_object);
10982199Sahrens 	ASSERT3U(ds->ds_phys->ds_bp.blk_birth, <=, tx->tx_txg);
10992199Sahrens 
11002199Sahrens 	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
11012199Sahrens 
	/* Remember the object number; ds is closed before the object is freed. */
11022199Sahrens 	obj = ds->ds_object;
1103789Sahrens 
1104789Sahrens 	if (ds->ds_phys->ds_prev_snap_obj != 0) {
		/*
		 * Use the already-open ds_prev if there is one; otherwise
		 * open it here (it is closed again near the end).
		 */
1105789Sahrens 		if (ds->ds_prev) {
1106789Sahrens 			ds_prev = ds->ds_prev;
1107789Sahrens 		} else {
11082199Sahrens 			VERIFY(0 == dsl_dataset_open_obj(dp,
1109789Sahrens 			    ds->ds_phys->ds_prev_snap_obj, NULL,
11102199Sahrens 			    DS_MODE_NONE, FTAG, &ds_prev));
1111789Sahrens 		}
		/* True when prev's successor is some dataset other than us. */
1112789Sahrens 		after_branch_point =
1113789Sahrens 		    (ds_prev->ds_phys->ds_next_snap_obj != obj);
1114789Sahrens 
1115789Sahrens 		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
1116789Sahrens 		if (after_branch_point &&
1117789Sahrens 		    ds->ds_phys->ds_next_snap_obj == 0) {
1118789Sahrens 			/* This clone is toast. */
1119789Sahrens 			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
1120789Sahrens 			ds_prev->ds_phys->ds_num_children--;
1121789Sahrens 		} else if (!after_branch_point) {
			/* Splice ourselves out of the snapshot chain. */
1122789Sahrens 			ds_prev->ds_phys->ds_next_snap_obj =
1123789Sahrens 			    ds->ds_phys->ds_next_snap_obj;
1124789Sahrens 		}
1125789Sahrens 	}
1126789Sahrens 
	/* Parent zio for all of the block frees issued below. */
1127789Sahrens 	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
1128789Sahrens 
1129789Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0) {
11302199Sahrens 		blkptr_t bp;
1131789Sahrens 		dsl_dataset_t *ds_next;
1132789Sahrens 		uint64_t itor = 0;
1133789Sahrens 
1134789Sahrens 		spa_scrub_restart(dp->dp_spa, tx->tx_txg);
1135789Sahrens 
11362199Sahrens 		VERIFY(0 == dsl_dataset_open_obj(dp,
11371544Seschrock 		    ds->ds_phys->ds_next_snap_obj, NULL,
11381544Seschrock 		    DS_MODE_NONE, FTAG, &ds_next));
1139789Sahrens 		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
1140789Sahrens 
		/* Point next's "previous snapshot" fields past us. */
1141789Sahrens 		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
1142789Sahrens 		ds_next->ds_phys->ds_prev_snap_obj =
1143789Sahrens 		    ds->ds_phys->ds_prev_snap_obj;
1144789Sahrens 		ds_next->ds_phys->ds_prev_snap_txg =
1145789Sahrens 		    ds->ds_phys->ds_prev_snap_txg;
1146789Sahrens 		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
1147789Sahrens 		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
1148789Sahrens 
1149789Sahrens 		/*
1150789Sahrens 		 * Transfer to our deadlist (which will become next's
1151789Sahrens 		 * new deadlist) any entries from next's current
1152789Sahrens 		 * deadlist which were born before prev, and free the
1153789Sahrens 		 * other entries.
1154789Sahrens 		 *
1155789Sahrens 		 * XXX we're doing this long task with the config lock held
1156789Sahrens 		 */
1157789Sahrens 		while (bplist_iterate(&ds_next->ds_deadlist, &itor,
1158789Sahrens 		    &bp) == 0) {
1159789Sahrens 			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
11601544Seschrock 				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
11611544Seschrock 				    &bp, tx));
1162789Sahrens 				if (ds_prev && !after_branch_point &&
1163789Sahrens 				    bp.blk_birth >
1164789Sahrens 				    ds_prev->ds_phys->ds_prev_snap_txg) {
1165789Sahrens 					ds_prev->ds_phys->ds_unique_bytes +=
11662082Seschrock 					    bp_get_dasize(dp->dp_spa, &bp);
1167789Sahrens 				}
1168789Sahrens 			} else {
11692082Seschrock 				used += bp_get_dasize(dp->dp_spa, &bp);
1170789Sahrens 				compressed += BP_GET_PSIZE(&bp);
1171789Sahrens 				uncompressed += BP_GET_UCSIZE(&bp);
1172789Sahrens 				/* XXX check return value? */
1173789Sahrens 				(void) arc_free(zio, dp->dp_spa, tx->tx_txg,
1174789Sahrens 				    &bp, NULL, NULL, ARC_NOWAIT);
1175789Sahrens 			}
1176789Sahrens 		}
1177789Sahrens 
1178789Sahrens 		/* free next's deadlist */
1179789Sahrens 		bplist_close(&ds_next->ds_deadlist);
1180789Sahrens 		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);
1181789Sahrens 
1182789Sahrens 		/* set next's deadlist to our deadlist */
1183789Sahrens 		ds_next->ds_phys->ds_deadlist_obj =
1184789Sahrens 		    ds->ds_phys->ds_deadlist_obj;
11851544Seschrock 		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
11861544Seschrock 		    ds_next->ds_phys->ds_deadlist_obj));
1187789Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1188789Sahrens 
1189789Sahrens 		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
1190789Sahrens 			/*
1191789Sahrens 			 * Update next's unique to include blocks which
1192789Sahrens 			 * were previously shared by only this snapshot
1193789Sahrens 			 * and it.  Those blocks will be born after the
1194789Sahrens 			 * prev snap and before this snap, and will have
1195789Sahrens 			 * died after the next snap and before the one
1196789Sahrens 			 * after that (ie. be on the snap after next's
1197789Sahrens 			 * deadlist).
1198789Sahrens 			 *
1199789Sahrens 			 * XXX we're doing this long task with the
1200789Sahrens 			 * config lock held
1201789Sahrens 			 */
1202789Sahrens 			dsl_dataset_t *ds_after_next;
1203789Sahrens 
12042199Sahrens 			VERIFY(0 == dsl_dataset_open_obj(dp,
1205789Sahrens 			    ds_next->ds_phys->ds_next_snap_obj, NULL,
12061544Seschrock 			    DS_MODE_NONE, FTAG, &ds_after_next));
1207789Sahrens 			itor = 0;
1208789Sahrens 			while (bplist_iterate(&ds_after_next->ds_deadlist,
1209789Sahrens 			    &itor, &bp) == 0) {
1210789Sahrens 				if (bp.blk_birth >
1211789Sahrens 				    ds->ds_phys->ds_prev_snap_txg &&
1212789Sahrens 				    bp.blk_birth <=
1213789Sahrens 				    ds->ds_phys->ds_creation_txg) {
1214789Sahrens 					ds_next->ds_phys->ds_unique_bytes +=
12152082Seschrock 					    bp_get_dasize(dp->dp_spa, &bp);
1216789Sahrens 				}
1217789Sahrens 			}
1218789Sahrens 
1219789Sahrens 			dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG);
1220789Sahrens 			ASSERT3P(ds_next->ds_prev, ==, NULL);
1221789Sahrens 		} else {
1222789Sahrens 			/*
1223789Sahrens 			 * It would be nice to update the head dataset's
1224789Sahrens 			 * unique.  To do so we would have to traverse
1225789Sahrens 			 * it for blocks born after ds_prev, which is
1226789Sahrens 			 * pretty expensive just to maintain something
1227789Sahrens 			 * for debugging purposes.
1228789Sahrens 			 */
			/*
			 * ds_next holds us open as its ds_prev (tagged with
			 * ds_next itself); drop that hold and re-point
			 * ds_next->ds_prev at our previous snapshot, if any.
			 */
1229789Sahrens 			ASSERT3P(ds_next->ds_prev, ==, ds);
1230789Sahrens 			dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE,
1231789Sahrens 			    ds_next);
1232789Sahrens 			if (ds_prev) {
12332199Sahrens 				VERIFY(0 == dsl_dataset_open_obj(dp,
12341544Seschrock 				    ds->ds_phys->ds_prev_snap_obj, NULL,
12351544Seschrock 				    DS_MODE_NONE, ds_next, &ds_next->ds_prev));
1236789Sahrens 			} else {
1237789Sahrens 				ds_next->ds_prev = NULL;
1238789Sahrens 			}
1239789Sahrens 		}
1240789Sahrens 		dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG);
1241789Sahrens 
1242789Sahrens 		/*
1243789Sahrens 		 * NB: unique_bytes is not accurate for head objsets
1244789Sahrens 		 * because we don't update it when we delete the most
1245789Sahrens 		 * recent snapshot -- see above comment.
1246789Sahrens 		 */
1247789Sahrens 		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
1248789Sahrens 	} else {
1249789Sahrens 		/*
1250789Sahrens 		 * There's no next snapshot, so this is a head dataset.
1251789Sahrens 		 * Destroy the deadlist.  Unless it's a clone, the
1252789Sahrens 		 * deadlist should be empty.  (If it's a clone, it's
1253789Sahrens 		 * safe to ignore the deadlist contents.)
1254789Sahrens 		 */
1255789Sahrens 		struct killarg ka;
1256789Sahrens 
1257789Sahrens 		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
1258789Sahrens 		bplist_close(&ds->ds_deadlist);
1259789Sahrens 		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
1260789Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1261789Sahrens 
1262789Sahrens 		/*
1263789Sahrens 		 * Free everything that we point to (that's born after
1264789Sahrens 		 * the previous snapshot, if we are a clone)
1265789Sahrens 		 *
1266789Sahrens 		 * XXX we're doing this long task with the config lock held
1267789Sahrens 		 */
1268789Sahrens 		ka.usedp = &used;
1269789Sahrens 		ka.compressedp = &compressed;
1270789Sahrens 		ka.uncompressedp = &uncompressed;
1271789Sahrens 		ka.zio = zio;
1272789Sahrens 		ka.tx = tx;
1273789Sahrens 		err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
1274789Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
1275789Sahrens 		ASSERT3U(err, ==, 0);
1276789Sahrens 	}
1277789Sahrens 
	/* Wait for every free issued under the root zio to finish. */
1278789Sahrens 	err = zio_wait(zio);
1279789Sahrens 	ASSERT3U(err, ==, 0);
1280789Sahrens 
12812199Sahrens 	dsl_dir_diduse_space(ds->ds_dir, -used, -compressed, -uncompressed, tx);
1282789Sahrens 
1283789Sahrens 	if (ds->ds_phys->ds_snapnames_zapobj) {
1284789Sahrens 		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
1285789Sahrens 		ASSERT(err == 0);
1286789Sahrens 	}
1287789Sahrens 
12882199Sahrens 	if (ds->ds_dir->dd_phys->dd_head_dataset_obj == ds->ds_object) {
1289789Sahrens 		/* Erase the dsl_dir's link to this (head) dataset */
12902199Sahrens 		dmu_buf_will_dirty(ds->ds_dir->dd_dbuf, tx);
12912199Sahrens 		ds->ds_dir->dd_phys->dd_head_dataset_obj = 0;
1292789Sahrens 		/*
1293789Sahrens 		 * dsl_dir_sync_destroy() called us, they'll destroy
1294789Sahrens 		 * the dataset.
1295789Sahrens 		 */
1296789Sahrens 	} else {
1297789Sahrens 		/* remove from snapshot namespace */
1298789Sahrens 		dsl_dataset_t *ds_head;
12992199Sahrens 		VERIFY(0 == dsl_dataset_open_obj(dp,
13002199Sahrens 		    ds->ds_dir->dd_phys->dd_head_dataset_obj, NULL,
13011544Seschrock 		    DS_MODE_NONE, FTAG, &ds_head));
13022207Sahrens 		VERIFY(0 == dsl_dataset_get_snapname(ds));
1303789Sahrens #ifdef ZFS_DEBUG
1304789Sahrens 		{
			/* Debug only: the name must currently map to us. */
1305789Sahrens 			uint64_t val;
1306789Sahrens 			err = zap_lookup(mos,
1307789Sahrens 			    ds_head->ds_phys->ds_snapnames_zapobj,
13082199Sahrens 			    ds->ds_snapname, 8, 1, &val);
1309789Sahrens 			ASSERT3U(err, ==, 0);
1310789Sahrens 			ASSERT3U(val, ==, obj);
1311789Sahrens 		}
1312789Sahrens #endif
1313789Sahrens 		err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj,
13142199Sahrens 		    ds->ds_snapname, tx);
1315789Sahrens 		ASSERT(err == 0);
1316789Sahrens 		dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG);
1317789Sahrens 	}
1318789Sahrens 
	/* Close ds_prev only if we opened it ourselves above. */
1319789Sahrens 	if (ds_prev && ds->ds_prev != ds_prev)
1320789Sahrens 		dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);
1321789Sahrens 
13223912Slling 	spa_clear_bootfs(dp->dp_spa, ds->ds_object, tx);
	/* Drop the caller's hold, then free the dataset object by its saved number. */
13232199Sahrens 	dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, tag);
13242199Sahrens 	VERIFY(0 == dmu_object_free(mos, obj, tx));
13253912Slling 
13262199Sahrens }
13272199Sahrens 
13282199Sahrens /* ARGSUSED */
13292199Sahrens int
13302199Sahrens dsl_dataset_snapshot_check(void *arg1, void *arg2, dmu_tx_t *tx)
13312199Sahrens {
13322199Sahrens 	objset_t *os = arg1;
13332199Sahrens 	dsl_dataset_t *ds = os->os->os_dsl_dataset;
13342199Sahrens 	const char *snapname = arg2;
13352199Sahrens 	objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
13362199Sahrens 	int err;
13372199Sahrens 	uint64_t value;
1338789Sahrens 
1339789Sahrens 	/*
13402199Sahrens 	 * We don't allow multiple snapshots of the same txg.  If there
13412199Sahrens 	 * is already one, try again.
13422199Sahrens 	 */
13432199Sahrens 	if (ds->ds_phys->ds_prev_snap_txg >= tx->tx_txg)
13442199Sahrens 		return (EAGAIN);
13452199Sahrens 
13462199Sahrens 	/*
13472199Sahrens 	 * Check for conflicting name snapshot name.
1348789Sahrens 	 */
13492199Sahrens 	err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
13502199Sahrens 	    snapname, 8, 1, &value);
13512199Sahrens 	if (err == 0)
13522199Sahrens 		return (EEXIST);
13532199Sahrens 	if (err != ENOENT)
13542199Sahrens 		return (err);
1355789Sahrens 
13563978Smmusante 	/*
13573978Smmusante 	 * Check that the dataset's name is not too long.  Name consists
13583978Smmusante 	 * of the dataset's length + 1 for the @-sign + snapshot name's length
13593978Smmusante 	 */
13603978Smmusante 	if (dsl_dataset_namelen(ds) + 1 + strlen(snapname) >= MAXNAMELEN)
13613978Smmusante 		return (ENAMETOOLONG);
13623978Smmusante 
13632199Sahrens 	ds->ds_trysnap_txg = tx->tx_txg;
1364789Sahrens 	return (0);
1365789Sahrens }
1366789Sahrens 
/*
 * Sync half of the snapshot sync task.  arg1 is the objset being
 * snapshotted and arg2 is the new snapshot's name.  The pool's config
 * rwlock must be held for writing (asserted).
 *
 * Allocates a new dataset object in the MOS and fills it in as a copy of
 * the head's current state (it takes over the head's deadlist object),
 * links it into the snapshot chain between the head's old previous
 * snapshot and the head, gives the head a fresh empty deadlist, adds the
 * name to the head's snapshot-name ZAP, and re-points ds->ds_prev at the
 * new snapshot.
 */
13672199Sahrens void
13682199Sahrens dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
1369789Sahrens {
13702199Sahrens 	objset_t *os = arg1;
13712199Sahrens 	dsl_dataset_t *ds = os->os->os_dsl_dataset;
13722199Sahrens 	const char *snapname = arg2;
13732199Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
1374789Sahrens 	dmu_buf_t *dbuf;
1375789Sahrens 	dsl_dataset_phys_t *dsphys;
13762199Sahrens 	uint64_t dsobj;
1377789Sahrens 	objset_t *mos = dp->dp_meta_objset;
1378789Sahrens 	int err;
1379789Sahrens 
1380789Sahrens 	spa_scrub_restart(dp->dp_spa, tx->tx_txg);
13812199Sahrens 	ASSERT(RW_WRITE_HELD(&dp->dp_config_rwlock));
1382789Sahrens 
	/* Create the snapshot's dataset object and fill in its phys. */
1383928Stabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
1384928Stabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
13851544Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
1386789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
1387789Sahrens 	dsphys = dbuf->db_data;
13882199Sahrens 	dsphys->ds_dir_obj = ds->ds_dir->dd_object;
1389789Sahrens 	dsphys->ds_fsid_guid = unique_create();
1390789Sahrens 	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
1391789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
1392789Sahrens 	    sizeof (dsphys->ds_guid));
1393789Sahrens 	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
1394789Sahrens 	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
1395789Sahrens 	dsphys->ds_next_snap_obj = ds->ds_object;
1396789Sahrens 	dsphys->ds_num_children = 1;
1397789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
1398789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
	/* The snapshot inherits the head's current deadlist object. */
1399789Sahrens 	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
1400789Sahrens 	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
1401789Sahrens 	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
1402789Sahrens 	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
14032082Seschrock 	dsphys->ds_flags = ds->ds_phys->ds_flags;
1404789Sahrens 	dsphys->ds_bp = ds->ds_phys->ds_bp;
14051544Seschrock 	dmu_buf_rele(dbuf, FTAG);
1406789Sahrens 
14072199Sahrens 	ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
14082199Sahrens 	if (ds->ds_prev) {
14092199Sahrens 		ASSERT(ds->ds_prev->ds_phys->ds_next_snap_obj ==
1410789Sahrens 		    ds->ds_object ||
14112199Sahrens 		    ds->ds_prev->ds_phys->ds_num_children > 1);
		/*
		 * If the old previous snapshot pointed at the head, make
		 * it point at the new snapshot instead.
		 */
14122199Sahrens 		if (ds->ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
14132199Sahrens 			dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
1414789Sahrens 			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
14152199Sahrens 			    ds->ds_prev->ds_phys->ds_creation_txg);
14162199Sahrens 			ds->ds_prev->ds_phys->ds_next_snap_obj = dsobj;
1417789Sahrens 		}
1418789Sahrens 	}
1419789Sahrens 
	/*
	 * Update the head: its previous snapshot is now the new one, and
	 * it starts over with zero unique bytes and an empty deadlist.
	 */
1420789Sahrens 	bplist_close(&ds->ds_deadlist);
1421789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
1422789Sahrens 	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg);
1423789Sahrens 	ds->ds_phys->ds_prev_snap_obj = dsobj;
1424789Sahrens 	ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg;
1425789Sahrens 	ds->ds_phys->ds_unique_bytes = 0;
1426789Sahrens 	ds->ds_phys->ds_deadlist_obj =
1427789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
14281544Seschrock 	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
14291544Seschrock 	    ds->ds_phys->ds_deadlist_obj));
1430789Sahrens 
	/* Publish the name -> object mapping in the head's snapnames ZAP. */
1431789Sahrens 	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
1432789Sahrens 	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
1433789Sahrens 	    snapname, 8, 1, &dsobj, tx);
1434789Sahrens 	ASSERT(err == 0);
1435789Sahrens 
	/* Swap the in-core ds_prev hold over to the new snapshot. */
1436789Sahrens 	if (ds->ds_prev)
1437789Sahrens 		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
14381544Seschrock 	VERIFY(0 == dsl_dataset_open_obj(dp,
14391544Seschrock 	    ds->ds_phys->ds_prev_snap_obj, snapname,
14401544Seschrock 	    DS_MODE_NONE, ds, &ds->ds_prev));
1441789Sahrens }
1442789Sahrens 
1443789Sahrens void
14443547Smaybee dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
1445789Sahrens {
1446789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1447789Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
1448789Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
1449789Sahrens 
1450789Sahrens 	dsl_dir_dirty(ds->ds_dir, tx);
14513547Smaybee 	dmu_objset_sync(ds->ds_user_ptr, zio, tx);
14523547Smaybee 	/* Unneeded? bplist_close(&ds->ds_deadlist); */
1453789Sahrens }
1454789Sahrens 
1455789Sahrens void
14562885Sahrens dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
1457789Sahrens {
14582885Sahrens 	dsl_dir_stats(ds->ds_dir, nv);
1459789Sahrens 
14602885Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATION,
14612885Sahrens 	    ds->ds_phys->ds_creation_time);
14622885Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_CREATETXG,
14632885Sahrens 	    ds->ds_phys->ds_creation_txg);
14642885Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFERENCED,
14652885Sahrens 	    ds->ds_phys->ds_used_bytes);
1466789Sahrens 
1467789Sahrens 	if (ds->ds_phys->ds_next_snap_obj) {
1468789Sahrens 		/*
1469789Sahrens 		 * This is a snapshot; override the dd's space used with
14702885Sahrens 		 * our unique space and compression ratio.
1471789Sahrens 		 */
14722885Sahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
14732885Sahrens 		    ds->ds_phys->ds_unique_bytes);
14742885Sahrens 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
14752885Sahrens 		    ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
14762885Sahrens 		    (ds->ds_phys->ds_uncompressed_bytes * 100 /
14772885Sahrens 		    ds->ds_phys->ds_compressed_bytes));
1478789Sahrens 	}
1479789Sahrens }
1480789Sahrens 
14812885Sahrens void
14822885Sahrens dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat)
1483789Sahrens {
14842885Sahrens 	stat->dds_creation_txg = ds->ds_phys->ds_creation_txg;
14852885Sahrens 	stat->dds_inconsistent = ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT;
14862885Sahrens 	if (ds->ds_phys->ds_next_snap_obj) {
14872885Sahrens 		stat->dds_is_snapshot = B_TRUE;
14882885Sahrens 		stat->dds_num_clones = ds->ds_phys->ds_num_children - 1;
14892885Sahrens 	}
14902885Sahrens 
14912885Sahrens 	/* clone origin is really a dsl_dir thing... */
14922885Sahrens 	if (ds->ds_dir->dd_phys->dd_clone_parent_obj) {
14932885Sahrens 		dsl_dataset_t *ods;
14942885Sahrens 
14952885Sahrens 		rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
14962885Sahrens 		VERIFY(0 == dsl_dataset_open_obj(ds->ds_dir->dd_pool,
14972885Sahrens 		    ds->ds_dir->dd_phys->dd_clone_parent_obj,
14982885Sahrens 		    NULL, DS_MODE_NONE, FTAG, &ods));
14992885Sahrens 		dsl_dataset_name(ods, stat->dds_clone_of);
15002885Sahrens 		dsl_dataset_close(ods, DS_MODE_NONE, FTAG);
15012885Sahrens 		rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
15022885Sahrens 	}
15032885Sahrens }
15042885Sahrens 
15052885Sahrens uint64_t
15062885Sahrens dsl_dataset_fsid_guid(dsl_dataset_t *ds)
15072885Sahrens {
15082885Sahrens 	return (ds->ds_phys->ds_fsid_guid);
15092885Sahrens }
15102885Sahrens 
15112885Sahrens void
15122885Sahrens dsl_dataset_space(dsl_dataset_t *ds,
15132885Sahrens     uint64_t *refdbytesp, uint64_t *availbytesp,
15142885Sahrens     uint64_t *usedobjsp, uint64_t *availobjsp)
15152885Sahrens {
15162885Sahrens 	*refdbytesp = ds->ds_phys->ds_used_bytes;
15172885Sahrens 	*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
15182885Sahrens 	*usedobjsp = ds->ds_phys->ds_bp.blk_fill;
15192885Sahrens 	*availobjsp = DN_MAX_OBJECT - *usedobjsp;
1520789Sahrens }
1521789Sahrens 
15222199Sahrens /* ARGSUSED */
1523789Sahrens static int
15242199Sahrens dsl_dataset_snapshot_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
1525789Sahrens {
15262199Sahrens 	dsl_dataset_t *ds = arg1;
15272199Sahrens 	char *newsnapname = arg2;
15282199Sahrens 	dsl_dir_t *dd = ds->ds_dir;
1529789Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
15302199Sahrens 	dsl_dataset_t *hds;
15312199Sahrens 	uint64_t val;
1532789Sahrens 	int err;
1533789Sahrens 
15342199Sahrens 	err = dsl_dataset_open_obj(dd->dd_pool,
15352199Sahrens 	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds);
1536789Sahrens 	if (err)
1537789Sahrens 		return (err);
1538789Sahrens 
15392199Sahrens 	/* new name better not be in use */
15402199Sahrens 	err = zap_lookup(mos, hds->ds_phys->ds_snapnames_zapobj,
15412199Sahrens 	    newsnapname, 8, 1, &val);
15422199Sahrens 	dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
1543789Sahrens 
15442199Sahrens 	if (err == 0)
15452199Sahrens 		err = EEXIST;
15462199Sahrens 	else if (err == ENOENT)
15472199Sahrens 		err = 0;
1548*4007Smmusante 
1549*4007Smmusante 	/* dataset name + 1 for the "@" + the new snapshot name must fit */
1550*4007Smmusante 	if (dsl_dir_namelen(ds->ds_dir) + 1 + strlen(newsnapname) >= MAXNAMELEN)
1551*4007Smmusante 		err = ENAMETOOLONG;
1552*4007Smmusante 
15532199Sahrens 	return (err);
15542199Sahrens }
1555789Sahrens 
15562199Sahrens static void
15572199Sahrens dsl_dataset_snapshot_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
15582199Sahrens {
15592199Sahrens 	dsl_dataset_t *ds = arg1;
15602199Sahrens 	char *newsnapname = arg2;
15612199Sahrens 	dsl_dir_t *dd = ds->ds_dir;
15622199Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
15632199Sahrens 	dsl_dataset_t *hds;
15642199Sahrens 	int err;
1565789Sahrens 
15662199Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj != 0);
1567789Sahrens 
15682199Sahrens 	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
15692199Sahrens 	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &hds));
1570789Sahrens 
15712199Sahrens 	VERIFY(0 == dsl_dataset_get_snapname(ds));
15722199Sahrens 	err = zap_remove(mos, hds->ds_phys->ds_snapnames_zapobj,
15732199Sahrens 	    ds->ds_snapname, tx);
1574789Sahrens 	ASSERT3U(err, ==, 0);
15752199Sahrens 	mutex_enter(&ds->ds_lock);
15762199Sahrens 	(void) strcpy(ds->ds_snapname, newsnapname);
15772199Sahrens 	mutex_exit(&ds->ds_lock);
15782199Sahrens 	err = zap_add(mos, hds->ds_phys->ds_snapnames_zapobj,
15792199Sahrens 	    ds->ds_snapname, 8, 1, &ds->ds_object, tx);
1580789Sahrens 	ASSERT3U(err, ==, 0);
1581789Sahrens 
15822199Sahrens 	dsl_dataset_close(hds, DS_MODE_NONE, FTAG);
1583789Sahrens }
1584789Sahrens 
1585*4007Smmusante struct renamearg {
1586*4007Smmusante 	dsl_sync_task_group_t *dstg;
1587*4007Smmusante 	char failed[MAXPATHLEN];
1588*4007Smmusante 	char *oldsnap;
1589*4007Smmusante 	char *newsnap;
1590*4007Smmusante };
1591*4007Smmusante 
1592*4007Smmusante static int
1593*4007Smmusante dsl_snapshot_rename_one(char *name, void *arg)
1594*4007Smmusante {
1595*4007Smmusante 	struct renamearg *ra = arg;
1596*4007Smmusante 	dsl_dataset_t *ds = NULL;
1597*4007Smmusante 	objset_t *os;
1598*4007Smmusante 	char *cp;
1599*4007Smmusante 	int err;
1600*4007Smmusante 
1601*4007Smmusante 	cp = name + strlen(name);
1602*4007Smmusante 	*cp = '@';
1603*4007Smmusante 	(void) strcpy(cp + 1, ra->oldsnap);
1604*4007Smmusante 	err = dsl_dataset_open(name, DS_MODE_READONLY | DS_MODE_STANDARD,
1605*4007Smmusante 	    ra->dstg, &ds);
1606*4007Smmusante 	if (err == ENOENT) {
1607*4007Smmusante 		*cp = '\0';
1608*4007Smmusante 		return (0);
1609*4007Smmusante 	}
1610*4007Smmusante 	if (err) {
1611*4007Smmusante 		(void) strcpy(ra->failed, name);
1612*4007Smmusante 		*cp = '\0';
1613*4007Smmusante 		dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg);
1614*4007Smmusante 		return (err);
1615*4007Smmusante 	}
1616*4007Smmusante 
1617*4007Smmusante #ifdef _KERNEL
1618*4007Smmusante 	/* for all filesystems undergoing rename, we'll need to unmount it */
1619*4007Smmusante 	(void) zfs_unmount_snap(name, NULL);
1620*4007Smmusante #endif
1621*4007Smmusante 
1622*4007Smmusante 	*cp = '\0';
1623*4007Smmusante 
1624*4007Smmusante 	dsl_sync_task_create(ra->dstg, dsl_dataset_snapshot_rename_check,
1625*4007Smmusante 	    dsl_dataset_snapshot_rename_sync, ds, ra->newsnap, 0);
1626*4007Smmusante 
1627*4007Smmusante 	return (0);
1628*4007Smmusante }
1629*4007Smmusante 
1630*4007Smmusante static int
1631*4007Smmusante dsl_recursive_rename(char *oldname, const char *newname)
1632*4007Smmusante {
1633*4007Smmusante 	int err;
1634*4007Smmusante 	struct renamearg *ra;
1635*4007Smmusante 	dsl_sync_task_t *dst;
1636*4007Smmusante 	spa_t *spa;
1637*4007Smmusante 	char *cp, *fsname = spa_strdup(oldname);
1638*4007Smmusante 	int len = strlen(oldname);
1639*4007Smmusante 
1640*4007Smmusante 	/* truncate the snapshot name to get the fsname */
1641*4007Smmusante 	cp = strchr(fsname, '@');
1642*4007Smmusante 	*cp = '\0';
1643*4007Smmusante 
1644*4007Smmusante 	cp = strchr(fsname, '/');
1645*4007Smmusante 	if (cp) {
1646*4007Smmusante 		*cp = '\0';
1647*4007Smmusante 		err = spa_open(fsname, &spa, FTAG);
1648*4007Smmusante 		*cp = '/';
1649*4007Smmusante 	} else {
1650*4007Smmusante 		err = spa_open(fsname, &spa, FTAG);
1651*4007Smmusante 	}
1652*4007Smmusante 	if (err) {
1653*4007Smmusante 		kmem_free(fsname, len + 1);
1654*4007Smmusante 		return (err);
1655*4007Smmusante 	}
1656*4007Smmusante 	ra = kmem_alloc(sizeof (struct renamearg), KM_SLEEP);
1657*4007Smmusante 	ra->dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
1658*4007Smmusante 
1659*4007Smmusante 	ra->oldsnap = strchr(oldname, '@') + 1;
1660*4007Smmusante 	ra->newsnap = strchr(newname, '@') + 1;
1661*4007Smmusante 	*ra->failed = '\0';
1662*4007Smmusante 
1663*4007Smmusante 	err = dmu_objset_find(fsname, dsl_snapshot_rename_one, ra,
1664*4007Smmusante 	    DS_FIND_CHILDREN);
1665*4007Smmusante 	kmem_free(fsname, len + 1);
1666*4007Smmusante 
1667*4007Smmusante 	if (err == 0) {
1668*4007Smmusante 		err = dsl_sync_task_group_wait(ra->dstg);
1669*4007Smmusante 	}
1670*4007Smmusante 
1671*4007Smmusante 	for (dst = list_head(&ra->dstg->dstg_tasks); dst;
1672*4007Smmusante 	    dst = list_next(&ra->dstg->dstg_tasks, dst)) {
1673*4007Smmusante 		dsl_dataset_t *ds = dst->dst_arg1;
1674*4007Smmusante 		if (dst->dst_err) {
1675*4007Smmusante 			dsl_dir_name(ds->ds_dir, ra->failed);
1676*4007Smmusante 			strcat(ra->failed, "@");
1677*4007Smmusante 			strcat(ra->failed, ra->newsnap);
1678*4007Smmusante 		}
1679*4007Smmusante 		dsl_dataset_close(ds, DS_MODE_STANDARD, ra->dstg);
1680*4007Smmusante 	}
1681*4007Smmusante 
1682*4007Smmusante 	(void) strcpy(oldname, ra->failed);
1683*4007Smmusante 
1684*4007Smmusante 	dsl_sync_task_group_destroy(ra->dstg);
1685*4007Smmusante 	kmem_free(ra, sizeof (struct renamearg));
1686*4007Smmusante 	spa_close(spa, FTAG);
1687*4007Smmusante 	return (err);
1688*4007Smmusante }
1689*4007Smmusante 
1690789Sahrens #pragma weak dmu_objset_rename = dsl_dataset_rename
1691789Sahrens int
1692*4007Smmusante dsl_dataset_rename(char *oldname, const char *newname,
1693*4007Smmusante     boolean_t recursive)
1694789Sahrens {
1695789Sahrens 	dsl_dir_t *dd;
16962199Sahrens 	dsl_dataset_t *ds;
1697789Sahrens 	const char *tail;
1698789Sahrens 	int err;
1699789Sahrens 
17002199Sahrens 	err = dsl_dir_open(oldname, FTAG, &dd, &tail);
17011544Seschrock 	if (err)
17021544Seschrock 		return (err);
1703789Sahrens 	if (tail == NULL) {
17042199Sahrens 		err = dsl_dir_rename(dd, newname);
1705789Sahrens 		dsl_dir_close(dd, FTAG);
1706789Sahrens 		return (err);
1707789Sahrens 	}
1708789Sahrens 	if (tail[0] != '@') {
1709789Sahrens 		/* the name ended in a nonexistant component */
1710789Sahrens 		dsl_dir_close(dd, FTAG);
1711789Sahrens 		return (ENOENT);
1712789Sahrens 	}
1713789Sahrens 
17142199Sahrens 	dsl_dir_close(dd, FTAG);
17152199Sahrens 
17162199Sahrens 	/* new name must be snapshot in same filesystem */
17172199Sahrens 	tail = strchr(newname, '@');
17182199Sahrens 	if (tail == NULL)
17192199Sahrens 		return (EINVAL);
17202199Sahrens 	tail++;
17212199Sahrens 	if (strncmp(oldname, newname, tail - newname) != 0)
17222199Sahrens 		return (EXDEV);
1723789Sahrens 
1724*4007Smmusante 	if (recursive) {
1725*4007Smmusante 		err = dsl_recursive_rename(oldname, newname);
1726*4007Smmusante 	} else {
1727*4007Smmusante 		err = dsl_dataset_open(oldname,
1728*4007Smmusante 		    DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &ds);
1729*4007Smmusante 		if (err)
1730*4007Smmusante 			return (err);
17312199Sahrens 
1732*4007Smmusante 		err = dsl_sync_task_do(ds->ds_dir->dd_pool,
1733*4007Smmusante 		    dsl_dataset_snapshot_rename_check,
1734*4007Smmusante 		    dsl_dataset_snapshot_rename_sync, ds, (char *)tail, 1);
17352199Sahrens 
1736*4007Smmusante 		dsl_dataset_close(ds, DS_MODE_STANDARD, FTAG);
1737*4007Smmusante 	}
17382199Sahrens 
1739789Sahrens 	return (err);
1740789Sahrens }
17412082Seschrock 
/*
 * Values computed by dsl_dataset_promote_check() and consumed by
 * dsl_dataset_promote_sync().
 */
struct promotearg {
	/* space moved from the origin's dsl_dir to the clone's dsl_dir */
	uint64_t used;
	uint64_t comp;
	uint64_t uncomp;
	/* new ds_unique_bytes for the pivot (origin) snapshot */
	uint64_t unique;
	/* new ds_next_snap_obj for the pivot snapshot */
	uint64_t newnext_obj;
	/* snapshot-name ZAP of the origin's head dataset */
	uint64_t snapnames_obj;
};
17462199Sahrens 
17472082Seschrock static int
17482199Sahrens dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
17492082Seschrock {
17502199Sahrens 	dsl_dataset_t *hds = arg1;
17512199Sahrens 	struct promotearg *pa = arg2;
17522199Sahrens 	dsl_dir_t *dd = hds->ds_dir;
17532199Sahrens 	dsl_pool_t *dp = hds->ds_dir->dd_pool;
17542082Seschrock 	dsl_dir_t *pdd = NULL;
17552082Seschrock 	dsl_dataset_t *ds = NULL;
17562082Seschrock 	dsl_dataset_t *pivot_ds = NULL;
17572082Seschrock 	dsl_dataset_t *newnext_ds = NULL;
17582082Seschrock 	int err;
17592082Seschrock 	char *name = NULL;
17602199Sahrens 	uint64_t itor = 0;
17612082Seschrock 	blkptr_t bp;
17622082Seschrock 
17632199Sahrens 	bzero(pa, sizeof (*pa));
17642199Sahrens 
17652082Seschrock 	/* Check that it is a clone */
17662082Seschrock 	if (dd->dd_phys->dd_clone_parent_obj == 0)
17672082Seschrock 		return (EINVAL);
17682082Seschrock 
17692199Sahrens 	/* Since this is so expensive, don't do the preliminary check */
17702199Sahrens 	if (!dmu_tx_is_syncing(tx))
17712199Sahrens 		return (0);
17722199Sahrens 
17732199Sahrens 	if (err = dsl_dataset_open_obj(dp,
17742082Seschrock 	    dd->dd_phys->dd_clone_parent_obj,
17752082Seschrock 	    NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds))
17762082Seschrock 		goto out;
17772082Seschrock 	pdd = pivot_ds->ds_dir;
17782199Sahrens 
17792199Sahrens 	{
17802199Sahrens 		dsl_dataset_t *phds;
17812199Sahrens 		if (err = dsl_dataset_open_obj(dd->dd_pool,
17822199Sahrens 		    pdd->dd_phys->dd_head_dataset_obj,
17832199Sahrens 		    NULL, DS_MODE_NONE, FTAG, &phds))
17842199Sahrens 			goto out;
17852199Sahrens 		pa->snapnames_obj = phds->ds_phys->ds_snapnames_zapobj;
17862199Sahrens 		dsl_dataset_close(phds, DS_MODE_NONE, FTAG);
17872199Sahrens 	}
17882082Seschrock 
17892082Seschrock 	if (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE) {
17902082Seschrock 		err = EXDEV;
17912082Seschrock 		goto out;
17922082Seschrock 	}
17932082Seschrock 
17942082Seschrock 	/* find pivot point's new next ds */
17952082Seschrock 	VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, hds->ds_object,
17962082Seschrock 	    NULL, DS_MODE_NONE, FTAG, &newnext_ds));
17972082Seschrock 	while (newnext_ds->ds_phys->ds_prev_snap_obj != pivot_ds->ds_object) {
17982082Seschrock 		dsl_dataset_t *prev;
17992082Seschrock 
18002082Seschrock 		if (err = dsl_dataset_open_obj(dd->dd_pool,
18012199Sahrens 		    newnext_ds->ds_phys->ds_prev_snap_obj,
18022199Sahrens 		    NULL, DS_MODE_NONE, FTAG, &prev))
18032082Seschrock 			goto out;
18042082Seschrock 		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
18052082Seschrock 		newnext_ds = prev;
18062082Seschrock 	}
18072199Sahrens 	pa->newnext_obj = newnext_ds->ds_object;
18082082Seschrock 
18092082Seschrock 	/* compute pivot point's new unique space */
18102082Seschrock 	while ((err = bplist_iterate(&newnext_ds->ds_deadlist,
18112082Seschrock 	    &itor, &bp)) == 0) {
18122082Seschrock 		if (bp.blk_birth > pivot_ds->ds_phys->ds_prev_snap_txg)
18132199Sahrens 			pa->unique += bp_get_dasize(dd->dd_pool->dp_spa, &bp);
18142082Seschrock 	}
18152082Seschrock 	if (err != ENOENT)
18162082Seschrock 		goto out;
18172082Seschrock 
18182082Seschrock 	/* Walk the snapshots that we are moving */
18192082Seschrock 	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
18202082Seschrock 	ds = pivot_ds;
18212082Seschrock 	/* CONSTCOND */
18222082Seschrock 	while (TRUE) {
18232082Seschrock 		uint64_t val, dlused, dlcomp, dluncomp;
18242082Seschrock 		dsl_dataset_t *prev;
18252082Seschrock 
18262082Seschrock 		/* Check that the snapshot name does not conflict */
18272082Seschrock 		dsl_dataset_name(ds, name);
18282082Seschrock 		err = zap_lookup(dd->dd_pool->dp_meta_objset,
18292082Seschrock 		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
18302082Seschrock 		    8, 1, &val);
18312082Seschrock 		if (err != ENOENT) {
18322082Seschrock 			if (err == 0)
18332082Seschrock 				err = EEXIST;
18342082Seschrock 			goto out;
18352082Seschrock 		}
18362082Seschrock 
18372082Seschrock 		/*
18382082Seschrock 		 * compute space to transfer.  Each snapshot gave birth to:
18392082Seschrock 		 * (my used) - (prev's used) + (deadlist's used)
18402082Seschrock 		 */
18412199Sahrens 		pa->used += ds->ds_phys->ds_used_bytes;
18422199Sahrens 		pa->comp += ds->ds_phys->ds_compressed_bytes;
18432199Sahrens 		pa->uncomp += ds->ds_phys->ds_uncompressed_bytes;
18442082Seschrock 
18452082Seschrock 		/* If we reach the first snapshot, we're done. */
18462082Seschrock 		if (ds->ds_phys->ds_prev_snap_obj == 0)
18472082Seschrock 			break;
18482082Seschrock 
18492082Seschrock 		if (err = bplist_space(&ds->ds_deadlist,
18502082Seschrock 		    &dlused, &dlcomp, &dluncomp))
18512082Seschrock 			goto out;
18522082Seschrock 		if (err = dsl_dataset_open_obj(dd->dd_pool,
18532082Seschrock 		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
18542082Seschrock 		    FTAG, &prev))
18552082Seschrock 			goto out;
18562199Sahrens 		pa->used += dlused - prev->ds_phys->ds_used_bytes;
18572199Sahrens 		pa->comp += dlcomp - prev->ds_phys->ds_compressed_bytes;
18582199Sahrens 		pa->uncomp += dluncomp - prev->ds_phys->ds_uncompressed_bytes;
18592082Seschrock 
18602082Seschrock 		/*
18612082Seschrock 		 * We could be a clone of a clone.  If we reach our
18622082Seschrock 		 * parent's branch point, we're done.
18632082Seschrock 		 */
18642082Seschrock 		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
18652082Seschrock 			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
18662082Seschrock 			break;
18672082Seschrock 		}
18682082Seschrock 		if (ds != pivot_ds)
18692082Seschrock 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
18702082Seschrock 		ds = prev;
18712082Seschrock 	}
18722082Seschrock 
18732082Seschrock 	/* Check that there is enough space here */
18742199Sahrens 	err = dsl_dir_transfer_possible(pdd, dd, pa->used);
18752199Sahrens 
18762199Sahrens out:
18772199Sahrens 	if (ds && ds != pivot_ds)
18782199Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
18792199Sahrens 	if (pivot_ds)
18802199Sahrens 		dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG);
18812199Sahrens 	if (newnext_ds)
18822199Sahrens 		dsl_dataset_close(newnext_ds, DS_MODE_NONE, FTAG);
18832199Sahrens 	if (name)
18842199Sahrens 		kmem_free(name, MAXPATHLEN);
18852199Sahrens 	return (err);
18862199Sahrens }
18872082Seschrock 
18882199Sahrens static void
18892199Sahrens dsl_dataset_promote_sync(void *arg1, void *arg2, dmu_tx_t *tx)
18902199Sahrens {
18912199Sahrens 	dsl_dataset_t *hds = arg1;
18922199Sahrens 	struct promotearg *pa = arg2;
18932199Sahrens 	dsl_dir_t *dd = hds->ds_dir;
18942199Sahrens 	dsl_pool_t *dp = hds->ds_dir->dd_pool;
18952199Sahrens 	dsl_dir_t *pdd = NULL;
18962199Sahrens 	dsl_dataset_t *ds, *pivot_ds;
18972199Sahrens 	char *name;
18982199Sahrens 
18992199Sahrens 	ASSERT(dd->dd_phys->dd_clone_parent_obj != 0);
19002199Sahrens 	ASSERT(0 == (hds->ds_phys->ds_flags & DS_FLAG_NOPROMOTE));
19012199Sahrens 
19022199Sahrens 	VERIFY(0 == dsl_dataset_open_obj(dp,
19032199Sahrens 	    dd->dd_phys->dd_clone_parent_obj,
19042199Sahrens 	    NULL, DS_MODE_EXCLUSIVE, FTAG, &pivot_ds));
19052417Sahrens 	/*
19062417Sahrens 	 * We need to explicitly open pdd, since pivot_ds's pdd will be
19072417Sahrens 	 * changing.
19082417Sahrens 	 */
19092417Sahrens 	VERIFY(0 == dsl_dir_open_obj(dp, pivot_ds->ds_dir->dd_object,
19102417Sahrens 	    NULL, FTAG, &pdd));
19112082Seschrock 
19122082Seschrock 	/* move snapshots to this dir */
19132199Sahrens 	name = kmem_alloc(MAXPATHLEN, KM_SLEEP);
19142082Seschrock 	ds = pivot_ds;
19152082Seschrock 	/* CONSTCOND */
19162082Seschrock 	while (TRUE) {
19172082Seschrock 		dsl_dataset_t *prev;
19182082Seschrock 
19192082Seschrock 		/* move snap name entry */
19202082Seschrock 		dsl_dataset_name(ds, name);
19212199Sahrens 		VERIFY(0 == zap_remove(dp->dp_meta_objset,
19222199Sahrens 		    pa->snapnames_obj, ds->ds_snapname, tx));
19232199Sahrens 		VERIFY(0 == zap_add(dp->dp_meta_objset,
19242082Seschrock 		    hds->ds_phys->ds_snapnames_zapobj, ds->ds_snapname,
19252082Seschrock 		    8, 1, &ds->ds_object, tx));
19262082Seschrock 
19272082Seschrock 		/* change containing dsl_dir */
19282082Seschrock 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
19292082Seschrock 		ASSERT3U(ds->ds_phys->ds_dir_obj, ==, pdd->dd_object);
19302082Seschrock 		ds->ds_phys->ds_dir_obj = dd->dd_object;
19312082Seschrock 		ASSERT3P(ds->ds_dir, ==, pdd);
19322082Seschrock 		dsl_dir_close(ds->ds_dir, ds);
19332199Sahrens 		VERIFY(0 == dsl_dir_open_obj(dp, dd->dd_object,
19342082Seschrock 		    NULL, ds, &ds->ds_dir));
19352082Seschrock 
19362082Seschrock 		ASSERT3U(dsl_prop_numcb(ds), ==, 0);
19372082Seschrock 
19382082Seschrock 		if (ds->ds_phys->ds_prev_snap_obj == 0)
19392082Seschrock 			break;
19402082Seschrock 
19412199Sahrens 		VERIFY(0 == dsl_dataset_open_obj(dp,
19422082Seschrock 		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_EXCLUSIVE,
19432082Seschrock 		    FTAG, &prev));
19442082Seschrock 
19452082Seschrock 		if (prev->ds_phys->ds_next_snap_obj != ds->ds_object) {
19462082Seschrock 			dsl_dataset_close(prev, DS_MODE_EXCLUSIVE, FTAG);
19472082Seschrock 			break;
19482082Seschrock 		}
19492082Seschrock 		if (ds != pivot_ds)
19502082Seschrock 			dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
19512082Seschrock 		ds = prev;
19522082Seschrock 	}
19532199Sahrens 	if (ds != pivot_ds)
19542199Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
19552082Seschrock 
19562082Seschrock 	/* change pivot point's next snap */
19572082Seschrock 	dmu_buf_will_dirty(pivot_ds->ds_dbuf, tx);
19582199Sahrens 	pivot_ds->ds_phys->ds_next_snap_obj = pa->newnext_obj;
19592082Seschrock 
19602082Seschrock 	/* change clone_parent-age */
19612082Seschrock 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
19622082Seschrock 	ASSERT3U(dd->dd_phys->dd_clone_parent_obj, ==, pivot_ds->ds_object);
19632082Seschrock 	dd->dd_phys->dd_clone_parent_obj = pdd->dd_phys->dd_clone_parent_obj;
19642082Seschrock 	dmu_buf_will_dirty(pdd->dd_dbuf, tx);
19652082Seschrock 	pdd->dd_phys->dd_clone_parent_obj = pivot_ds->ds_object;
19662082Seschrock 
19672082Seschrock 	/* change space accounting */
19682199Sahrens 	dsl_dir_diduse_space(pdd, -pa->used, -pa->comp, -pa->uncomp, tx);
19692199Sahrens 	dsl_dir_diduse_space(dd, pa->used, pa->comp, pa->uncomp, tx);
19702199Sahrens 	pivot_ds->ds_phys->ds_unique_bytes = pa->unique;
19712082Seschrock 
19722417Sahrens 	dsl_dir_close(pdd, FTAG);
19732199Sahrens 	dsl_dataset_close(pivot_ds, DS_MODE_EXCLUSIVE, FTAG);
19742199Sahrens 	kmem_free(name, MAXPATHLEN);
19752082Seschrock }
19762082Seschrock 
19772082Seschrock int
19782082Seschrock dsl_dataset_promote(const char *name)
19792082Seschrock {
19802082Seschrock 	dsl_dataset_t *ds;
19812082Seschrock 	int err;
19822082Seschrock 	dmu_object_info_t doi;
19832199Sahrens 	struct promotearg pa;
19842082Seschrock 
19852082Seschrock 	err = dsl_dataset_open(name, DS_MODE_NONE, FTAG, &ds);
19862082Seschrock 	if (err)
19872082Seschrock 		return (err);
19882082Seschrock 
19892082Seschrock 	err = dmu_object_info(ds->ds_dir->dd_pool->dp_meta_objset,
19902082Seschrock 	    ds->ds_phys->ds_snapnames_zapobj, &doi);
19912082Seschrock 	if (err) {
19922082Seschrock 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
19932082Seschrock 		return (err);
19942082Seschrock 	}
19952082Seschrock 
19962082Seschrock 	/*
19972082Seschrock 	 * Add in 128x the snapnames zapobj size, since we will be moving
19982082Seschrock 	 * a bunch of snapnames to the promoted ds, and dirtying their
19992082Seschrock 	 * bonus buffers.
20002082Seschrock 	 */
20012199Sahrens 	err = dsl_sync_task_do(ds->ds_dir->dd_pool,
20022199Sahrens 	    dsl_dataset_promote_check,
20032199Sahrens 	    dsl_dataset_promote_sync, ds, &pa, 2 + 2 * doi.doi_physical_blks);
20042082Seschrock 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
20052082Seschrock 	return (err);
20062082Seschrock }
20073912Slling 
20083912Slling /*
20093912Slling  * Given a pool name and a dataset object number in that pool,
20103912Slling  * return the name of that dataset.
20113912Slling  */
20123912Slling int
20133912Slling dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf)
20143912Slling {
20153912Slling 	spa_t *spa;
20163912Slling 	dsl_pool_t *dp;
20173912Slling 	dsl_dataset_t *ds = NULL;
20183912Slling 	int error;
20193912Slling 
20203912Slling 	if ((error = spa_open(pname, &spa, FTAG)) != 0)
20213912Slling 		return (error);
20223912Slling 	dp = spa_get_dsl(spa);
20233912Slling 	rw_enter(&dp->dp_config_rwlock, RW_READER);
20243912Slling 	if ((error = dsl_dataset_open_obj(dp, obj,
20253912Slling 	    NULL, DS_MODE_NONE, FTAG, &ds)) != 0) {
20263912Slling 		rw_exit(&dp->dp_config_rwlock);
20273912Slling 		spa_close(spa, FTAG);
20283912Slling 		return (error);
20293912Slling 	}
20303912Slling 	dsl_dataset_name(ds, buf);
20313912Slling 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
20323912Slling 	rw_exit(&dp->dp_config_rwlock);
20333912Slling 	spa_close(spa, FTAG);
20343912Slling 
20353912Slling 	return (0);
20363912Slling }
2037