xref: /onnv-gate/usr/src/uts/common/fs/zfs/dsl_dataset.c (revision 1544:938876158511)
1789Sahrens /*
2789Sahrens  * CDDL HEADER START
3789Sahrens  *
4789Sahrens  * The contents of this file are subject to the terms of the
5*1544Seschrock  * Common Development and Distribution License (the "License").
6*1544Seschrock  * You may not use this file except in compliance with the License.
7789Sahrens  *
8789Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9789Sahrens  * or http://www.opensolaris.org/os/licensing.
10789Sahrens  * See the License for the specific language governing permissions
11789Sahrens  * and limitations under the License.
12789Sahrens  *
13789Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14789Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15789Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16789Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17789Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18789Sahrens  *
19789Sahrens  * CDDL HEADER END
20789Sahrens  */
21789Sahrens /*
22*1544Seschrock  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23789Sahrens  * Use is subject to license terms.
24789Sahrens  */
25789Sahrens 
26789Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
27789Sahrens 
28789Sahrens #include <sys/dmu_objset.h>
29789Sahrens #include <sys/dsl_dataset.h>
30789Sahrens #include <sys/dsl_dir.h>
31789Sahrens #include <sys/dmu_traverse.h>
32789Sahrens #include <sys/dmu_tx.h>
33789Sahrens #include <sys/arc.h>
34789Sahrens #include <sys/zio.h>
35789Sahrens #include <sys/zap.h>
36789Sahrens #include <sys/unique.h>
37789Sahrens #include <sys/zfs_context.h>
38789Sahrens 
/*
 * Upper bound on the sum of the weights of all opens of one dataset;
 * also the weight of a single exclusive open (see ds_refcnt_weight).
 */
#define	DOS_REF_MAX	(1ULL << 62)

/* Block size handed to bplist_create() for a dataset's deadlist. */
#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE

/*
 * Size that is charged as "uncompressed" for a block: indirect blocks
 * (level > 0) and blocks of metadata object types are charged at their
 * physical size, all other blocks at their logical size.
 *
 * The expansion is a single parenthesized expression without a trailing
 * semicolon, so the macro can be used inside larger expressions.
 */
#define	BP_GET_UCSIZE(bp) \
	((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
	BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
46789Sahrens 
47789Sahrens /*
48789Sahrens  * We use weighted reference counts to express the various forms of exclusion
49789Sahrens  * between different open modes.  A STANDARD open is 1 point, an EXCLUSIVE open
50789Sahrens  * is DOS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE.
51789Sahrens  * This makes the exclusion logic simple: the total refcnt for all opens cannot
52789Sahrens  * exceed DOS_REF_MAX.  For example, EXCLUSIVE opens are exclusive because their
53789Sahrens  * weight (DOS_REF_MAX) consumes the entire refcnt space.  PRIMARY opens consume
54789Sahrens  * just over half of the refcnt space, so there can't be more than one, but it
55789Sahrens  * can peacefully coexist with any number of STANDARD opens.
56789Sahrens  */
57789Sahrens static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
58789Sahrens 	0,			/* DOS_MODE_NONE - invalid		*/
59789Sahrens 	1,			/* DOS_MODE_STANDARD - unlimited number	*/
60789Sahrens 	(DOS_REF_MAX >> 1) + 1,	/* DOS_MODE_PRIMARY - only one of these	*/
61789Sahrens 	DOS_REF_MAX		/* DOS_MODE_EXCLUSIVE - no other opens	*/
62789Sahrens };
63789Sahrens 
64789Sahrens 
65789Sahrens void
66789Sahrens dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
67789Sahrens {
68789Sahrens 	int used = BP_GET_ASIZE(bp);
69789Sahrens 	int compressed = BP_GET_PSIZE(bp);
70789Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
71789Sahrens 
72789Sahrens 	dprintf_bp(bp, "born, ds=%p\n", ds);
73789Sahrens 
74789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
75789Sahrens 	/* It could have been compressed away to nothing */
76789Sahrens 	if (BP_IS_HOLE(bp))
77789Sahrens 		return;
78789Sahrens 	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
79789Sahrens 	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
80789Sahrens 	if (ds == NULL) {
81789Sahrens 		/*
82789Sahrens 		 * Account for the meta-objset space in its placeholder
83789Sahrens 		 * dsl_dir.
84789Sahrens 		 */
85789Sahrens 		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
86789Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
87789Sahrens 		    used, compressed, uncompressed, tx);
88789Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
89789Sahrens 		return;
90789Sahrens 	}
91789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
92789Sahrens 	mutex_enter(&ds->ds_lock);
93789Sahrens 	ds->ds_phys->ds_used_bytes += used;
94789Sahrens 	ds->ds_phys->ds_compressed_bytes += compressed;
95789Sahrens 	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
96789Sahrens 	ds->ds_phys->ds_unique_bytes += used;
97789Sahrens 	mutex_exit(&ds->ds_lock);
98789Sahrens 	dsl_dir_diduse_space(ds->ds_dir,
99789Sahrens 	    used, compressed, uncompressed, tx);
100789Sahrens }
101789Sahrens 
102789Sahrens void
103789Sahrens dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
104789Sahrens {
105789Sahrens 	int used = BP_GET_ASIZE(bp);
106789Sahrens 	int compressed = BP_GET_PSIZE(bp);
107789Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
108789Sahrens 
109789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
110789Sahrens 	if (BP_IS_HOLE(bp))
111789Sahrens 		return;
112789Sahrens 
113789Sahrens 	ASSERT(used > 0);
114789Sahrens 	if (ds == NULL) {
115789Sahrens 		/*
116789Sahrens 		 * Account for the meta-objset space in its placeholder
117789Sahrens 		 * dataset.
118789Sahrens 		 */
119789Sahrens 		/* XXX this can fail, what do we do when it does? */
120789Sahrens 		(void) arc_free(NULL, tx->tx_pool->dp_spa,
121789Sahrens 		    tx->tx_txg, bp, NULL, NULL, ARC_WAIT);
122789Sahrens 		bzero(bp, sizeof (blkptr_t));
123789Sahrens 
124789Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
125789Sahrens 		    -used, -compressed, -uncompressed, tx);
126789Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
127789Sahrens 		return;
128789Sahrens 	}
129789Sahrens 	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
130789Sahrens 
131789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
132789Sahrens 
133789Sahrens 	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
134789Sahrens 		dprintf_bp(bp, "freeing: %s", "");
135789Sahrens 		/* XXX check return code? */
136789Sahrens 		(void) arc_free(NULL, tx->tx_pool->dp_spa,
137789Sahrens 		    tx->tx_txg, bp, NULL, NULL, ARC_WAIT);
138789Sahrens 
139789Sahrens 		mutex_enter(&ds->ds_lock);
140789Sahrens 		/* XXX unique_bytes is not accurate for head datasets */
141789Sahrens 		/* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */
142789Sahrens 		ds->ds_phys->ds_unique_bytes -= used;
143789Sahrens 		mutex_exit(&ds->ds_lock);
144789Sahrens 		dsl_dir_diduse_space(ds->ds_dir,
145789Sahrens 		    -used, -compressed, -uncompressed, tx);
146789Sahrens 	} else {
147789Sahrens 		dprintf_bp(bp, "putting on dead list: %s", "");
148*1544Seschrock 		VERIFY(0 == bplist_enqueue(&ds->ds_deadlist, bp, tx));
149789Sahrens 		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
150789Sahrens 		if (ds->ds_phys->ds_prev_snap_obj != 0) {
151789Sahrens 			ASSERT3U(ds->ds_prev->ds_object, ==,
152789Sahrens 			    ds->ds_phys->ds_prev_snap_obj);
153789Sahrens 			ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
154789Sahrens 			if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
155789Sahrens 			    ds->ds_object &&
156789Sahrens 			    bp->blk_birth >
157789Sahrens 			    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
158789Sahrens 				dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
159789Sahrens 				mutex_enter(&ds->ds_prev->ds_lock);
160789Sahrens 				ds->ds_prev->ds_phys->ds_unique_bytes +=
161789Sahrens 				    used;
162789Sahrens 				mutex_exit(&ds->ds_prev->ds_lock);
163789Sahrens 			}
164789Sahrens 		}
165789Sahrens 	}
166789Sahrens 	bzero(bp, sizeof (blkptr_t));
167789Sahrens 	mutex_enter(&ds->ds_lock);
168789Sahrens 	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
169789Sahrens 	ds->ds_phys->ds_used_bytes -= used;
170789Sahrens 	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
171789Sahrens 	ds->ds_phys->ds_compressed_bytes -= compressed;
172789Sahrens 	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
173789Sahrens 	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
174789Sahrens 	mutex_exit(&ds->ds_lock);
175789Sahrens }
176789Sahrens 
177*1544Seschrock uint64_t
178*1544Seschrock dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
179789Sahrens {
180*1544Seschrock 	uint64_t txg;
181789Sahrens 	dsl_dir_t *dd;
182*1544Seschrock 
183789Sahrens 	if (ds == NULL)
184*1544Seschrock 		return (0);
185789Sahrens 	/*
186789Sahrens 	 * The snapshot creation could fail, but that would cause an
187789Sahrens 	 * incorrect FALSE return, which would only result in an
188789Sahrens 	 * overestimation of the amount of space that an operation would
189789Sahrens 	 * consume, which is OK.
190789Sahrens 	 *
191789Sahrens 	 * There's also a small window where we could miss a pending
192789Sahrens 	 * snapshot, because we could set the sync task in the quiescing
193789Sahrens 	 * phase.  So this should only be used as a guess.
194789Sahrens 	 */
195789Sahrens 	dd = ds->ds_dir;
196789Sahrens 	mutex_enter(&dd->dd_lock);
197*1544Seschrock 	if (dd->dd_sync_func == dsl_dataset_snapshot_sync)
198*1544Seschrock 		txg = dd->dd_sync_txg;
199789Sahrens 	else
200*1544Seschrock 		txg = ds->ds_phys->ds_prev_snap_txg;
201789Sahrens 	mutex_exit(&dd->dd_lock);
202*1544Seschrock 
203*1544Seschrock 	return (txg);
204*1544Seschrock }
205*1544Seschrock 
206*1544Seschrock int
207*1544Seschrock dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth)
208*1544Seschrock {
209*1544Seschrock 	return (blk_birth > dsl_dataset_prev_snap_txg(ds));
210789Sahrens }
211789Sahrens 
212789Sahrens /* ARGSUSED */
213789Sahrens static void
214789Sahrens dsl_dataset_evict(dmu_buf_t *db, void *dsv)
215789Sahrens {
216789Sahrens 	dsl_dataset_t *ds = dsv;
217789Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
218789Sahrens 
219789Sahrens 	/* open_refcount == DOS_REF_MAX when deleting */
220789Sahrens 	ASSERT(ds->ds_open_refcount == 0 ||
221789Sahrens 	    ds->ds_open_refcount == DOS_REF_MAX);
222789Sahrens 
223789Sahrens 	dprintf_ds(ds, "evicting %s\n", "");
224789Sahrens 
225789Sahrens 	unique_remove(ds->ds_phys->ds_fsid_guid);
226789Sahrens 
227789Sahrens 	if (ds->ds_user_ptr != NULL)
228789Sahrens 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
229789Sahrens 
230789Sahrens 	if (ds->ds_prev) {
231789Sahrens 		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
232789Sahrens 		ds->ds_prev = NULL;
233789Sahrens 	}
234789Sahrens 
235789Sahrens 	bplist_close(&ds->ds_deadlist);
236789Sahrens 	dsl_dir_close(ds->ds_dir, ds);
237789Sahrens 
238789Sahrens 	if (list_link_active(&ds->ds_synced_link))
239789Sahrens 		list_remove(&dp->dp_synced_objsets, ds);
240789Sahrens 
241789Sahrens 	kmem_free(ds, sizeof (dsl_dataset_t));
242789Sahrens }
243789Sahrens 
244*1544Seschrock static int
245789Sahrens dsl_dataset_get_snapname(dsl_dataset_t *ds)
246789Sahrens {
247789Sahrens 	dsl_dataset_phys_t *headphys;
248789Sahrens 	int err;
249789Sahrens 	dmu_buf_t *headdbuf;
250789Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
251789Sahrens 	objset_t *mos = dp->dp_meta_objset;
252789Sahrens 
253789Sahrens 	if (ds->ds_snapname[0])
254*1544Seschrock 		return (0);
255789Sahrens 	if (ds->ds_phys->ds_next_snap_obj == 0)
256*1544Seschrock 		return (0);
257789Sahrens 
258*1544Seschrock 	err = dmu_bonus_hold(mos, ds->ds_dir->dd_phys->dd_head_dataset_obj,
259*1544Seschrock 	    FTAG, &headdbuf);
260*1544Seschrock 	if (err)
261*1544Seschrock 		return (err);
262789Sahrens 	headphys = headdbuf->db_data;
263789Sahrens 	err = zap_value_search(dp->dp_meta_objset,
264789Sahrens 	    headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname);
265*1544Seschrock 	dmu_buf_rele(headdbuf, FTAG);
266*1544Seschrock 	return (err);
267789Sahrens }
268789Sahrens 
269*1544Seschrock int
270789Sahrens dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname,
271*1544Seschrock     int mode, void *tag, dsl_dataset_t **dsp)
272789Sahrens {
273789Sahrens 	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
274789Sahrens 	objset_t *mos = dp->dp_meta_objset;
275789Sahrens 	dmu_buf_t *dbuf;
276789Sahrens 	dsl_dataset_t *ds;
277*1544Seschrock 	int err;
278789Sahrens 
279789Sahrens 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
280789Sahrens 	    dsl_pool_sync_context(dp));
281789Sahrens 
282*1544Seschrock 	err = dmu_bonus_hold(mos, dsobj, tag, &dbuf);
283*1544Seschrock 	if (err)
284*1544Seschrock 		return (err);
285789Sahrens 	ds = dmu_buf_get_user(dbuf);
286789Sahrens 	if (ds == NULL) {
287789Sahrens 		dsl_dataset_t *winner;
288789Sahrens 
289789Sahrens 		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
290789Sahrens 		ds->ds_dbuf = dbuf;
291789Sahrens 		ds->ds_object = dsobj;
292789Sahrens 		ds->ds_phys = dbuf->db_data;
293789Sahrens 
294*1544Seschrock 		err = bplist_open(&ds->ds_deadlist,
295789Sahrens 		    mos, ds->ds_phys->ds_deadlist_obj);
296*1544Seschrock 		if (err == 0) {
297*1544Seschrock 			err = dsl_dir_open_obj(dp,
298*1544Seschrock 			    ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
299*1544Seschrock 		}
300*1544Seschrock 		if (err) {
301*1544Seschrock 			/*
302*1544Seschrock 			 * we don't really need to close the blist if we
303*1544Seschrock 			 * just opened it.
304*1544Seschrock 			 */
305*1544Seschrock 			kmem_free(ds, sizeof (dsl_dataset_t));
306*1544Seschrock 			dmu_buf_rele(dbuf, tag);
307*1544Seschrock 			return (err);
308*1544Seschrock 		}
309789Sahrens 
310789Sahrens 		if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) {
311789Sahrens 			ds->ds_snapname[0] = '\0';
312789Sahrens 			if (ds->ds_phys->ds_prev_snap_obj) {
313*1544Seschrock 				err = dsl_dataset_open_obj(dp,
314789Sahrens 				    ds->ds_phys->ds_prev_snap_obj, NULL,
315*1544Seschrock 				    DS_MODE_NONE, ds, &ds->ds_prev);
316789Sahrens 			}
317789Sahrens 		} else {
318789Sahrens 			if (snapname) {
319789Sahrens #ifdef ZFS_DEBUG
320789Sahrens 				dsl_dataset_phys_t *headphys;
321*1544Seschrock 				dmu_buf_t *headdbuf;
322*1544Seschrock 				err = dmu_bonus_hold(mos,
323*1544Seschrock 				    ds->ds_dir->dd_phys->dd_head_dataset_obj,
324*1544Seschrock 				    FTAG, &headdbuf);
325*1544Seschrock 				if (err == 0) {
326*1544Seschrock 					headphys = headdbuf->db_data;
327*1544Seschrock 					uint64_t foundobj;
328*1544Seschrock 					err = zap_lookup(dp->dp_meta_objset,
329*1544Seschrock 					    headphys->ds_snapnames_zapobj,
330*1544Seschrock 					    snapname, sizeof (foundobj), 1,
331*1544Seschrock 					    &foundobj);
332*1544Seschrock 					ASSERT3U(foundobj, ==, dsobj);
333*1544Seschrock 					dmu_buf_rele(headdbuf, FTAG);
334*1544Seschrock 				}
335789Sahrens #endif
336789Sahrens 				(void) strcat(ds->ds_snapname, snapname);
337789Sahrens 			} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
338*1544Seschrock 				err = dsl_dataset_get_snapname(ds);
339789Sahrens 			}
340789Sahrens 		}
341789Sahrens 
342*1544Seschrock 		if (err == 0) {
343*1544Seschrock 			winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
344*1544Seschrock 			    dsl_dataset_evict);
345*1544Seschrock 		}
346*1544Seschrock 		if (err || winner) {
347789Sahrens 			bplist_close(&ds->ds_deadlist);
348789Sahrens 			if (ds->ds_prev) {
349789Sahrens 				dsl_dataset_close(ds->ds_prev,
350789Sahrens 				    DS_MODE_NONE, ds);
351789Sahrens 			}
352789Sahrens 			dsl_dir_close(ds->ds_dir, ds);
353789Sahrens 			kmem_free(ds, sizeof (dsl_dataset_t));
354*1544Seschrock 			if (err) {
355*1544Seschrock 				dmu_buf_rele(dbuf, tag);
356*1544Seschrock 				return (err);
357*1544Seschrock 			}
358789Sahrens 			ds = winner;
359789Sahrens 		} else {
360789Sahrens 			uint64_t new =
361789Sahrens 			    unique_insert(ds->ds_phys->ds_fsid_guid);
362789Sahrens 			if (new != ds->ds_phys->ds_fsid_guid) {
363789Sahrens 				/* XXX it won't necessarily be synced... */
364789Sahrens 				ds->ds_phys->ds_fsid_guid = new;
365789Sahrens 			}
366789Sahrens 		}
367789Sahrens 	}
368789Sahrens 	ASSERT3P(ds->ds_dbuf, ==, dbuf);
369789Sahrens 	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
370789Sahrens 
371789Sahrens 	mutex_enter(&ds->ds_lock);
372789Sahrens 	if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY &&
373789Sahrens 	    ds->ds_phys->ds_restoring && !DS_MODE_IS_RESTORE(mode)) ||
374789Sahrens 	    (ds->ds_open_refcount + weight > DOS_REF_MAX)) {
375789Sahrens 		mutex_exit(&ds->ds_lock);
376789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
377*1544Seschrock 		return (EBUSY);
378789Sahrens 	}
379789Sahrens 	ds->ds_open_refcount += weight;
380789Sahrens 	mutex_exit(&ds->ds_lock);
381789Sahrens 
382*1544Seschrock 	*dsp = ds;
383*1544Seschrock 	return (0);
384789Sahrens }
385789Sahrens 
386789Sahrens int
387789Sahrens dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
388789Sahrens     void *tag, dsl_dataset_t **dsp)
389789Sahrens {
390789Sahrens 	dsl_dir_t *dd;
391789Sahrens 	dsl_pool_t *dp;
392789Sahrens 	const char *tail;
393789Sahrens 	uint64_t obj;
394789Sahrens 	dsl_dataset_t *ds = NULL;
395789Sahrens 	int err = 0;
396789Sahrens 
397*1544Seschrock 	err = dsl_dir_open_spa(spa, name, FTAG, &dd, &tail);
398*1544Seschrock 	if (err)
399*1544Seschrock 		return (err);
400789Sahrens 
401789Sahrens 	dp = dd->dd_pool;
402789Sahrens 	obj = dd->dd_phys->dd_head_dataset_obj;
403789Sahrens 	rw_enter(&dp->dp_config_rwlock, RW_READER);
404789Sahrens 	if (obj == 0) {
405789Sahrens 		/* A dataset with no associated objset */
406789Sahrens 		err = ENOENT;
407789Sahrens 		goto out;
408789Sahrens 	}
409789Sahrens 
410789Sahrens 	if (tail != NULL) {
411789Sahrens 		objset_t *mos = dp->dp_meta_objset;
412789Sahrens 
413*1544Seschrock 		err = dsl_dataset_open_obj(dp, obj, NULL,
414*1544Seschrock 		    DS_MODE_NONE, tag, &ds);
415*1544Seschrock 		if (err)
416*1544Seschrock 			goto out;
417789Sahrens 		obj = ds->ds_phys->ds_snapnames_zapobj;
418789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
419789Sahrens 		ds = NULL;
420789Sahrens 
421789Sahrens 		if (tail[0] != '@') {
422789Sahrens 			err = ENOENT;
423789Sahrens 			goto out;
424789Sahrens 		}
425789Sahrens 		tail++;
426789Sahrens 
427789Sahrens 		/* Look for a snapshot */
428789Sahrens 		if (!DS_MODE_IS_READONLY(mode)) {
429789Sahrens 			err = EROFS;
430789Sahrens 			goto out;
431789Sahrens 		}
432789Sahrens 		dprintf("looking for snapshot '%s'\n", tail);
433789Sahrens 		err = zap_lookup(mos, obj, tail, 8, 1, &obj);
434789Sahrens 		if (err)
435789Sahrens 			goto out;
436789Sahrens 	}
437*1544Seschrock 	err = dsl_dataset_open_obj(dp, obj, tail, mode, tag, &ds);
438789Sahrens 
439789Sahrens out:
440789Sahrens 	rw_exit(&dp->dp_config_rwlock);
441789Sahrens 	dsl_dir_close(dd, FTAG);
442789Sahrens 
443789Sahrens 	ASSERT3U((err == 0), ==, (ds != NULL));
444789Sahrens 	/* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */
445789Sahrens 
446789Sahrens 	*dsp = ds;
447789Sahrens 	return (err);
448789Sahrens }
449789Sahrens 
450789Sahrens int
451789Sahrens dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp)
452789Sahrens {
453789Sahrens 	return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp));
454789Sahrens }
455789Sahrens 
456789Sahrens void
457789Sahrens dsl_dataset_name(dsl_dataset_t *ds, char *name)
458789Sahrens {
459789Sahrens 	if (ds == NULL) {
460789Sahrens 		(void) strcpy(name, "mos");
461789Sahrens 	} else {
462789Sahrens 		dsl_dir_name(ds->ds_dir, name);
463*1544Seschrock 		VERIFY(0 == dsl_dataset_get_snapname(ds));
464789Sahrens 		if (ds->ds_snapname[0]) {
465789Sahrens 			(void) strcat(name, "@");
466789Sahrens 			if (!MUTEX_HELD(&ds->ds_lock)) {
467789Sahrens 				/*
468789Sahrens 				 * We use a "recursive" mutex so that we
469789Sahrens 				 * can call dprintf_ds() with ds_lock held.
470789Sahrens 				 */
471789Sahrens 				mutex_enter(&ds->ds_lock);
472789Sahrens 				(void) strcat(name, ds->ds_snapname);
473789Sahrens 				mutex_exit(&ds->ds_lock);
474789Sahrens 			} else {
475789Sahrens 				(void) strcat(name, ds->ds_snapname);
476789Sahrens 			}
477789Sahrens 		}
478789Sahrens 	}
479789Sahrens }
480789Sahrens 
481789Sahrens void
482789Sahrens dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag)
483789Sahrens {
484789Sahrens 	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
485789Sahrens 	mutex_enter(&ds->ds_lock);
486789Sahrens 	ASSERT3U(ds->ds_open_refcount, >=, weight);
487789Sahrens 	ds->ds_open_refcount -= weight;
488789Sahrens 	dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n",
489789Sahrens 	    mode, ds->ds_open_refcount);
490789Sahrens 	mutex_exit(&ds->ds_lock);
491789Sahrens 
492*1544Seschrock 	dmu_buf_rele(ds->ds_dbuf, tag);
493789Sahrens }
494789Sahrens 
495789Sahrens void
496789Sahrens dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
497789Sahrens {
498789Sahrens 	objset_t *mos = dp->dp_meta_objset;
499789Sahrens 	dmu_buf_t *dbuf;
500789Sahrens 	dsl_dataset_phys_t *dsphys;
501789Sahrens 	dsl_dataset_t *ds;
502789Sahrens 	uint64_t dsobj;
503789Sahrens 	dsl_dir_t *dd;
504789Sahrens 
505789Sahrens 	dsl_dir_create_root(mos, ddobjp, tx);
506*1544Seschrock 	VERIFY(0 == dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG, &dd));
507789Sahrens 
508928Stabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
509928Stabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
510*1544Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
511789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
512789Sahrens 	dsphys = dbuf->db_data;
513789Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
514789Sahrens 	dsphys->ds_fsid_guid = unique_create();
515*1544Seschrock 	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
516789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
517789Sahrens 	    sizeof (dsphys->ds_guid));
518789Sahrens 	dsphys->ds_snapnames_zapobj =
519885Sahrens 	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
520789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
521789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
522789Sahrens 	dsphys->ds_deadlist_obj =
523789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
524*1544Seschrock 	dmu_buf_rele(dbuf, FTAG);
525789Sahrens 
526789Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
527789Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
528789Sahrens 	dsl_dir_close(dd, FTAG);
529789Sahrens 
530*1544Seschrock 	VERIFY(0 ==
531*1544Seschrock 	    dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG, &ds));
532789Sahrens 	(void) dmu_objset_create_impl(dp->dp_spa, ds, DMU_OST_ZFS, tx);
533789Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
534789Sahrens }
535789Sahrens 
536789Sahrens int
537789Sahrens dsl_dataset_create_sync(dsl_dir_t *pds, const char *fullname,
538789Sahrens     const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx)
539789Sahrens {
540789Sahrens 	int err;
541789Sahrens 	dsl_pool_t *dp = pds->dd_pool;
542789Sahrens 	dmu_buf_t *dbuf;
543789Sahrens 	dsl_dataset_phys_t *dsphys;
544789Sahrens 	uint64_t dsobj;
545789Sahrens 	objset_t *mos = dp->dp_meta_objset;
546789Sahrens 	dsl_dir_t *dd;
547789Sahrens 
548789Sahrens 	if (clone_parent != NULL) {
549789Sahrens 		/*
550789Sahrens 		 * You can't clone across pools.
551789Sahrens 		 */
552789Sahrens 		if (clone_parent->ds_dir->dd_pool != dp)
553789Sahrens 			return (EXDEV);
554789Sahrens 
555789Sahrens 		/*
556789Sahrens 		 * You can only clone snapshots, not the head datasets.
557789Sahrens 		 */
558789Sahrens 		if (clone_parent->ds_phys->ds_num_children == 0)
559789Sahrens 			return (EINVAL);
560789Sahrens 	}
561789Sahrens 
562789Sahrens 	ASSERT(lastname[0] != '@');
563789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
564789Sahrens 
565789Sahrens 	err = dsl_dir_create_sync(pds, lastname, tx);
566789Sahrens 	if (err)
567789Sahrens 		return (err);
568*1544Seschrock 	VERIFY(0 == dsl_dir_open_spa(dp->dp_spa, fullname, FTAG, &dd, NULL));
569789Sahrens 
570789Sahrens 	/* This is the point of no (unsuccessful) return */
571789Sahrens 
572928Stabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
573928Stabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
574*1544Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
575789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
576789Sahrens 	dsphys = dbuf->db_data;
577789Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
578789Sahrens 	dsphys->ds_fsid_guid = unique_create();
579789Sahrens 	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
580789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
581789Sahrens 	    sizeof (dsphys->ds_guid));
582789Sahrens 	dsphys->ds_snapnames_zapobj =
583885Sahrens 	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
584789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
585789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
586789Sahrens 	dsphys->ds_deadlist_obj =
587789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
588789Sahrens 	if (clone_parent) {
589789Sahrens 		dsphys->ds_prev_snap_obj = clone_parent->ds_object;
590789Sahrens 		dsphys->ds_prev_snap_txg =
591789Sahrens 		    clone_parent->ds_phys->ds_creation_txg;
592789Sahrens 		dsphys->ds_used_bytes =
593789Sahrens 		    clone_parent->ds_phys->ds_used_bytes;
594789Sahrens 		dsphys->ds_compressed_bytes =
595789Sahrens 		    clone_parent->ds_phys->ds_compressed_bytes;
596789Sahrens 		dsphys->ds_uncompressed_bytes =
597789Sahrens 		    clone_parent->ds_phys->ds_uncompressed_bytes;
598789Sahrens 		dsphys->ds_bp = clone_parent->ds_phys->ds_bp;
599789Sahrens 
600789Sahrens 		dmu_buf_will_dirty(clone_parent->ds_dbuf, tx);
601789Sahrens 		clone_parent->ds_phys->ds_num_children++;
602789Sahrens 
603789Sahrens 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
604789Sahrens 		dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object;
605789Sahrens 	}
606*1544Seschrock 	dmu_buf_rele(dbuf, FTAG);
607789Sahrens 
608789Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
609789Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
610789Sahrens 	dsl_dir_close(dd, FTAG);
611789Sahrens 
612789Sahrens 	return (0);
613789Sahrens }
614789Sahrens 
615789Sahrens 
616789Sahrens int
617789Sahrens dsl_dataset_destroy(const char *name)
618789Sahrens {
619789Sahrens 	int err;
620789Sahrens 	dsl_pool_t *dp;
621789Sahrens 	dsl_dir_t *dd;
622789Sahrens 	const char *tail;
623789Sahrens 
624*1544Seschrock 	err = dsl_dir_open(name, FTAG, &dd, &tail);
625*1544Seschrock 	if (err)
626*1544Seschrock 		return (err);
627789Sahrens 
628789Sahrens 	dp = dd->dd_pool;
629789Sahrens 	if (tail != NULL) {
630789Sahrens 		if (tail[0] != '@') {
631789Sahrens 			dsl_dir_close(dd, FTAG);
632789Sahrens 			return (ENOENT);
633789Sahrens 		}
634789Sahrens 		tail++;
635789Sahrens 		/* Just blow away the snapshot */
636789Sahrens 		do {
637789Sahrens 			txg_wait_synced(dp, 0);
638789Sahrens 			err = dsl_dir_sync_task(dd,
639789Sahrens 			    dsl_dataset_destroy_sync, (void*)tail, 0);
640789Sahrens 		} while (err == EAGAIN);
641789Sahrens 		dsl_dir_close(dd, FTAG);
642789Sahrens 	} else {
643789Sahrens 		char buf[MAXNAMELEN];
644789Sahrens 		char *cp;
645789Sahrens 
646789Sahrens 		dsl_dir_t *pds;
647789Sahrens 		if (dd->dd_phys->dd_parent_obj == 0) {
648789Sahrens 			dsl_dir_close(dd, FTAG);
649789Sahrens 			return (EINVAL);
650789Sahrens 		}
651789Sahrens 		/*
652789Sahrens 		 * Make sure it's not dirty before we destroy it.
653789Sahrens 		 */
654789Sahrens 		txg_wait_synced(dd->dd_pool, 0);
655789Sahrens 		/*
656789Sahrens 		 * Blow away the dsl_dir + head dataset.
657789Sahrens 		 * dsl_dir_destroy_sync() will call
658789Sahrens 		 * dsl_dataset_destroy_sync() to destroy the head dataset.
659789Sahrens 		 */
660789Sahrens 		rw_enter(&dp->dp_config_rwlock, RW_READER);
661*1544Seschrock 		err = dsl_dir_open_obj(dd->dd_pool,
662*1544Seschrock 		    dd->dd_phys->dd_parent_obj, NULL, FTAG, &pds);
663789Sahrens 		dsl_dir_close(dd, FTAG);
664789Sahrens 		rw_exit(&dp->dp_config_rwlock);
665*1544Seschrock 		if (err)
666*1544Seschrock 			return (err);
667789Sahrens 
668789Sahrens 		(void) strcpy(buf, name);
669789Sahrens 		cp = strrchr(buf, '/') + 1;
670789Sahrens 		ASSERT(cp[0] != '\0');
671789Sahrens 		do {
672789Sahrens 			txg_wait_synced(dp, 0);
673789Sahrens 			err = dsl_dir_sync_task(pds,
674789Sahrens 			    dsl_dir_destroy_sync, cp, 0);
675789Sahrens 		} while (err == EAGAIN);
676789Sahrens 		dsl_dir_close(pds, FTAG);
677789Sahrens 	}
678789Sahrens 
679789Sahrens 	return (err);
680789Sahrens }
681789Sahrens 
682789Sahrens int
683789Sahrens dsl_dataset_rollback(const char *name)
684789Sahrens {
685789Sahrens 	int err;
686789Sahrens 	dsl_dir_t *dd;
687789Sahrens 	const char *tail;
688789Sahrens 
689*1544Seschrock 	err = dsl_dir_open(name, FTAG, &dd, &tail);
690*1544Seschrock 	if (err)
691*1544Seschrock 		return (err);
692789Sahrens 
693789Sahrens 	if (tail != NULL) {
694789Sahrens 		dsl_dir_close(dd, FTAG);
695789Sahrens 		return (EINVAL);
696789Sahrens 	}
697789Sahrens 	do {
698789Sahrens 		txg_wait_synced(dd->dd_pool, 0);
699789Sahrens 		err = dsl_dir_sync_task(dd,
700789Sahrens 		    dsl_dataset_rollback_sync, NULL, 0);
701789Sahrens 	} while (err == EAGAIN);
702789Sahrens 	dsl_dir_close(dd, FTAG);
703789Sahrens 
704789Sahrens 	return (err);
705789Sahrens }
706789Sahrens 
707789Sahrens void *
708789Sahrens dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
709789Sahrens     void *p, dsl_dataset_evict_func_t func)
710789Sahrens {
711789Sahrens 	void *old;
712789Sahrens 
713789Sahrens 	mutex_enter(&ds->ds_lock);
714789Sahrens 	old = ds->ds_user_ptr;
715789Sahrens 	if (old == NULL) {
716789Sahrens 		ds->ds_user_ptr = p;
717789Sahrens 		ds->ds_user_evict_func = func;
718789Sahrens 	}
719789Sahrens 	mutex_exit(&ds->ds_lock);
720789Sahrens 	return (old);
721789Sahrens }
722789Sahrens 
723789Sahrens void *
724789Sahrens dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
725789Sahrens {
726789Sahrens 	return (ds->ds_user_ptr);
727789Sahrens }
728789Sahrens 
729789Sahrens 
730789Sahrens void
731789Sahrens dsl_dataset_get_blkptr(dsl_dataset_t *ds, blkptr_t *bp)
732789Sahrens {
733789Sahrens 	*bp = ds->ds_phys->ds_bp;
734789Sahrens }
735789Sahrens 
736789Sahrens void
737789Sahrens dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
738789Sahrens {
739789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
740789Sahrens 	/* If it's the meta-objset, set dp_meta_rootbp */
741789Sahrens 	if (ds == NULL) {
742789Sahrens 		tx->tx_pool->dp_meta_rootbp = *bp;
743789Sahrens 	} else {
744789Sahrens 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
745789Sahrens 		ds->ds_phys->ds_bp = *bp;
746789Sahrens 	}
747789Sahrens }
748789Sahrens 
749789Sahrens spa_t *
750789Sahrens dsl_dataset_get_spa(dsl_dataset_t *ds)
751789Sahrens {
752789Sahrens 	return (ds->ds_dir->dd_pool->dp_spa);
753789Sahrens }
754789Sahrens 
755789Sahrens void
756789Sahrens dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
757789Sahrens {
758789Sahrens 	dsl_pool_t *dp;
759789Sahrens 
760789Sahrens 	if (ds == NULL) /* this is the meta-objset */
761789Sahrens 		return;
762789Sahrens 
763789Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
764789Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
765789Sahrens 
766789Sahrens 	dp = ds->ds_dir->dd_pool;
767789Sahrens 
768789Sahrens 	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
769789Sahrens 		/* up the hold count until we can be written out */
770789Sahrens 		dmu_buf_add_ref(ds->ds_dbuf, ds);
771789Sahrens 	}
772789Sahrens }
773789Sahrens 
774789Sahrens struct killarg {
775789Sahrens 	uint64_t *usedp;
776789Sahrens 	uint64_t *compressedp;
777789Sahrens 	uint64_t *uncompressedp;
778789Sahrens 	zio_t *zio;
779789Sahrens 	dmu_tx_t *tx;
780789Sahrens };
781789Sahrens 
782789Sahrens static int
783789Sahrens kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
784789Sahrens {
785789Sahrens 	struct killarg *ka = arg;
786789Sahrens 	blkptr_t *bp = &bc->bc_blkptr;
787789Sahrens 
788789Sahrens 	ASSERT3U(bc->bc_errno, ==, 0);
789789Sahrens 
790789Sahrens 	/*
791789Sahrens 	 * Since this callback is not called concurrently, no lock is
792789Sahrens 	 * needed on the accounting values.
793789Sahrens 	 */
794789Sahrens 	*ka->usedp += BP_GET_ASIZE(bp);
795789Sahrens 	*ka->compressedp += BP_GET_PSIZE(bp);
796789Sahrens 	*ka->uncompressedp += BP_GET_UCSIZE(bp);
797789Sahrens 	/* XXX check for EIO? */
798789Sahrens 	(void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL,
799789Sahrens 	    ARC_NOWAIT);
800789Sahrens 	return (0);
801789Sahrens }
802789Sahrens 
803789Sahrens /* ARGSUSED */
804789Sahrens int
805789Sahrens dsl_dataset_rollback_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
806789Sahrens {
807789Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
808789Sahrens 	dsl_dataset_t *ds;
809*1544Seschrock 	int err;
810789Sahrens 
811789Sahrens 	if (dd->dd_phys->dd_head_dataset_obj == 0)
812789Sahrens 		return (EINVAL);
813*1544Seschrock 	err = dsl_dataset_open_obj(dd->dd_pool,
814*1544Seschrock 	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &ds);
815*1544Seschrock 	if (err)
816*1544Seschrock 		return (err);
817789Sahrens 
818789Sahrens 	if (ds->ds_phys->ds_prev_snap_txg == 0) {
819789Sahrens 		/*
820789Sahrens 		 * There's no previous snapshot.  I suppose we could
821789Sahrens 		 * roll it back to being empty (and re-initialize the
822789Sahrens 		 * upper (ZPL) layer).  But for now there's no way to do
823789Sahrens 		 * this via the user interface.
824789Sahrens 		 */
825789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
826789Sahrens 		return (EINVAL);
827789Sahrens 	}
828789Sahrens 
829789Sahrens 	mutex_enter(&ds->ds_lock);
830789Sahrens 	if (ds->ds_open_refcount > 0) {
831789Sahrens 		mutex_exit(&ds->ds_lock);
832789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
833789Sahrens 		return (EBUSY);
834789Sahrens 	}
835789Sahrens 
836789Sahrens 	/*
837789Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
838789Sahrens 	 * them.  Try again.
839789Sahrens 	 */
840789Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) {
841789Sahrens 		mutex_exit(&ds->ds_lock);
842789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
843789Sahrens 		return (EAGAIN);
844789Sahrens 	}
845789Sahrens 
846789Sahrens 	/* THE POINT OF NO (unsuccessful) RETURN */
847789Sahrens 	ds->ds_open_refcount = DOS_REF_MAX;
848789Sahrens 	mutex_exit(&ds->ds_lock);
849789Sahrens 
850789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
851789Sahrens 
852789Sahrens 	/* Zero out the deadlist. */
853789Sahrens 	dprintf("old deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj);
854789Sahrens 	bplist_close(&ds->ds_deadlist);
855789Sahrens 	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
856789Sahrens 	ds->ds_phys->ds_deadlist_obj =
857789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
858*1544Seschrock 	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
859*1544Seschrock 	    ds->ds_phys->ds_deadlist_obj));
860789Sahrens 	dprintf("new deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj);
861789Sahrens 
862789Sahrens 	{
863789Sahrens 		/* Free blkptrs that we gave birth to */
864789Sahrens 		zio_t *zio;
865789Sahrens 		uint64_t used = 0, compressed = 0, uncompressed = 0;
866789Sahrens 		struct killarg ka;
867789Sahrens 
868789Sahrens 		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
869789Sahrens 		    ZIO_FLAG_MUSTSUCCEED);
870789Sahrens 		ka.usedp = &used;
871789Sahrens 		ka.compressedp = &compressed;
872789Sahrens 		ka.uncompressedp = &uncompressed;
873789Sahrens 		ka.zio = zio;
874789Sahrens 		ka.tx = tx;
875789Sahrens 		(void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
876789Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
877789Sahrens 		(void) zio_wait(zio);
878789Sahrens 
879789Sahrens 		dsl_dir_diduse_space(dd,
880789Sahrens 		    -used, -compressed, -uncompressed, tx);
881789Sahrens 	}
882789Sahrens 
883789Sahrens 	/* Change our contents to that of the prev snapshot (finally!) */
884789Sahrens 	ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj);
885789Sahrens 	ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
886789Sahrens 	ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes;
887789Sahrens 	ds->ds_phys->ds_compressed_bytes =
888789Sahrens 	    ds->ds_prev->ds_phys->ds_compressed_bytes;
889789Sahrens 	ds->ds_phys->ds_uncompressed_bytes =
890789Sahrens 	    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
891789Sahrens 	ds->ds_phys->ds_restoring = ds->ds_prev->ds_phys->ds_restoring;
892789Sahrens 	ds->ds_phys->ds_unique_bytes = 0;
893789Sahrens 
894789Sahrens 	dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
895789Sahrens 	ds->ds_prev->ds_phys->ds_unique_bytes = 0;
896789Sahrens 
897789Sahrens 	dprintf("new deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj);
898789Sahrens 	ds->ds_open_refcount = 0;
899789Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
900789Sahrens 
901789Sahrens 	return (0);
902789Sahrens }
903789Sahrens 
904789Sahrens int
905789Sahrens dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
906789Sahrens {
907789Sahrens 	const char *snapname = arg;
908789Sahrens 	uint64_t used = 0, compressed = 0, uncompressed = 0;
909789Sahrens 	blkptr_t bp;
910789Sahrens 	zio_t *zio;
911789Sahrens 	int err;
912789Sahrens 	int after_branch_point = FALSE;
913789Sahrens 	int drop_lock = FALSE;
914789Sahrens 	dsl_pool_t *dp = dd->dd_pool;
915789Sahrens 	objset_t *mos = dp->dp_meta_objset;
916789Sahrens 	dsl_dataset_t *ds, *ds_prev = NULL;
917789Sahrens 	uint64_t obj;
918789Sahrens 
919789Sahrens 	if (dd->dd_phys->dd_head_dataset_obj == 0)
920789Sahrens 		return (EINVAL);
921789Sahrens 
922789Sahrens 	if (!RW_WRITE_HELD(&dp->dp_config_rwlock)) {
923789Sahrens 		rw_enter(&dp->dp_config_rwlock, RW_WRITER);
924789Sahrens 		drop_lock = TRUE;
925789Sahrens 	}
926789Sahrens 
927*1544Seschrock 	err = dsl_dataset_open_obj(dd->dd_pool,
928789Sahrens 	    dd->dd_phys->dd_head_dataset_obj, NULL,
929*1544Seschrock 	    snapname ? DS_MODE_NONE : DS_MODE_EXCLUSIVE, FTAG, &ds);
930789Sahrens 
931*1544Seschrock 	if (err == 0 && snapname) {
932789Sahrens 		err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
933789Sahrens 		    snapname, 8, 1, &obj);
934789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
935*1544Seschrock 		if (err == 0) {
936*1544Seschrock 			err = dsl_dataset_open_obj(dd->dd_pool, obj, NULL,
937*1544Seschrock 			    DS_MODE_EXCLUSIVE, FTAG, &ds);
938789Sahrens 		}
939789Sahrens 	}
940*1544Seschrock 	if (err) {
941789Sahrens 		if (drop_lock)
942789Sahrens 			rw_exit(&dp->dp_config_rwlock);
943*1544Seschrock 		return (err);
944789Sahrens 	}
945789Sahrens 
946789Sahrens 	obj = ds->ds_object;
947789Sahrens 
948789Sahrens 	/* Can't delete a branch point. */
949789Sahrens 	if (ds->ds_phys->ds_num_children > 1) {
950789Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
951789Sahrens 		if (drop_lock)
952789Sahrens 			rw_exit(&dp->dp_config_rwlock);
953789Sahrens 		return (EINVAL);
954789Sahrens 	}
955789Sahrens 
956789Sahrens 	/*
957789Sahrens 	 * Can't delete a head dataset if there are snapshots of it.
958789Sahrens 	 * (Except if the only snapshots are from the branch we cloned
959789Sahrens 	 * from.)
960789Sahrens 	 */
961789Sahrens 	if (ds->ds_prev != NULL &&
962789Sahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj == obj) {
963789Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
964789Sahrens 		if (drop_lock)
965789Sahrens 			rw_exit(&dp->dp_config_rwlock);
966789Sahrens 		return (EINVAL);
967789Sahrens 	}
968789Sahrens 
969789Sahrens 	/*
970789Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
971789Sahrens 	 * them.  Try again.
972789Sahrens 	 */
973789Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) {
974789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
975885Sahrens 		if (drop_lock)
976885Sahrens 			rw_exit(&dp->dp_config_rwlock);
977789Sahrens 		return (EAGAIN);
978789Sahrens 	}
979789Sahrens 
980789Sahrens 	if (ds->ds_phys->ds_prev_snap_obj != 0) {
981789Sahrens 		if (ds->ds_prev) {
982789Sahrens 			ds_prev = ds->ds_prev;
983789Sahrens 		} else {
984*1544Seschrock 			err = dsl_dataset_open_obj(dd->dd_pool,
985789Sahrens 			    ds->ds_phys->ds_prev_snap_obj, NULL,
986*1544Seschrock 			    DS_MODE_NONE, FTAG, &ds_prev);
987*1544Seschrock 			if (err) {
988*1544Seschrock 				dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
989*1544Seschrock 				if (drop_lock)
990*1544Seschrock 					rw_exit(&dp->dp_config_rwlock);
991*1544Seschrock 				return (err);
992*1544Seschrock 			}
993789Sahrens 		}
994789Sahrens 		after_branch_point =
995789Sahrens 		    (ds_prev->ds_phys->ds_next_snap_obj != obj);
996789Sahrens 
997789Sahrens 		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
998789Sahrens 		if (after_branch_point &&
999789Sahrens 		    ds->ds_phys->ds_next_snap_obj == 0) {
1000789Sahrens 			/* This clone is toast. */
1001789Sahrens 			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
1002789Sahrens 			ds_prev->ds_phys->ds_num_children--;
1003789Sahrens 		} else if (!after_branch_point) {
1004789Sahrens 			ds_prev->ds_phys->ds_next_snap_obj =
1005789Sahrens 			    ds->ds_phys->ds_next_snap_obj;
1006789Sahrens 		}
1007789Sahrens 	}
1008789Sahrens 
1009*1544Seschrock 	/* THE POINT OF NO (unsuccessful) RETURN */
1010*1544Seschrock 
1011789Sahrens 	ASSERT3P(tx->tx_pool, ==, dd->dd_pool);
1012789Sahrens 	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
1013789Sahrens 
1014789Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0) {
1015789Sahrens 		dsl_dataset_t *ds_next;
1016789Sahrens 		uint64_t itor = 0;
1017789Sahrens 
1018789Sahrens 		spa_scrub_restart(dp->dp_spa, tx->tx_txg);
1019789Sahrens 
1020*1544Seschrock 		VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
1021*1544Seschrock 		    ds->ds_phys->ds_next_snap_obj, NULL,
1022*1544Seschrock 		    DS_MODE_NONE, FTAG, &ds_next));
1023789Sahrens 		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
1024789Sahrens 
1025789Sahrens 		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
1026789Sahrens 		ds_next->ds_phys->ds_prev_snap_obj =
1027789Sahrens 		    ds->ds_phys->ds_prev_snap_obj;
1028789Sahrens 		ds_next->ds_phys->ds_prev_snap_txg =
1029789Sahrens 		    ds->ds_phys->ds_prev_snap_txg;
1030789Sahrens 		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
1031789Sahrens 		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
1032789Sahrens 
1033789Sahrens 		/*
1034789Sahrens 		 * Transfer to our deadlist (which will become next's
1035789Sahrens 		 * new deadlist) any entries from next's current
1036789Sahrens 		 * deadlist which were born before prev, and free the
1037789Sahrens 		 * other entries.
1038789Sahrens 		 *
1039789Sahrens 		 * XXX we're doing this long task with the config lock held
1040789Sahrens 		 */
1041789Sahrens 		while (bplist_iterate(&ds_next->ds_deadlist, &itor,
1042789Sahrens 		    &bp) == 0) {
1043789Sahrens 			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
1044*1544Seschrock 				VERIFY(0 == bplist_enqueue(&ds->ds_deadlist,
1045*1544Seschrock 				    &bp, tx));
1046789Sahrens 				if (ds_prev && !after_branch_point &&
1047789Sahrens 				    bp.blk_birth >
1048789Sahrens 				    ds_prev->ds_phys->ds_prev_snap_txg) {
1049789Sahrens 					ds_prev->ds_phys->ds_unique_bytes +=
1050789Sahrens 					    BP_GET_ASIZE(&bp);
1051789Sahrens 				}
1052789Sahrens 			} else {
1053789Sahrens 				used += BP_GET_ASIZE(&bp);
1054789Sahrens 				compressed += BP_GET_PSIZE(&bp);
1055789Sahrens 				uncompressed += BP_GET_UCSIZE(&bp);
1056789Sahrens 				/* XXX check return value? */
1057789Sahrens 				(void) arc_free(zio, dp->dp_spa, tx->tx_txg,
1058789Sahrens 				    &bp, NULL, NULL, ARC_NOWAIT);
1059789Sahrens 			}
1060789Sahrens 		}
1061789Sahrens 
1062789Sahrens 		/* free next's deadlist */
1063789Sahrens 		bplist_close(&ds_next->ds_deadlist);
1064789Sahrens 		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);
1065789Sahrens 
1066789Sahrens 		/* set next's deadlist to our deadlist */
1067789Sahrens 		ds_next->ds_phys->ds_deadlist_obj =
1068789Sahrens 		    ds->ds_phys->ds_deadlist_obj;
1069*1544Seschrock 		VERIFY(0 == bplist_open(&ds_next->ds_deadlist, mos,
1070*1544Seschrock 		    ds_next->ds_phys->ds_deadlist_obj));
1071789Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1072789Sahrens 
1073789Sahrens 		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
1074789Sahrens 			/*
1075789Sahrens 			 * Update next's unique to include blocks which
1076789Sahrens 			 * were previously shared by only this snapshot
1077789Sahrens 			 * and it.  Those blocks will be born after the
1078789Sahrens 			 * prev snap and before this snap, and will have
1079789Sahrens 			 * died after the next snap and before the one
1080789Sahrens 			 * after that (ie. be on the snap after next's
1081789Sahrens 			 * deadlist).
1082789Sahrens 			 *
1083789Sahrens 			 * XXX we're doing this long task with the
1084789Sahrens 			 * config lock held
1085789Sahrens 			 */
1086789Sahrens 			dsl_dataset_t *ds_after_next;
1087789Sahrens 
1088*1544Seschrock 			VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
1089789Sahrens 			    ds_next->ds_phys->ds_next_snap_obj, NULL,
1090*1544Seschrock 			    DS_MODE_NONE, FTAG, &ds_after_next));
1091789Sahrens 			itor = 0;
1092789Sahrens 			while (bplist_iterate(&ds_after_next->ds_deadlist,
1093789Sahrens 			    &itor, &bp) == 0) {
1094789Sahrens 				if (bp.blk_birth >
1095789Sahrens 				    ds->ds_phys->ds_prev_snap_txg &&
1096789Sahrens 				    bp.blk_birth <=
1097789Sahrens 				    ds->ds_phys->ds_creation_txg) {
1098789Sahrens 					ds_next->ds_phys->ds_unique_bytes +=
1099789Sahrens 					    BP_GET_ASIZE(&bp);
1100789Sahrens 				}
1101789Sahrens 			}
1102789Sahrens 
1103789Sahrens 			dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG);
1104789Sahrens 			ASSERT3P(ds_next->ds_prev, ==, NULL);
1105789Sahrens 		} else {
1106789Sahrens 			/*
1107789Sahrens 			 * It would be nice to update the head dataset's
1108789Sahrens 			 * unique.  To do so we would have to traverse
1109789Sahrens 			 * it for blocks born after ds_prev, which is
1110789Sahrens 			 * pretty expensive just to maintain something
1111789Sahrens 			 * for debugging purposes.
1112789Sahrens 			 */
1113789Sahrens 			ASSERT3P(ds_next->ds_prev, ==, ds);
1114789Sahrens 			dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE,
1115789Sahrens 			    ds_next);
1116789Sahrens 			if (ds_prev) {
1117*1544Seschrock 				VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
1118*1544Seschrock 				    ds->ds_phys->ds_prev_snap_obj, NULL,
1119*1544Seschrock 				    DS_MODE_NONE, ds_next, &ds_next->ds_prev));
1120789Sahrens 			} else {
1121789Sahrens 				ds_next->ds_prev = NULL;
1122789Sahrens 			}
1123789Sahrens 		}
1124789Sahrens 		dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG);
1125789Sahrens 
1126789Sahrens 		/*
1127789Sahrens 		 * NB: unique_bytes is not accurate for head objsets
1128789Sahrens 		 * because we don't update it when we delete the most
1129789Sahrens 		 * recent snapshot -- see above comment.
1130789Sahrens 		 */
1131789Sahrens 		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
1132789Sahrens 	} else {
1133789Sahrens 		/*
1134789Sahrens 		 * There's no next snapshot, so this is a head dataset.
1135789Sahrens 		 * Destroy the deadlist.  Unless it's a clone, the
1136789Sahrens 		 * deadlist should be empty.  (If it's a clone, it's
1137789Sahrens 		 * safe to ignore the deadlist contents.)
1138789Sahrens 		 */
1139789Sahrens 		struct killarg ka;
1140789Sahrens 
1141789Sahrens 		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
1142789Sahrens 		bplist_close(&ds->ds_deadlist);
1143789Sahrens 		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
1144789Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1145789Sahrens 
1146789Sahrens 		/*
1147789Sahrens 		 * Free everything that we point to (that's born after
1148789Sahrens 		 * the previous snapshot, if we are a clone)
1149789Sahrens 		 *
1150789Sahrens 		 * XXX we're doing this long task with the config lock held
1151789Sahrens 		 */
1152789Sahrens 		ka.usedp = &used;
1153789Sahrens 		ka.compressedp = &compressed;
1154789Sahrens 		ka.uncompressedp = &uncompressed;
1155789Sahrens 		ka.zio = zio;
1156789Sahrens 		ka.tx = tx;
1157789Sahrens 		err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
1158789Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
1159789Sahrens 		ASSERT3U(err, ==, 0);
1160789Sahrens 	}
1161789Sahrens 
1162789Sahrens 	err = zio_wait(zio);
1163789Sahrens 	ASSERT3U(err, ==, 0);
1164789Sahrens 
1165789Sahrens 	dsl_dir_diduse_space(dd, -used, -compressed, -uncompressed, tx);
1166789Sahrens 
1167789Sahrens 	if (ds->ds_phys->ds_snapnames_zapobj) {
1168789Sahrens 		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
1169789Sahrens 		ASSERT(err == 0);
1170789Sahrens 	}
1171789Sahrens 
1172789Sahrens 	if (dd->dd_phys->dd_head_dataset_obj == ds->ds_object) {
1173789Sahrens 		/* Erase the link in the dataset */
1174789Sahrens 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
1175789Sahrens 		dd->dd_phys->dd_head_dataset_obj = 0;
1176789Sahrens 		/*
1177789Sahrens 		 * dsl_dir_sync_destroy() called us, they'll destroy
1178789Sahrens 		 * the dataset.
1179789Sahrens 		 */
1180789Sahrens 	} else {
1181789Sahrens 		/* remove from snapshot namespace */
1182789Sahrens 		dsl_dataset_t *ds_head;
1183*1544Seschrock 		VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool,
1184*1544Seschrock 		    dd->dd_phys->dd_head_dataset_obj, NULL,
1185*1544Seschrock 		    DS_MODE_NONE, FTAG, &ds_head));
1186789Sahrens #ifdef ZFS_DEBUG
1187789Sahrens 		{
1188789Sahrens 			uint64_t val;
1189789Sahrens 			err = zap_lookup(mos,
1190789Sahrens 			    ds_head->ds_phys->ds_snapnames_zapobj,
1191789Sahrens 			    snapname, 8, 1, &val);
1192789Sahrens 			ASSERT3U(err, ==, 0);
1193789Sahrens 			ASSERT3U(val, ==, obj);
1194789Sahrens 		}
1195789Sahrens #endif
1196789Sahrens 		err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj,
1197789Sahrens 		    snapname, tx);
1198789Sahrens 		ASSERT(err == 0);
1199789Sahrens 		dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG);
1200789Sahrens 	}
1201789Sahrens 
1202789Sahrens 	if (ds_prev && ds->ds_prev != ds_prev)
1203789Sahrens 		dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);
1204789Sahrens 
1205789Sahrens 	err = dmu_object_free(mos, obj, tx);
1206789Sahrens 	ASSERT(err == 0);
1207789Sahrens 
1208789Sahrens 	/*
1209789Sahrens 	 * Close the objset with mode NONE, thus leaving it with
1210789Sahrens 	 * DOS_REF_MAX set, so that noone can access it.
1211789Sahrens 	 */
1212789Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
1213789Sahrens 
1214789Sahrens 	if (drop_lock)
1215789Sahrens 		rw_exit(&dp->dp_config_rwlock);
1216789Sahrens 	return (0);
1217789Sahrens }
1218789Sahrens 
1219789Sahrens int
1220789Sahrens dsl_dataset_snapshot_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
1221789Sahrens {
1222789Sahrens 	const char *snapname = arg;
1223789Sahrens 	dsl_pool_t *dp = dd->dd_pool;
1224789Sahrens 	dmu_buf_t *dbuf;
1225789Sahrens 	dsl_dataset_phys_t *dsphys;
1226789Sahrens 	uint64_t dsobj, value;
1227789Sahrens 	objset_t *mos = dp->dp_meta_objset;
1228789Sahrens 	dsl_dataset_t *ds;
1229789Sahrens 	int err;
1230789Sahrens 
1231789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1232789Sahrens 
1233789Sahrens 	if (dd->dd_phys->dd_head_dataset_obj == 0)
1234789Sahrens 		return (EINVAL);
1235*1544Seschrock 	err = dsl_dataset_open_obj(dp, dd->dd_phys->dd_head_dataset_obj, NULL,
1236*1544Seschrock 	    DS_MODE_NONE, FTAG, &ds);
1237*1544Seschrock 	if (err)
1238*1544Seschrock 		return (err);
1239789Sahrens 
1240789Sahrens 	err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
1241789Sahrens 	    snapname, 8, 1, &value);
1242789Sahrens 	if (err == 0) {
1243789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
1244789Sahrens 		return (EEXIST);
1245789Sahrens 	}
1246789Sahrens 	ASSERT(err == ENOENT);
1247789Sahrens 
1248789Sahrens 	/* The point of no (unsuccessful) return */
1249789Sahrens 
1250789Sahrens 	dprintf_dd(dd, "taking snapshot %s in txg %llu\n",
1251789Sahrens 	    snapname, tx->tx_txg);
1252789Sahrens 
1253789Sahrens 	spa_scrub_restart(dp->dp_spa, tx->tx_txg);
1254789Sahrens 
1255789Sahrens 	rw_enter(&dp->dp_config_rwlock, RW_WRITER);
1256789Sahrens 
1257928Stabriz 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0,
1258928Stabriz 	    DMU_OT_DSL_DATASET, sizeof (dsl_dataset_phys_t), tx);
1259*1544Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, dsobj, FTAG, &dbuf));
1260789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
1261789Sahrens 	dsphys = dbuf->db_data;
1262789Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
1263789Sahrens 	dsphys->ds_fsid_guid = unique_create();
1264789Sahrens 	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
1265789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
1266789Sahrens 	    sizeof (dsphys->ds_guid));
1267789Sahrens 	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
1268789Sahrens 	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
1269789Sahrens 	dsphys->ds_next_snap_obj = ds->ds_object;
1270789Sahrens 	dsphys->ds_num_children = 1;
1271789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
1272789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
1273789Sahrens 	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
1274789Sahrens 	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
1275789Sahrens 	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
1276789Sahrens 	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
1277789Sahrens 	dsphys->ds_restoring = ds->ds_phys->ds_restoring;
1278789Sahrens 	dsphys->ds_bp = ds->ds_phys->ds_bp;
1279*1544Seschrock 	dmu_buf_rele(dbuf, FTAG);
1280789Sahrens 
1281789Sahrens 	if (ds->ds_phys->ds_prev_snap_obj != 0) {
1282789Sahrens 		dsl_dataset_t *ds_prev;
1283789Sahrens 
1284*1544Seschrock 		VERIFY(0 == dsl_dataset_open_obj(dp,
1285*1544Seschrock 		    ds->ds_phys->ds_prev_snap_obj, NULL,
1286*1544Seschrock 		    DS_MODE_NONE, FTAG, &ds_prev));
1287789Sahrens 		ASSERT(ds_prev->ds_phys->ds_next_snap_obj ==
1288789Sahrens 		    ds->ds_object ||
1289789Sahrens 		    ds_prev->ds_phys->ds_num_children > 1);
1290789Sahrens 		if (ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
1291789Sahrens 			dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
1292789Sahrens 			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
1293789Sahrens 			    ds_prev->ds_phys->ds_creation_txg);
1294789Sahrens 			ds_prev->ds_phys->ds_next_snap_obj = dsobj;
1295789Sahrens 		}
1296789Sahrens 		dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);
1297789Sahrens 	} else {
1298789Sahrens 		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 0);
1299789Sahrens 	}
1300789Sahrens 
1301789Sahrens 	bplist_close(&ds->ds_deadlist);
1302789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
1303789Sahrens 	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg);
1304789Sahrens 	ds->ds_phys->ds_prev_snap_obj = dsobj;
1305789Sahrens 	ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg;
1306789Sahrens 	ds->ds_phys->ds_unique_bytes = 0;
1307789Sahrens 	ds->ds_phys->ds_deadlist_obj =
1308789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
1309*1544Seschrock 	VERIFY(0 == bplist_open(&ds->ds_deadlist, mos,
1310*1544Seschrock 	    ds->ds_phys->ds_deadlist_obj));
1311789Sahrens 
1312789Sahrens 	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
1313789Sahrens 	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
1314789Sahrens 	    snapname, 8, 1, &dsobj, tx);
1315789Sahrens 	ASSERT(err == 0);
1316789Sahrens 
1317789Sahrens 	if (ds->ds_prev)
1318789Sahrens 		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
1319*1544Seschrock 	VERIFY(0 == dsl_dataset_open_obj(dp,
1320*1544Seschrock 	    ds->ds_phys->ds_prev_snap_obj, snapname,
1321*1544Seschrock 	    DS_MODE_NONE, ds, &ds->ds_prev));
1322789Sahrens 
1323789Sahrens 	rw_exit(&dp->dp_config_rwlock);
1324789Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
1325789Sahrens 
1326789Sahrens 	return (0);
1327789Sahrens }
1328789Sahrens 
1329789Sahrens void
1330789Sahrens dsl_dataset_sync(dsl_dataset_t *ds, dmu_tx_t *tx)
1331789Sahrens {
1332789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1333789Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
1334789Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
1335789Sahrens 
1336789Sahrens 	dmu_objset_sync(ds->ds_user_ptr, tx);
1337789Sahrens 	dsl_dir_dirty(ds->ds_dir, tx);
1338789Sahrens 	bplist_close(&ds->ds_deadlist);
1339789Sahrens 
1340*1544Seschrock 	dmu_buf_rele(ds->ds_dbuf, ds);
1341789Sahrens }
1342789Sahrens 
1343789Sahrens void
1344789Sahrens dsl_dataset_stats(dsl_dataset_t *ds, dmu_objset_stats_t *dds)
1345789Sahrens {
1346789Sahrens 	/* fill in properties crap */
1347789Sahrens 	dsl_dir_stats(ds->ds_dir, dds);
1348789Sahrens 
1349789Sahrens 	if (ds->ds_phys->ds_num_children != 0) {
1350789Sahrens 		dds->dds_is_snapshot = TRUE;
1351789Sahrens 		dds->dds_num_clones = ds->ds_phys->ds_num_children - 1;
1352789Sahrens 	}
1353789Sahrens 
1354789Sahrens 	dds->dds_last_txg = ds->ds_phys->ds_bp.blk_birth;
1355789Sahrens 
1356789Sahrens 	dds->dds_objects_used = ds->ds_phys->ds_bp.blk_fill;
1357789Sahrens 	dds->dds_objects_avail = DN_MAX_OBJECT - dds->dds_objects_used;
1358789Sahrens 
1359789Sahrens 	/* We override the dataset's creation time... they should be the same */
1360789Sahrens 	dds->dds_creation_time = ds->ds_phys->ds_creation_time;
1361789Sahrens 	dds->dds_creation_txg = ds->ds_phys->ds_creation_txg;
1362789Sahrens 	dds->dds_space_refd = ds->ds_phys->ds_used_bytes;
1363789Sahrens 	dds->dds_fsid_guid = ds->ds_phys->ds_fsid_guid;
1364789Sahrens 
1365789Sahrens 	if (ds->ds_phys->ds_next_snap_obj) {
1366789Sahrens 		/*
1367789Sahrens 		 * This is a snapshot; override the dd's space used with
1368789Sahrens 		 * our unique space
1369789Sahrens 		 */
1370789Sahrens 		dds->dds_space_used = ds->ds_phys->ds_unique_bytes;
1371789Sahrens 		dds->dds_compressed_bytes =
1372789Sahrens 		    ds->ds_phys->ds_compressed_bytes;
1373789Sahrens 		dds->dds_uncompressed_bytes =
1374789Sahrens 		    ds->ds_phys->ds_uncompressed_bytes;
1375789Sahrens 	}
1376789Sahrens }
1377789Sahrens 
1378789Sahrens dsl_pool_t *
1379789Sahrens dsl_dataset_pool(dsl_dataset_t *ds)
1380789Sahrens {
1381789Sahrens 	return (ds->ds_dir->dd_pool);
1382789Sahrens }
1383789Sahrens 
/*
 * Argument passed from dsl_dataset_rename() to
 * dsl_dataset_snapshot_rename_sync().
 */
struct osrenamearg {
	const char *oldname;	/* full current name of the snapshot */
	const char *newname;	/* full requested name */
};
1388789Sahrens 
1389789Sahrens static int
1390789Sahrens dsl_dataset_snapshot_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
1391789Sahrens {
1392789Sahrens 	struct osrenamearg *ora = arg;
1393789Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
1394789Sahrens 	dsl_dir_t *nds;
1395789Sahrens 	const char *tail;
1396789Sahrens 	int err;
1397789Sahrens 	dsl_dataset_t *snds, *fsds;
1398789Sahrens 	uint64_t val;
1399789Sahrens 
1400789Sahrens 	err = dsl_dataset_open_spa(dd->dd_pool->dp_spa, ora->oldname,
1401789Sahrens 	    DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &snds);
1402789Sahrens 	if (err)
1403789Sahrens 		return (err);
1404789Sahrens 
1405789Sahrens 	if (snds->ds_dir != dd) {
1406789Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1407789Sahrens 		return (EINVAL);
1408789Sahrens 	}
1409789Sahrens 
1410789Sahrens 	/* better be changing a snapshot */
1411789Sahrens 	if (snds->ds_phys->ds_next_snap_obj == 0) {
1412789Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1413789Sahrens 		return (EINVAL);
1414789Sahrens 	}
1415789Sahrens 
1416789Sahrens 	/* new fs better exist */
1417*1544Seschrock 	err = dsl_dir_open_spa(dd->dd_pool->dp_spa, ora->newname,
1418*1544Seschrock 	    FTAG, &nds, &tail);
1419*1544Seschrock 	if (err) {
1420789Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1421*1544Seschrock 		return (err);
1422789Sahrens 	}
1423789Sahrens 
1424789Sahrens 	dsl_dir_close(nds, FTAG);
1425789Sahrens 
1426789Sahrens 	/* new name better be in same fs */
1427789Sahrens 	if (nds != dd) {
1428789Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1429789Sahrens 		return (EINVAL);
1430789Sahrens 	}
1431789Sahrens 
1432789Sahrens 	/* new name better be a snapshot */
1433789Sahrens 	if (tail == NULL || tail[0] != '@') {
1434789Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1435789Sahrens 		return (EINVAL);
1436789Sahrens 	}
1437789Sahrens 
1438789Sahrens 	tail++;
1439789Sahrens 
1440*1544Seschrock 	err = dsl_dataset_open_obj(dd->dd_pool,
1441*1544Seschrock 	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG, &fsds);
1442*1544Seschrock 	if (err) {
1443*1544Seschrock 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1444*1544Seschrock 		return (err);
1445*1544Seschrock 	}
1446789Sahrens 
1447789Sahrens 	/* new name better not be in use */
1448789Sahrens 	err = zap_lookup(mos, fsds->ds_phys->ds_snapnames_zapobj,
1449789Sahrens 	    tail, 8, 1, &val);
1450789Sahrens 	if (err != ENOENT) {
1451789Sahrens 		if (err == 0)
1452789Sahrens 			err = EEXIST;
1453789Sahrens 		dsl_dataset_close(fsds, DS_MODE_NONE, FTAG);
1454789Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1455789Sahrens 		return (EEXIST);
1456789Sahrens 	}
1457789Sahrens 
1458789Sahrens 	/* The point of no (unsuccessful) return */
1459789Sahrens 
1460789Sahrens 	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_WRITER);
1461*1544Seschrock 	VERIFY(0 == dsl_dataset_get_snapname(snds));
1462789Sahrens 	err = zap_remove(mos, fsds->ds_phys->ds_snapnames_zapobj,
1463789Sahrens 	    snds->ds_snapname, tx);
1464789Sahrens 	ASSERT3U(err, ==, 0);
1465789Sahrens 	mutex_enter(&snds->ds_lock);
1466789Sahrens 	(void) strcpy(snds->ds_snapname, tail);
1467789Sahrens 	mutex_exit(&snds->ds_lock);
1468789Sahrens 	err = zap_add(mos, fsds->ds_phys->ds_snapnames_zapobj,
1469789Sahrens 	    snds->ds_snapname, 8, 1, &snds->ds_object, tx);
1470789Sahrens 	ASSERT3U(err, ==, 0);
1471789Sahrens 	rw_exit(&dd->dd_pool->dp_config_rwlock);
1472789Sahrens 
1473789Sahrens 	dsl_dataset_close(fsds, DS_MODE_NONE, FTAG);
1474789Sahrens 	dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1475789Sahrens 	return (0);
1476789Sahrens }
1477789Sahrens 
1478789Sahrens #pragma weak dmu_objset_rename = dsl_dataset_rename
1479789Sahrens int
1480789Sahrens dsl_dataset_rename(const char *osname, const char *newname)
1481789Sahrens {
1482789Sahrens 	dsl_dir_t *dd;
1483789Sahrens 	const char *tail;
1484789Sahrens 	struct osrenamearg ora;
1485789Sahrens 	int err;
1486789Sahrens 
1487*1544Seschrock 	err = dsl_dir_open(osname, FTAG, &dd, &tail);
1488*1544Seschrock 	if (err)
1489*1544Seschrock 		return (err);
1490789Sahrens 	if (tail == NULL) {
1491789Sahrens 		err = dsl_dir_sync_task(dd,
1492789Sahrens 		    dsl_dir_rename_sync, (void*)newname, 1<<12);
1493789Sahrens 		dsl_dir_close(dd, FTAG);
1494789Sahrens 		return (err);
1495789Sahrens 	}
1496789Sahrens 	if (tail[0] != '@') {
1497789Sahrens 		/* the name ended in a nonexistant component */
1498789Sahrens 		dsl_dir_close(dd, FTAG);
1499789Sahrens 		return (ENOENT);
1500789Sahrens 	}
1501789Sahrens 
1502789Sahrens 	ora.oldname = osname;
1503789Sahrens 	ora.newname = newname;
1504789Sahrens 
1505789Sahrens 	err = dsl_dir_sync_task(dd,
1506789Sahrens 	    dsl_dataset_snapshot_rename_sync, &ora, 1<<12);
1507789Sahrens 	dsl_dir_close(dd, FTAG);
1508789Sahrens 	return (err);
1509789Sahrens }
1510