xref: /onnv-gate/usr/src/uts/common/fs/zfs/dsl_dataset.c (revision 885:d925b21dba78)
1789Sahrens /*
2789Sahrens  * CDDL HEADER START
3789Sahrens  *
4789Sahrens  * The contents of this file are subject to the terms of the
5789Sahrens  * Common Development and Distribution License, Version 1.0 only
6789Sahrens  * (the "License").  You may not use this file except in compliance
7789Sahrens  * with the License.
8789Sahrens  *
9789Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10789Sahrens  * or http://www.opensolaris.org/os/licensing.
11789Sahrens  * See the License for the specific language governing permissions
12789Sahrens  * and limitations under the License.
13789Sahrens  *
14789Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
15789Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16789Sahrens  * If applicable, add the following below this CDDL HEADER, with the
17789Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
18789Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
19789Sahrens  *
20789Sahrens  * CDDL HEADER END
21789Sahrens  */
22789Sahrens /*
23789Sahrens  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24789Sahrens  * Use is subject to license terms.
25789Sahrens  */
26789Sahrens 
27789Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
28789Sahrens 
29789Sahrens #include <sys/dmu_objset.h>
30789Sahrens #include <sys/dsl_dataset.h>
31789Sahrens #include <sys/dsl_dir.h>
32789Sahrens #include <sys/dmu_traverse.h>
33789Sahrens #include <sys/dmu_tx.h>
34789Sahrens #include <sys/arc.h>
35789Sahrens #include <sys/zio.h>
36789Sahrens #include <sys/zap.h>
37789Sahrens #include <sys/unique.h>
38789Sahrens #include <sys/zfs_context.h>
39789Sahrens 
/* Ceiling for a dataset's weighted open reference count. */
#define	DOS_REF_MAX	(1ULL << 62)

/* Block size handed to bplist_create() when a deadlist is created. */
#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE

/*
 * Size recorded as the "uncompressed" size of a block: the physical size
 * for indirect blocks (level > 0) and for object types flagged as metadata
 * in dmu_ot[], the logical size for everything else.
 *
 * The expansion intentionally carries no trailing semicolon, so the macro
 * behaves like an ordinary expression and can be nested in larger ones.
 */
#define	BP_GET_UCSIZE(bp) \
	((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
	BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
47789Sahrens 
48789Sahrens /*
49789Sahrens  * We use weighted reference counts to express the various forms of exclusion
50789Sahrens  * between different open modes.  A STANDARD open is 1 point, an EXCLUSIVE open
51789Sahrens  * is DOS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE.
52789Sahrens  * This makes the exclusion logic simple: the total refcnt for all opens cannot
53789Sahrens  * exceed DOS_REF_MAX.  For example, EXCLUSIVE opens are exclusive because their
54789Sahrens  * weight (DOS_REF_MAX) consumes the entire refcnt space.  PRIMARY opens consume
55789Sahrens  * just over half of the refcnt space, so there can't be more than one, but it
56789Sahrens  * can peacefully coexist with any number of STANDARD opens.
57789Sahrens  */
58789Sahrens static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
59789Sahrens 	0,			/* DOS_MODE_NONE - invalid		*/
60789Sahrens 	1,			/* DOS_MODE_STANDARD - unlimited number	*/
61789Sahrens 	(DOS_REF_MAX >> 1) + 1,	/* DOS_MODE_PRIMARY - only one of these	*/
62789Sahrens 	DOS_REF_MAX		/* DOS_MODE_EXCLUSIVE - no other opens	*/
63789Sahrens };
64789Sahrens 
65789Sahrens 
66789Sahrens void
67789Sahrens dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
68789Sahrens {
69789Sahrens 	int used = BP_GET_ASIZE(bp);
70789Sahrens 	int compressed = BP_GET_PSIZE(bp);
71789Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
72789Sahrens 
73789Sahrens 	dprintf_bp(bp, "born, ds=%p\n", ds);
74789Sahrens 
75789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
76789Sahrens 	/* It could have been compressed away to nothing */
77789Sahrens 	if (BP_IS_HOLE(bp))
78789Sahrens 		return;
79789Sahrens 	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
80789Sahrens 	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
81789Sahrens 	if (ds == NULL) {
82789Sahrens 		/*
83789Sahrens 		 * Account for the meta-objset space in its placeholder
84789Sahrens 		 * dsl_dir.
85789Sahrens 		 */
86789Sahrens 		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
87789Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
88789Sahrens 		    used, compressed, uncompressed, tx);
89789Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
90789Sahrens 		return;
91789Sahrens 	}
92789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
93789Sahrens 	mutex_enter(&ds->ds_lock);
94789Sahrens 	ds->ds_phys->ds_used_bytes += used;
95789Sahrens 	ds->ds_phys->ds_compressed_bytes += compressed;
96789Sahrens 	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
97789Sahrens 	ds->ds_phys->ds_unique_bytes += used;
98789Sahrens 	mutex_exit(&ds->ds_lock);
99789Sahrens 	dsl_dir_diduse_space(ds->ds_dir,
100789Sahrens 	    used, compressed, uncompressed, tx);
101789Sahrens }
102789Sahrens 
103789Sahrens void
104789Sahrens dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
105789Sahrens {
106789Sahrens 	int used = BP_GET_ASIZE(bp);
107789Sahrens 	int compressed = BP_GET_PSIZE(bp);
108789Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
109789Sahrens 
110789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
111789Sahrens 	if (BP_IS_HOLE(bp))
112789Sahrens 		return;
113789Sahrens 
114789Sahrens 	ASSERT(used > 0);
115789Sahrens 	if (ds == NULL) {
116789Sahrens 		/*
117789Sahrens 		 * Account for the meta-objset space in its placeholder
118789Sahrens 		 * dataset.
119789Sahrens 		 */
120789Sahrens 		/* XXX this can fail, what do we do when it does? */
121789Sahrens 		(void) arc_free(NULL, tx->tx_pool->dp_spa,
122789Sahrens 		    tx->tx_txg, bp, NULL, NULL, ARC_WAIT);
123789Sahrens 		bzero(bp, sizeof (blkptr_t));
124789Sahrens 
125789Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
126789Sahrens 		    -used, -compressed, -uncompressed, tx);
127789Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
128789Sahrens 		return;
129789Sahrens 	}
130789Sahrens 	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
131789Sahrens 
132789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
133789Sahrens 
134789Sahrens 	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
135789Sahrens 		dprintf_bp(bp, "freeing: %s", "");
136789Sahrens 		/* XXX check return code? */
137789Sahrens 		(void) arc_free(NULL, tx->tx_pool->dp_spa,
138789Sahrens 		    tx->tx_txg, bp, NULL, NULL, ARC_WAIT);
139789Sahrens 
140789Sahrens 		mutex_enter(&ds->ds_lock);
141789Sahrens 		/* XXX unique_bytes is not accurate for head datasets */
142789Sahrens 		/* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */
143789Sahrens 		ds->ds_phys->ds_unique_bytes -= used;
144789Sahrens 		mutex_exit(&ds->ds_lock);
145789Sahrens 		dsl_dir_diduse_space(ds->ds_dir,
146789Sahrens 		    -used, -compressed, -uncompressed, tx);
147789Sahrens 	} else {
148789Sahrens 		dprintf_bp(bp, "putting on dead list: %s", "");
149789Sahrens 		bplist_enqueue(&ds->ds_deadlist, bp, tx);
150789Sahrens 		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
151789Sahrens 		if (ds->ds_phys->ds_prev_snap_obj != 0) {
152789Sahrens 			ASSERT3U(ds->ds_prev->ds_object, ==,
153789Sahrens 			    ds->ds_phys->ds_prev_snap_obj);
154789Sahrens 			ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
155789Sahrens 			if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
156789Sahrens 			    ds->ds_object &&
157789Sahrens 			    bp->blk_birth >
158789Sahrens 			    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
159789Sahrens 				dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
160789Sahrens 				mutex_enter(&ds->ds_prev->ds_lock);
161789Sahrens 				ds->ds_prev->ds_phys->ds_unique_bytes +=
162789Sahrens 				    used;
163789Sahrens 				mutex_exit(&ds->ds_prev->ds_lock);
164789Sahrens 			}
165789Sahrens 		}
166789Sahrens 	}
167789Sahrens 	bzero(bp, sizeof (blkptr_t));
168789Sahrens 	mutex_enter(&ds->ds_lock);
169789Sahrens 	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
170789Sahrens 	ds->ds_phys->ds_used_bytes -= used;
171789Sahrens 	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
172789Sahrens 	ds->ds_phys->ds_compressed_bytes -= compressed;
173789Sahrens 	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
174789Sahrens 	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
175789Sahrens 	mutex_exit(&ds->ds_lock);
176789Sahrens }
177789Sahrens 
178789Sahrens int
179789Sahrens dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth, dmu_tx_t *tx)
180789Sahrens {
181789Sahrens 	uint64_t prev_snap_txg;
182789Sahrens 	dsl_dir_t *dd;
183789Sahrens 	/* ASSERT that it is not a snapshot */
184789Sahrens 	if (ds == NULL)
185789Sahrens 		return (TRUE);
186789Sahrens 	/*
187789Sahrens 	 * The snapshot creation could fail, but that would cause an
188789Sahrens 	 * incorrect FALSE return, which would only result in an
189789Sahrens 	 * overestimation of the amount of space that an operation would
190789Sahrens 	 * consume, which is OK.
191789Sahrens 	 *
192789Sahrens 	 * There's also a small window where we could miss a pending
193789Sahrens 	 * snapshot, because we could set the sync task in the quiescing
194789Sahrens 	 * phase.  So this should only be used as a guess.
195789Sahrens 	 */
196789Sahrens 	dd = ds->ds_dir;
197789Sahrens 	mutex_enter(&dd->dd_lock);
198789Sahrens 	if (dd->dd_sync_func == dsl_dataset_snapshot_sync &&
199789Sahrens 	    dd->dd_sync_txg < tx->tx_txg)
200789Sahrens 		prev_snap_txg = dd->dd_sync_txg;
201789Sahrens 	else
202789Sahrens 		prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
203789Sahrens 	mutex_exit(&dd->dd_lock);
204789Sahrens 	return (blk_birth > prev_snap_txg);
205789Sahrens }
206789Sahrens 
207789Sahrens /* ARGSUSED */
208789Sahrens static void
209789Sahrens dsl_dataset_evict(dmu_buf_t *db, void *dsv)
210789Sahrens {
211789Sahrens 	dsl_dataset_t *ds = dsv;
212789Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
213789Sahrens 
214789Sahrens 	/* open_refcount == DOS_REF_MAX when deleting */
215789Sahrens 	ASSERT(ds->ds_open_refcount == 0 ||
216789Sahrens 	    ds->ds_open_refcount == DOS_REF_MAX);
217789Sahrens 
218789Sahrens 	dprintf_ds(ds, "evicting %s\n", "");
219789Sahrens 
220789Sahrens 	unique_remove(ds->ds_phys->ds_fsid_guid);
221789Sahrens 
222789Sahrens 	if (ds->ds_user_ptr != NULL)
223789Sahrens 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
224789Sahrens 
225789Sahrens 	if (ds->ds_prev) {
226789Sahrens 		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
227789Sahrens 		ds->ds_prev = NULL;
228789Sahrens 	}
229789Sahrens 
230789Sahrens 	bplist_close(&ds->ds_deadlist);
231789Sahrens 	dsl_dir_close(ds->ds_dir, ds);
232789Sahrens 
233789Sahrens 	if (list_link_active(&ds->ds_synced_link))
234789Sahrens 		list_remove(&dp->dp_synced_objsets, ds);
235789Sahrens 
236789Sahrens 	kmem_free(ds, sizeof (dsl_dataset_t));
237789Sahrens }
238789Sahrens 
239789Sahrens static void
240789Sahrens dsl_dataset_get_snapname(dsl_dataset_t *ds)
241789Sahrens {
242789Sahrens 	dsl_dataset_phys_t *headphys;
243789Sahrens 	int err;
244789Sahrens 	dmu_buf_t *headdbuf;
245789Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
246789Sahrens 	objset_t *mos = dp->dp_meta_objset;
247789Sahrens 
248789Sahrens 	if (ds->ds_snapname[0])
249789Sahrens 		return;
250789Sahrens 	if (ds->ds_phys->ds_next_snap_obj == 0)
251789Sahrens 		return;
252789Sahrens 
253789Sahrens 	headdbuf = dmu_bonus_hold_tag(mos,
254789Sahrens 	    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG);
255789Sahrens 	dmu_buf_read(headdbuf);
256789Sahrens 	headphys = headdbuf->db_data;
257789Sahrens 	err = zap_value_search(dp->dp_meta_objset,
258789Sahrens 	    headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname);
259789Sahrens 	ASSERT(err == 0);
260789Sahrens 	dmu_buf_rele_tag(headdbuf, FTAG);
261789Sahrens }
262789Sahrens 
263789Sahrens dsl_dataset_t *
264789Sahrens dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname,
265789Sahrens     int mode, void *tag)
266789Sahrens {
267789Sahrens 	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
268789Sahrens 	objset_t *mos = dp->dp_meta_objset;
269789Sahrens 	dmu_buf_t *dbuf;
270789Sahrens 	dsl_dataset_t *ds;
271789Sahrens 
272789Sahrens 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
273789Sahrens 	    dsl_pool_sync_context(dp));
274789Sahrens 
275789Sahrens 	dbuf = dmu_bonus_hold_tag(mos, dsobj, tag);
276789Sahrens 	dmu_buf_read(dbuf);
277789Sahrens 	ds = dmu_buf_get_user(dbuf);
278789Sahrens 	if (ds == NULL) {
279789Sahrens 		dsl_dataset_t *winner;
280789Sahrens 
281789Sahrens 		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
282789Sahrens 		ds->ds_dbuf = dbuf;
283789Sahrens 		ds->ds_object = dsobj;
284789Sahrens 		ds->ds_phys = dbuf->db_data;
285789Sahrens 		ds->ds_dir = dsl_dir_open_obj(dp,
286789Sahrens 		    ds->ds_phys->ds_dir_obj, NULL, ds);
287789Sahrens 
288789Sahrens 		bplist_open(&ds->ds_deadlist,
289789Sahrens 		    mos, ds->ds_phys->ds_deadlist_obj);
290789Sahrens 
291789Sahrens 		if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) {
292789Sahrens 			ds->ds_snapname[0] = '\0';
293789Sahrens 			if (ds->ds_phys->ds_prev_snap_obj) {
294789Sahrens 				ds->ds_prev =
295789Sahrens 				    dsl_dataset_open_obj(dp,
296789Sahrens 				    ds->ds_phys->ds_prev_snap_obj, NULL,
297789Sahrens 				    DS_MODE_NONE, ds);
298789Sahrens 			}
299789Sahrens 		} else {
300789Sahrens 			if (snapname) {
301789Sahrens #ifdef ZFS_DEBUG
302789Sahrens 				dsl_dataset_phys_t *headphys;
303789Sahrens 				int err;
304789Sahrens 				dmu_buf_t *headdbuf = dmu_bonus_hold_tag(mos,
305789Sahrens 				    ds->ds_dir->dd_phys->
306789Sahrens 				    dd_head_dataset_obj, FTAG);
307789Sahrens 				dmu_buf_read(headdbuf);
308789Sahrens 				headphys = headdbuf->db_data;
309789Sahrens 				uint64_t foundobj;
310789Sahrens 				err = zap_lookup(dp->dp_meta_objset,
311789Sahrens 				    headphys->ds_snapnames_zapobj,
312789Sahrens 				    snapname, sizeof (foundobj), 1, &foundobj);
313789Sahrens 				ASSERT3U(err, ==, 0);
314789Sahrens 				ASSERT3U(foundobj, ==, dsobj);
315789Sahrens 				dmu_buf_rele_tag(headdbuf, FTAG);
316789Sahrens #endif
317789Sahrens 				(void) strcat(ds->ds_snapname, snapname);
318789Sahrens 			} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
319789Sahrens 				dsl_dataset_get_snapname(ds);
320789Sahrens 			}
321789Sahrens 		}
322789Sahrens 
323789Sahrens 		winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
324789Sahrens 		    dsl_dataset_evict);
325789Sahrens 		if (winner) {
326789Sahrens 			bplist_close(&ds->ds_deadlist);
327789Sahrens 			if (ds->ds_prev) {
328789Sahrens 				dsl_dataset_close(ds->ds_prev,
329789Sahrens 				    DS_MODE_NONE, ds);
330789Sahrens 			}
331789Sahrens 			dsl_dir_close(ds->ds_dir, ds);
332789Sahrens 			kmem_free(ds, sizeof (dsl_dataset_t));
333789Sahrens 			ds = winner;
334789Sahrens 		} else {
335789Sahrens 			uint64_t new =
336789Sahrens 			    unique_insert(ds->ds_phys->ds_fsid_guid);
337789Sahrens 			if (new != ds->ds_phys->ds_fsid_guid) {
338789Sahrens 				/* XXX it won't necessarily be synced... */
339789Sahrens 				ds->ds_phys->ds_fsid_guid = new;
340789Sahrens 			}
341789Sahrens 		}
342789Sahrens 	}
343789Sahrens 	ASSERT3P(ds->ds_dbuf, ==, dbuf);
344789Sahrens 	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
345789Sahrens 
346789Sahrens 	mutex_enter(&ds->ds_lock);
347789Sahrens 	if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY &&
348789Sahrens 	    ds->ds_phys->ds_restoring && !DS_MODE_IS_RESTORE(mode)) ||
349789Sahrens 	    (ds->ds_open_refcount + weight > DOS_REF_MAX)) {
350789Sahrens 		mutex_exit(&ds->ds_lock);
351789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
352789Sahrens 		return (NULL);
353789Sahrens 	}
354789Sahrens 	ds->ds_open_refcount += weight;
355789Sahrens 	mutex_exit(&ds->ds_lock);
356789Sahrens 
357789Sahrens 	return (ds);
358789Sahrens }
359789Sahrens 
360789Sahrens int
361789Sahrens dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
362789Sahrens     void *tag, dsl_dataset_t **dsp)
363789Sahrens {
364789Sahrens 	dsl_dir_t *dd;
365789Sahrens 	dsl_pool_t *dp;
366789Sahrens 	const char *tail;
367789Sahrens 	uint64_t obj;
368789Sahrens 	dsl_dataset_t *ds = NULL;
369789Sahrens 	int err = 0;
370789Sahrens 
371789Sahrens 	dd = dsl_dir_open_spa(spa, name, FTAG, &tail);
372789Sahrens 	if (dd == NULL)
373789Sahrens 		return (ENOENT);
374789Sahrens 
375789Sahrens 	dp = dd->dd_pool;
376789Sahrens 	obj = dd->dd_phys->dd_head_dataset_obj;
377789Sahrens 	rw_enter(&dp->dp_config_rwlock, RW_READER);
378789Sahrens 	if (obj == 0) {
379789Sahrens 		/* A dataset with no associated objset */
380789Sahrens 		err = ENOENT;
381789Sahrens 		goto out;
382789Sahrens 	}
383789Sahrens 
384789Sahrens 	if (tail != NULL) {
385789Sahrens 		objset_t *mos = dp->dp_meta_objset;
386789Sahrens 
387789Sahrens 		ds = dsl_dataset_open_obj(dp, obj, NULL, DS_MODE_NONE, tag);
388789Sahrens 		obj = ds->ds_phys->ds_snapnames_zapobj;
389789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
390789Sahrens 		ds = NULL;
391789Sahrens 
392789Sahrens 		if (tail[0] != '@') {
393789Sahrens 			err = ENOENT;
394789Sahrens 			goto out;
395789Sahrens 		}
396789Sahrens 		tail++;
397789Sahrens 
398789Sahrens 		/* Look for a snapshot */
399789Sahrens 		if (!DS_MODE_IS_READONLY(mode)) {
400789Sahrens 			err = EROFS;
401789Sahrens 			goto out;
402789Sahrens 		}
403789Sahrens 		dprintf("looking for snapshot '%s'\n", tail);
404789Sahrens 		err = zap_lookup(mos, obj, tail, 8, 1, &obj);
405789Sahrens 		if (err)
406789Sahrens 			goto out;
407789Sahrens 	}
408789Sahrens 	ds = dsl_dataset_open_obj(dp, obj, tail, mode, tag);
409789Sahrens 	if (ds == NULL)
410789Sahrens 		err = EBUSY;
411789Sahrens 
412789Sahrens out:
413789Sahrens 	rw_exit(&dp->dp_config_rwlock);
414789Sahrens 	dsl_dir_close(dd, FTAG);
415789Sahrens 
416789Sahrens 	ASSERT3U((err == 0), ==, (ds != NULL));
417789Sahrens 	/* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */
418789Sahrens 
419789Sahrens 	*dsp = ds;
420789Sahrens 	return (err);
421789Sahrens }
422789Sahrens 
423789Sahrens int
424789Sahrens dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp)
425789Sahrens {
426789Sahrens 	return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp));
427789Sahrens }
428789Sahrens 
429789Sahrens void
430789Sahrens dsl_dataset_name(dsl_dataset_t *ds, char *name)
431789Sahrens {
432789Sahrens 	if (ds == NULL) {
433789Sahrens 		(void) strcpy(name, "mos");
434789Sahrens 	} else {
435789Sahrens 		dsl_dir_name(ds->ds_dir, name);
436789Sahrens 		dsl_dataset_get_snapname(ds);
437789Sahrens 		if (ds->ds_snapname[0]) {
438789Sahrens 			(void) strcat(name, "@");
439789Sahrens 			if (!MUTEX_HELD(&ds->ds_lock)) {
440789Sahrens 				/*
441789Sahrens 				 * We use a "recursive" mutex so that we
442789Sahrens 				 * can call dprintf_ds() with ds_lock held.
443789Sahrens 				 */
444789Sahrens 				mutex_enter(&ds->ds_lock);
445789Sahrens 				(void) strcat(name, ds->ds_snapname);
446789Sahrens 				mutex_exit(&ds->ds_lock);
447789Sahrens 			} else {
448789Sahrens 				(void) strcat(name, ds->ds_snapname);
449789Sahrens 			}
450789Sahrens 		}
451789Sahrens 	}
452789Sahrens }
453789Sahrens 
454789Sahrens void
455789Sahrens dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag)
456789Sahrens {
457789Sahrens 	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
458789Sahrens 	mutex_enter(&ds->ds_lock);
459789Sahrens 	ASSERT3U(ds->ds_open_refcount, >=, weight);
460789Sahrens 	ds->ds_open_refcount -= weight;
461789Sahrens 	dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n",
462789Sahrens 	    mode, ds->ds_open_refcount);
463789Sahrens 	mutex_exit(&ds->ds_lock);
464789Sahrens 
465789Sahrens 	dmu_buf_rele_tag(ds->ds_dbuf, tag);
466789Sahrens }
467789Sahrens 
468789Sahrens void
469789Sahrens dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
470789Sahrens {
471789Sahrens 	objset_t *mos = dp->dp_meta_objset;
472789Sahrens 	dmu_buf_t *dbuf;
473789Sahrens 	dsl_dataset_phys_t *dsphys;
474789Sahrens 	dsl_dataset_t *ds;
475789Sahrens 	uint64_t dsobj;
476789Sahrens 	dsl_dir_t *dd;
477789Sahrens 
478789Sahrens 	dsl_dir_create_root(mos, ddobjp, tx);
479789Sahrens 	dd = dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG);
480789Sahrens 	ASSERT(dd != NULL);
481789Sahrens 
482789Sahrens 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_OBJSET, 0,
483789Sahrens 	    DMU_OT_DSL_OBJSET, sizeof (dsl_dataset_phys_t), tx);
484789Sahrens 	dbuf = dmu_bonus_hold(mos, dsobj);
485789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
486789Sahrens 	dsphys = dbuf->db_data;
487789Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
488789Sahrens 	dsphys->ds_fsid_guid = unique_create();
489789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
490789Sahrens 	    sizeof (dsphys->ds_guid));
491789Sahrens 	dsphys->ds_snapnames_zapobj =
492*885Sahrens 	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
493789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
494789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
495789Sahrens 	dsphys->ds_deadlist_obj =
496789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
497789Sahrens 	dmu_buf_rele(dbuf);
498789Sahrens 
499789Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
500789Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
501789Sahrens 	dsl_dir_close(dd, FTAG);
502789Sahrens 
503789Sahrens 	ds = dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG);
504789Sahrens 	(void) dmu_objset_create_impl(dp->dp_spa, ds, DMU_OST_ZFS, tx);
505789Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
506789Sahrens }
507789Sahrens 
508789Sahrens int
509789Sahrens dsl_dataset_create_sync(dsl_dir_t *pds, const char *fullname,
510789Sahrens     const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx)
511789Sahrens {
512789Sahrens 	int err;
513789Sahrens 	dsl_pool_t *dp = pds->dd_pool;
514789Sahrens 	dmu_buf_t *dbuf;
515789Sahrens 	dsl_dataset_phys_t *dsphys;
516789Sahrens 	uint64_t dsobj;
517789Sahrens 	objset_t *mos = dp->dp_meta_objset;
518789Sahrens 	dsl_dir_t *dd;
519789Sahrens 
520789Sahrens 	if (clone_parent != NULL) {
521789Sahrens 		/*
522789Sahrens 		 * You can't clone across pools.
523789Sahrens 		 */
524789Sahrens 		if (clone_parent->ds_dir->dd_pool != dp)
525789Sahrens 			return (EXDEV);
526789Sahrens 
527789Sahrens 		/*
528789Sahrens 		 * You can only clone snapshots, not the head datasets.
529789Sahrens 		 */
530789Sahrens 		if (clone_parent->ds_phys->ds_num_children == 0)
531789Sahrens 			return (EINVAL);
532789Sahrens 	}
533789Sahrens 
534789Sahrens 	ASSERT(lastname[0] != '@');
535789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
536789Sahrens 
537789Sahrens 	err = dsl_dir_create_sync(pds, lastname, tx);
538789Sahrens 	if (err)
539789Sahrens 		return (err);
540789Sahrens 	dd = dsl_dir_open_spa(dp->dp_spa, fullname, FTAG, NULL);
541789Sahrens 	ASSERT(dd != NULL);
542789Sahrens 
543789Sahrens 	/* This is the point of no (unsuccessful) return */
544789Sahrens 
545789Sahrens 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_OBJSET, 0,
546789Sahrens 	    DMU_OT_DSL_OBJSET, sizeof (dsl_dataset_phys_t), tx);
547789Sahrens 	dbuf = dmu_bonus_hold(mos, dsobj);
548789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
549789Sahrens 	dsphys = dbuf->db_data;
550789Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
551789Sahrens 	dsphys->ds_fsid_guid = unique_create();
552789Sahrens 	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
553789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
554789Sahrens 	    sizeof (dsphys->ds_guid));
555789Sahrens 	dsphys->ds_snapnames_zapobj =
556*885Sahrens 	    zap_create(mos, DMU_OT_DSL_DS_SNAP_MAP, DMU_OT_NONE, 0, tx);
557789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
558789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
559789Sahrens 	dsphys->ds_deadlist_obj =
560789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
561789Sahrens 	if (clone_parent) {
562789Sahrens 		dsphys->ds_prev_snap_obj = clone_parent->ds_object;
563789Sahrens 		dsphys->ds_prev_snap_txg =
564789Sahrens 		    clone_parent->ds_phys->ds_creation_txg;
565789Sahrens 		dsphys->ds_used_bytes =
566789Sahrens 		    clone_parent->ds_phys->ds_used_bytes;
567789Sahrens 		dsphys->ds_compressed_bytes =
568789Sahrens 		    clone_parent->ds_phys->ds_compressed_bytes;
569789Sahrens 		dsphys->ds_uncompressed_bytes =
570789Sahrens 		    clone_parent->ds_phys->ds_uncompressed_bytes;
571789Sahrens 		dsphys->ds_bp = clone_parent->ds_phys->ds_bp;
572789Sahrens 
573789Sahrens 		dmu_buf_will_dirty(clone_parent->ds_dbuf, tx);
574789Sahrens 		clone_parent->ds_phys->ds_num_children++;
575789Sahrens 
576789Sahrens 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
577789Sahrens 		dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object;
578789Sahrens 	}
579789Sahrens 	dmu_buf_rele(dbuf);
580789Sahrens 
581789Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
582789Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
583789Sahrens 	dsl_dir_close(dd, FTAG);
584789Sahrens 
585789Sahrens 	return (0);
586789Sahrens }
587789Sahrens 
588789Sahrens 
589789Sahrens int
590789Sahrens dsl_dataset_destroy(const char *name)
591789Sahrens {
592789Sahrens 	int err;
593789Sahrens 	dsl_pool_t *dp;
594789Sahrens 	dsl_dir_t *dd;
595789Sahrens 	const char *tail;
596789Sahrens 
597789Sahrens 	dd = dsl_dir_open(name, FTAG, &tail);
598789Sahrens 	if (dd == NULL)
599789Sahrens 		return (ENOENT);
600789Sahrens 
601789Sahrens 	dp = dd->dd_pool;
602789Sahrens 	if (tail != NULL) {
603789Sahrens 		if (tail[0] != '@') {
604789Sahrens 			dsl_dir_close(dd, FTAG);
605789Sahrens 			return (ENOENT);
606789Sahrens 		}
607789Sahrens 		tail++;
608789Sahrens 		/* Just blow away the snapshot */
609789Sahrens 		do {
610789Sahrens 			txg_wait_synced(dp, 0);
611789Sahrens 			err = dsl_dir_sync_task(dd,
612789Sahrens 			    dsl_dataset_destroy_sync, (void*)tail, 0);
613789Sahrens 		} while (err == EAGAIN);
614789Sahrens 		dsl_dir_close(dd, FTAG);
615789Sahrens 	} else {
616789Sahrens 		char buf[MAXNAMELEN];
617789Sahrens 		char *cp;
618789Sahrens 
619789Sahrens 		dsl_dir_t *pds;
620789Sahrens 		if (dd->dd_phys->dd_parent_obj == 0) {
621789Sahrens 			dsl_dir_close(dd, FTAG);
622789Sahrens 			return (EINVAL);
623789Sahrens 		}
624789Sahrens 		/*
625789Sahrens 		 * Make sure it's not dirty before we destroy it.
626789Sahrens 		 */
627789Sahrens 		txg_wait_synced(dd->dd_pool, 0);
628789Sahrens 		/*
629789Sahrens 		 * Blow away the dsl_dir + head dataset.
630789Sahrens 		 * dsl_dir_destroy_sync() will call
631789Sahrens 		 * dsl_dataset_destroy_sync() to destroy the head dataset.
632789Sahrens 		 */
633789Sahrens 		rw_enter(&dp->dp_config_rwlock, RW_READER);
634789Sahrens 		pds = dsl_dir_open_obj(dd->dd_pool,
635789Sahrens 		    dd->dd_phys->dd_parent_obj, NULL, FTAG);
636789Sahrens 		dsl_dir_close(dd, FTAG);
637789Sahrens 		rw_exit(&dp->dp_config_rwlock);
638789Sahrens 
639789Sahrens 		(void) strcpy(buf, name);
640789Sahrens 		cp = strrchr(buf, '/') + 1;
641789Sahrens 		ASSERT(cp[0] != '\0');
642789Sahrens 		do {
643789Sahrens 			txg_wait_synced(dp, 0);
644789Sahrens 			err = dsl_dir_sync_task(pds,
645789Sahrens 			    dsl_dir_destroy_sync, cp, 0);
646789Sahrens 		} while (err == EAGAIN);
647789Sahrens 		dsl_dir_close(pds, FTAG);
648789Sahrens 	}
649789Sahrens 
650789Sahrens 	return (err);
651789Sahrens }
652789Sahrens 
653789Sahrens int
654789Sahrens dsl_dataset_rollback(const char *name)
655789Sahrens {
656789Sahrens 	int err;
657789Sahrens 	dsl_dir_t *dd;
658789Sahrens 	const char *tail;
659789Sahrens 
660789Sahrens 	dd = dsl_dir_open(name, FTAG, &tail);
661789Sahrens 	if (dd == NULL)
662789Sahrens 		return (ENOENT);
663789Sahrens 
664789Sahrens 	if (tail != NULL) {
665789Sahrens 		dsl_dir_close(dd, FTAG);
666789Sahrens 		return (EINVAL);
667789Sahrens 	}
668789Sahrens 	do {
669789Sahrens 		txg_wait_synced(dd->dd_pool, 0);
670789Sahrens 		err = dsl_dir_sync_task(dd,
671789Sahrens 		    dsl_dataset_rollback_sync, NULL, 0);
672789Sahrens 	} while (err == EAGAIN);
673789Sahrens 	dsl_dir_close(dd, FTAG);
674789Sahrens 
675789Sahrens 	return (err);
676789Sahrens }
677789Sahrens 
678789Sahrens void *
679789Sahrens dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
680789Sahrens     void *p, dsl_dataset_evict_func_t func)
681789Sahrens {
682789Sahrens 	void *old;
683789Sahrens 
684789Sahrens 	mutex_enter(&ds->ds_lock);
685789Sahrens 	old = ds->ds_user_ptr;
686789Sahrens 	if (old == NULL) {
687789Sahrens 		ds->ds_user_ptr = p;
688789Sahrens 		ds->ds_user_evict_func = func;
689789Sahrens 	}
690789Sahrens 	mutex_exit(&ds->ds_lock);
691789Sahrens 	return (old);
692789Sahrens }
693789Sahrens 
694789Sahrens void *
695789Sahrens dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
696789Sahrens {
697789Sahrens 	return (ds->ds_user_ptr);
698789Sahrens }
699789Sahrens 
700789Sahrens 
701789Sahrens void
702789Sahrens dsl_dataset_get_blkptr(dsl_dataset_t *ds, blkptr_t *bp)
703789Sahrens {
704789Sahrens 	*bp = ds->ds_phys->ds_bp;
705789Sahrens }
706789Sahrens 
707789Sahrens void
708789Sahrens dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
709789Sahrens {
710789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
711789Sahrens 	/* If it's the meta-objset, set dp_meta_rootbp */
712789Sahrens 	if (ds == NULL) {
713789Sahrens 		tx->tx_pool->dp_meta_rootbp = *bp;
714789Sahrens 	} else {
715789Sahrens 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
716789Sahrens 		ds->ds_phys->ds_bp = *bp;
717789Sahrens 	}
718789Sahrens }
719789Sahrens 
720789Sahrens spa_t *
721789Sahrens dsl_dataset_get_spa(dsl_dataset_t *ds)
722789Sahrens {
723789Sahrens 	return (ds->ds_dir->dd_pool->dp_spa);
724789Sahrens }
725789Sahrens 
726789Sahrens void
727789Sahrens dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
728789Sahrens {
729789Sahrens 	dsl_pool_t *dp;
730789Sahrens 
731789Sahrens 	if (ds == NULL) /* this is the meta-objset */
732789Sahrens 		return;
733789Sahrens 
734789Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
735789Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
736789Sahrens 
737789Sahrens 	dp = ds->ds_dir->dd_pool;
738789Sahrens 
739789Sahrens 	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
740789Sahrens 		/* up the hold count until we can be written out */
741789Sahrens 		dmu_buf_add_ref(ds->ds_dbuf, ds);
742789Sahrens 	}
743789Sahrens }
744789Sahrens 
745789Sahrens struct killarg {
746789Sahrens 	uint64_t *usedp;
747789Sahrens 	uint64_t *compressedp;
748789Sahrens 	uint64_t *uncompressedp;
749789Sahrens 	zio_t *zio;
750789Sahrens 	dmu_tx_t *tx;
751789Sahrens };
752789Sahrens 
753789Sahrens static int
754789Sahrens kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
755789Sahrens {
756789Sahrens 	struct killarg *ka = arg;
757789Sahrens 	blkptr_t *bp = &bc->bc_blkptr;
758789Sahrens 
759789Sahrens 	ASSERT3U(bc->bc_errno, ==, 0);
760789Sahrens 
761789Sahrens 	/*
762789Sahrens 	 * Since this callback is not called concurrently, no lock is
763789Sahrens 	 * needed on the accounting values.
764789Sahrens 	 */
765789Sahrens 	*ka->usedp += BP_GET_ASIZE(bp);
766789Sahrens 	*ka->compressedp += BP_GET_PSIZE(bp);
767789Sahrens 	*ka->uncompressedp += BP_GET_UCSIZE(bp);
768789Sahrens 	/* XXX check for EIO? */
769789Sahrens 	(void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL,
770789Sahrens 	    ARC_NOWAIT);
771789Sahrens 	return (0);
772789Sahrens }
773789Sahrens 
774789Sahrens /* ARGSUSED */
775789Sahrens int
776789Sahrens dsl_dataset_rollback_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
777789Sahrens {
778789Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
779789Sahrens 	dsl_dataset_t *ds;
780789Sahrens 
781789Sahrens 	if (dd->dd_phys->dd_head_dataset_obj == 0)
782789Sahrens 		return (EINVAL);
783789Sahrens 	ds = dsl_dataset_open_obj(dd->dd_pool,
784789Sahrens 	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG);
785789Sahrens 
786789Sahrens 	if (ds->ds_phys->ds_prev_snap_txg == 0) {
787789Sahrens 		/*
788789Sahrens 		 * There's no previous snapshot.  I suppose we could
789789Sahrens 		 * roll it back to being empty (and re-initialize the
790789Sahrens 		 * upper (ZPL) layer).  But for now there's no way to do
791789Sahrens 		 * this via the user interface.
792789Sahrens 		 */
793789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
794789Sahrens 		return (EINVAL);
795789Sahrens 	}
796789Sahrens 
797789Sahrens 	mutex_enter(&ds->ds_lock);
798789Sahrens 	if (ds->ds_open_refcount > 0) {
799789Sahrens 		mutex_exit(&ds->ds_lock);
800789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
801789Sahrens 		return (EBUSY);
802789Sahrens 	}
803789Sahrens 
804789Sahrens 	/*
805789Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
806789Sahrens 	 * them.  Try again.
807789Sahrens 	 */
808789Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) {
809789Sahrens 		mutex_exit(&ds->ds_lock);
810789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
811789Sahrens 		return (EAGAIN);
812789Sahrens 	}
813789Sahrens 
814789Sahrens 	/* THE POINT OF NO (unsuccessful) RETURN */
815789Sahrens 	ds->ds_open_refcount = DOS_REF_MAX;
816789Sahrens 	mutex_exit(&ds->ds_lock);
817789Sahrens 
818789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
819789Sahrens 
820789Sahrens 	/* Zero out the deadlist. */
821789Sahrens 	dprintf("old deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj);
822789Sahrens 	bplist_close(&ds->ds_deadlist);
823789Sahrens 	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
824789Sahrens 	ds->ds_phys->ds_deadlist_obj =
825789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
826789Sahrens 	bplist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
827789Sahrens 	dprintf("new deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj);
828789Sahrens 
829789Sahrens 	{
830789Sahrens 		/* Free blkptrs that we gave birth to */
831789Sahrens 		zio_t *zio;
832789Sahrens 		uint64_t used = 0, compressed = 0, uncompressed = 0;
833789Sahrens 		struct killarg ka;
834789Sahrens 
835789Sahrens 		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
836789Sahrens 		    ZIO_FLAG_MUSTSUCCEED);
837789Sahrens 		ka.usedp = &used;
838789Sahrens 		ka.compressedp = &compressed;
839789Sahrens 		ka.uncompressedp = &uncompressed;
840789Sahrens 		ka.zio = zio;
841789Sahrens 		ka.tx = tx;
842789Sahrens 		(void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
843789Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
844789Sahrens 		(void) zio_wait(zio);
845789Sahrens 
846789Sahrens 		dsl_dir_diduse_space(dd,
847789Sahrens 		    -used, -compressed, -uncompressed, tx);
848789Sahrens 	}
849789Sahrens 
850789Sahrens 	/* Change our contents to that of the prev snapshot (finally!) */
851789Sahrens 	ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj);
852789Sahrens 	ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
853789Sahrens 	ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes;
854789Sahrens 	ds->ds_phys->ds_compressed_bytes =
855789Sahrens 	    ds->ds_prev->ds_phys->ds_compressed_bytes;
856789Sahrens 	ds->ds_phys->ds_uncompressed_bytes =
857789Sahrens 	    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
858789Sahrens 	ds->ds_phys->ds_restoring = ds->ds_prev->ds_phys->ds_restoring;
859789Sahrens 	ds->ds_phys->ds_unique_bytes = 0;
860789Sahrens 
861789Sahrens 	dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
862789Sahrens 	ds->ds_prev->ds_phys->ds_unique_bytes = 0;
863789Sahrens 
864789Sahrens 	dprintf("new deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj);
865789Sahrens 	ds->ds_open_refcount = 0;
866789Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
867789Sahrens 
868789Sahrens 	return (0);
869789Sahrens }
870789Sahrens 
871789Sahrens int
872789Sahrens dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
873789Sahrens {
874789Sahrens 	const char *snapname = arg;
875789Sahrens 	uint64_t used = 0, compressed = 0, uncompressed = 0;
876789Sahrens 	blkptr_t bp;
877789Sahrens 	zio_t *zio;
878789Sahrens 	int err;
879789Sahrens 	int after_branch_point = FALSE;
880789Sahrens 	int drop_lock = FALSE;
881789Sahrens 	dsl_pool_t *dp = dd->dd_pool;
882789Sahrens 	objset_t *mos = dp->dp_meta_objset;
883789Sahrens 	dsl_dataset_t *ds, *ds_prev = NULL;
884789Sahrens 	uint64_t obj;
885789Sahrens 
886789Sahrens 	if (dd->dd_phys->dd_head_dataset_obj == 0)
887789Sahrens 		return (EINVAL);
888789Sahrens 
889789Sahrens 	if (!RW_WRITE_HELD(&dp->dp_config_rwlock)) {
890789Sahrens 		rw_enter(&dp->dp_config_rwlock, RW_WRITER);
891789Sahrens 		drop_lock = TRUE;
892789Sahrens 	}
893789Sahrens 
894789Sahrens 	ds = dsl_dataset_open_obj(dd->dd_pool,
895789Sahrens 	    dd->dd_phys->dd_head_dataset_obj, NULL,
896789Sahrens 	    snapname ? DS_MODE_NONE : DS_MODE_EXCLUSIVE, FTAG);
897789Sahrens 
898789Sahrens 	if (snapname) {
899789Sahrens 		err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
900789Sahrens 		    snapname, 8, 1, &obj);
901789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
902789Sahrens 		if (err) {
903789Sahrens 			if (drop_lock)
904789Sahrens 				rw_exit(&dp->dp_config_rwlock);
905789Sahrens 			return (err);
906789Sahrens 		}
907789Sahrens 
908789Sahrens 		ds = dsl_dataset_open_obj(dd->dd_pool, obj, NULL,
909789Sahrens 		    DS_MODE_EXCLUSIVE, FTAG);
910789Sahrens 	}
911789Sahrens 	if (ds == NULL) {
912789Sahrens 		if (drop_lock)
913789Sahrens 			rw_exit(&dp->dp_config_rwlock);
914789Sahrens 		return (EBUSY);
915789Sahrens 	}
916789Sahrens 
917789Sahrens 	obj = ds->ds_object;
918789Sahrens 
919789Sahrens 	/* Can't delete a branch point. */
920789Sahrens 	if (ds->ds_phys->ds_num_children > 1) {
921789Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
922789Sahrens 		if (drop_lock)
923789Sahrens 			rw_exit(&dp->dp_config_rwlock);
924789Sahrens 		return (EINVAL);
925789Sahrens 	}
926789Sahrens 
927789Sahrens 	/*
928789Sahrens 	 * Can't delete a head dataset if there are snapshots of it.
929789Sahrens 	 * (Except if the only snapshots are from the branch we cloned
930789Sahrens 	 * from.)
931789Sahrens 	 */
932789Sahrens 	if (ds->ds_prev != NULL &&
933789Sahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj == obj) {
934789Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
935789Sahrens 		if (drop_lock)
936789Sahrens 			rw_exit(&dp->dp_config_rwlock);
937789Sahrens 		return (EINVAL);
938789Sahrens 	}
939789Sahrens 
940789Sahrens 	/*
941789Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
942789Sahrens 	 * them.  Try again.
943789Sahrens 	 */
944789Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) {
945789Sahrens 		mutex_exit(&ds->ds_lock);
946789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
947*885Sahrens 		if (drop_lock)
948*885Sahrens 			rw_exit(&dp->dp_config_rwlock);
949789Sahrens 		return (EAGAIN);
950789Sahrens 	}
951789Sahrens 
952789Sahrens 	/* THE POINT OF NO (unsuccessful) RETURN */
953789Sahrens 
954789Sahrens 	if (ds->ds_phys->ds_prev_snap_obj != 0) {
955789Sahrens 		if (ds->ds_prev) {
956789Sahrens 			ds_prev = ds->ds_prev;
957789Sahrens 		} else {
958789Sahrens 			ds_prev = dsl_dataset_open_obj(dd->dd_pool,
959789Sahrens 			    ds->ds_phys->ds_prev_snap_obj, NULL,
960789Sahrens 			    DS_MODE_NONE, FTAG);
961789Sahrens 		}
962789Sahrens 		after_branch_point =
963789Sahrens 		    (ds_prev->ds_phys->ds_next_snap_obj != obj);
964789Sahrens 
965789Sahrens 		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
966789Sahrens 		if (after_branch_point &&
967789Sahrens 		    ds->ds_phys->ds_next_snap_obj == 0) {
968789Sahrens 			/* This clone is toast. */
969789Sahrens 			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
970789Sahrens 			ds_prev->ds_phys->ds_num_children--;
971789Sahrens 		} else if (!after_branch_point) {
972789Sahrens 			ds_prev->ds_phys->ds_next_snap_obj =
973789Sahrens 			    ds->ds_phys->ds_next_snap_obj;
974789Sahrens 		}
975789Sahrens 	}
976789Sahrens 
977789Sahrens 	ASSERT3P(tx->tx_pool, ==, dd->dd_pool);
978789Sahrens 	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
979789Sahrens 
980789Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0) {
981789Sahrens 		dsl_dataset_t *ds_next;
982789Sahrens 		uint64_t itor = 0;
983789Sahrens 
984789Sahrens 		spa_scrub_restart(dp->dp_spa, tx->tx_txg);
985789Sahrens 
986789Sahrens 		ds_next = dsl_dataset_open_obj(dd->dd_pool,
987789Sahrens 		    ds->ds_phys->ds_next_snap_obj, NULL, DS_MODE_NONE, FTAG);
988789Sahrens 		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
989789Sahrens 
990789Sahrens 		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
991789Sahrens 		ds_next->ds_phys->ds_prev_snap_obj =
992789Sahrens 		    ds->ds_phys->ds_prev_snap_obj;
993789Sahrens 		ds_next->ds_phys->ds_prev_snap_txg =
994789Sahrens 		    ds->ds_phys->ds_prev_snap_txg;
995789Sahrens 		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
996789Sahrens 		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
997789Sahrens 
998789Sahrens 		/*
999789Sahrens 		 * Transfer to our deadlist (which will become next's
1000789Sahrens 		 * new deadlist) any entries from next's current
1001789Sahrens 		 * deadlist which were born before prev, and free the
1002789Sahrens 		 * other entries.
1003789Sahrens 		 *
1004789Sahrens 		 * XXX we're doing this long task with the config lock held
1005789Sahrens 		 */
1006789Sahrens 		while (bplist_iterate(&ds_next->ds_deadlist, &itor,
1007789Sahrens 		    &bp) == 0) {
1008789Sahrens 			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
1009789Sahrens 				bplist_enqueue(&ds->ds_deadlist, &bp, tx);
1010789Sahrens 				if (ds_prev && !after_branch_point &&
1011789Sahrens 				    bp.blk_birth >
1012789Sahrens 				    ds_prev->ds_phys->ds_prev_snap_txg) {
1013789Sahrens 					ds_prev->ds_phys->ds_unique_bytes +=
1014789Sahrens 					    BP_GET_ASIZE(&bp);
1015789Sahrens 				}
1016789Sahrens 			} else {
1017789Sahrens 				used += BP_GET_ASIZE(&bp);
1018789Sahrens 				compressed += BP_GET_PSIZE(&bp);
1019789Sahrens 				uncompressed += BP_GET_UCSIZE(&bp);
1020789Sahrens 				/* XXX check return value? */
1021789Sahrens 				(void) arc_free(zio, dp->dp_spa, tx->tx_txg,
1022789Sahrens 				    &bp, NULL, NULL, ARC_NOWAIT);
1023789Sahrens 			}
1024789Sahrens 		}
1025789Sahrens 
1026789Sahrens 		/* free next's deadlist */
1027789Sahrens 		bplist_close(&ds_next->ds_deadlist);
1028789Sahrens 		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);
1029789Sahrens 
1030789Sahrens 		/* set next's deadlist to our deadlist */
1031789Sahrens 		ds_next->ds_phys->ds_deadlist_obj =
1032789Sahrens 		    ds->ds_phys->ds_deadlist_obj;
1033789Sahrens 		bplist_open(&ds_next->ds_deadlist, mos,
1034789Sahrens 		    ds_next->ds_phys->ds_deadlist_obj);
1035789Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1036789Sahrens 
1037789Sahrens 		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
1038789Sahrens 			/*
1039789Sahrens 			 * Update next's unique to include blocks which
1040789Sahrens 			 * were previously shared by only this snapshot
1041789Sahrens 			 * and it.  Those blocks will be born after the
1042789Sahrens 			 * prev snap and before this snap, and will have
1043789Sahrens 			 * died after the next snap and before the one
1044789Sahrens 			 * after that (ie. be on the snap after next's
1045789Sahrens 			 * deadlist).
1046789Sahrens 			 *
1047789Sahrens 			 * XXX we're doing this long task with the
1048789Sahrens 			 * config lock held
1049789Sahrens 			 */
1050789Sahrens 			dsl_dataset_t *ds_after_next;
1051789Sahrens 
1052789Sahrens 			ds_after_next = dsl_dataset_open_obj(dd->dd_pool,
1053789Sahrens 			    ds_next->ds_phys->ds_next_snap_obj, NULL,
1054789Sahrens 			    DS_MODE_NONE, FTAG);
1055789Sahrens 			itor = 0;
1056789Sahrens 			while (bplist_iterate(&ds_after_next->ds_deadlist,
1057789Sahrens 			    &itor, &bp) == 0) {
1058789Sahrens 				if (bp.blk_birth >
1059789Sahrens 				    ds->ds_phys->ds_prev_snap_txg &&
1060789Sahrens 				    bp.blk_birth <=
1061789Sahrens 				    ds->ds_phys->ds_creation_txg) {
1062789Sahrens 					ds_next->ds_phys->ds_unique_bytes +=
1063789Sahrens 					    BP_GET_ASIZE(&bp);
1064789Sahrens 				}
1065789Sahrens 			}
1066789Sahrens 
1067789Sahrens 			dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG);
1068789Sahrens 			ASSERT3P(ds_next->ds_prev, ==, NULL);
1069789Sahrens 		} else {
1070789Sahrens 			/*
1071789Sahrens 			 * It would be nice to update the head dataset's
1072789Sahrens 			 * unique.  To do so we would have to traverse
1073789Sahrens 			 * it for blocks born after ds_prev, which is
1074789Sahrens 			 * pretty expensive just to maintain something
1075789Sahrens 			 * for debugging purposes.
1076789Sahrens 			 */
1077789Sahrens 			ASSERT3P(ds_next->ds_prev, ==, ds);
1078789Sahrens 			dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE,
1079789Sahrens 			    ds_next);
1080789Sahrens 			if (ds_prev) {
1081789Sahrens 				ds_next->ds_prev = dsl_dataset_open_obj(
1082789Sahrens 				    dd->dd_pool, ds->ds_phys->ds_prev_snap_obj,
1083789Sahrens 				    NULL, DS_MODE_NONE, ds_next);
1084789Sahrens 			} else {
1085789Sahrens 				ds_next->ds_prev = NULL;
1086789Sahrens 			}
1087789Sahrens 		}
1088789Sahrens 		dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG);
1089789Sahrens 
1090789Sahrens 		/*
1091789Sahrens 		 * NB: unique_bytes is not accurate for head objsets
1092789Sahrens 		 * because we don't update it when we delete the most
1093789Sahrens 		 * recent snapshot -- see above comment.
1094789Sahrens 		 */
1095789Sahrens 		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
1096789Sahrens 	} else {
1097789Sahrens 		/*
1098789Sahrens 		 * There's no next snapshot, so this is a head dataset.
1099789Sahrens 		 * Destroy the deadlist.  Unless it's a clone, the
1100789Sahrens 		 * deadlist should be empty.  (If it's a clone, it's
1101789Sahrens 		 * safe to ignore the deadlist contents.)
1102789Sahrens 		 */
1103789Sahrens 		struct killarg ka;
1104789Sahrens 
1105789Sahrens 		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
1106789Sahrens 		bplist_close(&ds->ds_deadlist);
1107789Sahrens 		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
1108789Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1109789Sahrens 
1110789Sahrens 		/*
1111789Sahrens 		 * Free everything that we point to (that's born after
1112789Sahrens 		 * the previous snapshot, if we are a clone)
1113789Sahrens 		 *
1114789Sahrens 		 * XXX we're doing this long task with the config lock held
1115789Sahrens 		 */
1116789Sahrens 		ka.usedp = &used;
1117789Sahrens 		ka.compressedp = &compressed;
1118789Sahrens 		ka.uncompressedp = &uncompressed;
1119789Sahrens 		ka.zio = zio;
1120789Sahrens 		ka.tx = tx;
1121789Sahrens 		err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
1122789Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
1123789Sahrens 		ASSERT3U(err, ==, 0);
1124789Sahrens 	}
1125789Sahrens 
1126789Sahrens 	err = zio_wait(zio);
1127789Sahrens 	ASSERT3U(err, ==, 0);
1128789Sahrens 
1129789Sahrens 	dsl_dir_diduse_space(dd, -used, -compressed, -uncompressed, tx);
1130789Sahrens 
1131789Sahrens 	if (ds->ds_phys->ds_snapnames_zapobj) {
1132789Sahrens 		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
1133789Sahrens 		ASSERT(err == 0);
1134789Sahrens 	}
1135789Sahrens 
1136789Sahrens 	if (dd->dd_phys->dd_head_dataset_obj == ds->ds_object) {
1137789Sahrens 		/* Erase the link in the dataset */
1138789Sahrens 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
1139789Sahrens 		dd->dd_phys->dd_head_dataset_obj = 0;
1140789Sahrens 		/*
1141789Sahrens 		 * dsl_dir_sync_destroy() called us, they'll destroy
1142789Sahrens 		 * the dataset.
1143789Sahrens 		 */
1144789Sahrens 	} else {
1145789Sahrens 		/* remove from snapshot namespace */
1146789Sahrens 		dsl_dataset_t *ds_head;
1147789Sahrens 		ds_head = dsl_dataset_open_obj(dd->dd_pool,
1148789Sahrens 		    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG);
1149789Sahrens #ifdef ZFS_DEBUG
1150789Sahrens 		{
1151789Sahrens 			uint64_t val;
1152789Sahrens 			err = zap_lookup(mos,
1153789Sahrens 			    ds_head->ds_phys->ds_snapnames_zapobj,
1154789Sahrens 			    snapname, 8, 1, &val);
1155789Sahrens 			ASSERT3U(err, ==, 0);
1156789Sahrens 			ASSERT3U(val, ==, obj);
1157789Sahrens 		}
1158789Sahrens #endif
1159789Sahrens 		err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj,
1160789Sahrens 		    snapname, tx);
1161789Sahrens 		ASSERT(err == 0);
1162789Sahrens 		dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG);
1163789Sahrens 	}
1164789Sahrens 
1165789Sahrens 	if (ds_prev && ds->ds_prev != ds_prev)
1166789Sahrens 		dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);
1167789Sahrens 
1168789Sahrens 	err = dmu_object_free(mos, obj, tx);
1169789Sahrens 	ASSERT(err == 0);
1170789Sahrens 
1171789Sahrens 	/*
1172789Sahrens 	 * Close the objset with mode NONE, thus leaving it with
1173789Sahrens 	 * DOS_REF_MAX set, so that noone can access it.
1174789Sahrens 	 */
1175789Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
1176789Sahrens 
1177789Sahrens 	if (drop_lock)
1178789Sahrens 		rw_exit(&dp->dp_config_rwlock);
1179789Sahrens 	return (0);
1180789Sahrens }
1181789Sahrens 
1182789Sahrens int
1183789Sahrens dsl_dataset_snapshot_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
1184789Sahrens {
1185789Sahrens 	const char *snapname = arg;
1186789Sahrens 	dsl_pool_t *dp = dd->dd_pool;
1187789Sahrens 	dmu_buf_t *dbuf;
1188789Sahrens 	dsl_dataset_phys_t *dsphys;
1189789Sahrens 	uint64_t dsobj, value;
1190789Sahrens 	objset_t *mos = dp->dp_meta_objset;
1191789Sahrens 	dsl_dataset_t *ds;
1192789Sahrens 	int err;
1193789Sahrens 
1194789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1195789Sahrens 
1196789Sahrens 	if (dd->dd_phys->dd_head_dataset_obj == 0)
1197789Sahrens 		return (EINVAL);
1198789Sahrens 	ds = dsl_dataset_open_obj(dp, dd->dd_phys->dd_head_dataset_obj, NULL,
1199789Sahrens 	    DS_MODE_NONE, FTAG);
1200789Sahrens 
1201789Sahrens 	err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
1202789Sahrens 	    snapname, 8, 1, &value);
1203789Sahrens 	if (err == 0) {
1204789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
1205789Sahrens 		return (EEXIST);
1206789Sahrens 	}
1207789Sahrens 	ASSERT(err == ENOENT);
1208789Sahrens 
1209789Sahrens 	/* The point of no (unsuccessful) return */
1210789Sahrens 
1211789Sahrens 	dprintf_dd(dd, "taking snapshot %s in txg %llu\n",
1212789Sahrens 	    snapname, tx->tx_txg);
1213789Sahrens 
1214789Sahrens 	spa_scrub_restart(dp->dp_spa, tx->tx_txg);
1215789Sahrens 
1216789Sahrens 	rw_enter(&dp->dp_config_rwlock, RW_WRITER);
1217789Sahrens 
1218789Sahrens 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_OBJSET, 0,
1219789Sahrens 	    DMU_OT_DSL_OBJSET, sizeof (dsl_dataset_phys_t), tx);
1220789Sahrens 	dbuf = dmu_bonus_hold(mos, dsobj);
1221789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
1222789Sahrens 	dsphys = dbuf->db_data;
1223789Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
1224789Sahrens 	dsphys->ds_fsid_guid = unique_create();
1225789Sahrens 	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
1226789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
1227789Sahrens 	    sizeof (dsphys->ds_guid));
1228789Sahrens 	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
1229789Sahrens 	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
1230789Sahrens 	dsphys->ds_next_snap_obj = ds->ds_object;
1231789Sahrens 	dsphys->ds_num_children = 1;
1232789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
1233789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
1234789Sahrens 	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
1235789Sahrens 	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
1236789Sahrens 	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
1237789Sahrens 	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
1238789Sahrens 	dsphys->ds_restoring = ds->ds_phys->ds_restoring;
1239789Sahrens 	dsphys->ds_bp = ds->ds_phys->ds_bp;
1240789Sahrens 	dmu_buf_rele(dbuf);
1241789Sahrens 
1242789Sahrens 	if (ds->ds_phys->ds_prev_snap_obj != 0) {
1243789Sahrens 		dsl_dataset_t *ds_prev;
1244789Sahrens 
1245789Sahrens 		ds_prev = dsl_dataset_open_obj(dp,
1246789Sahrens 		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_NONE, FTAG);
1247789Sahrens 		ASSERT(ds_prev->ds_phys->ds_next_snap_obj ==
1248789Sahrens 		    ds->ds_object ||
1249789Sahrens 		    ds_prev->ds_phys->ds_num_children > 1);
1250789Sahrens 		if (ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
1251789Sahrens 			dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
1252789Sahrens 			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
1253789Sahrens 			    ds_prev->ds_phys->ds_creation_txg);
1254789Sahrens 			ds_prev->ds_phys->ds_next_snap_obj = dsobj;
1255789Sahrens 		}
1256789Sahrens 		dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);
1257789Sahrens 	} else {
1258789Sahrens 		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 0);
1259789Sahrens 	}
1260789Sahrens 
1261789Sahrens 	bplist_close(&ds->ds_deadlist);
1262789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
1263789Sahrens 	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg);
1264789Sahrens 	ds->ds_phys->ds_prev_snap_obj = dsobj;
1265789Sahrens 	ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg;
1266789Sahrens 	ds->ds_phys->ds_unique_bytes = 0;
1267789Sahrens 	ds->ds_phys->ds_deadlist_obj =
1268789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
1269789Sahrens 	bplist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
1270789Sahrens 
1271789Sahrens 	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
1272789Sahrens 	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
1273789Sahrens 	    snapname, 8, 1, &dsobj, tx);
1274789Sahrens 	ASSERT(err == 0);
1275789Sahrens 
1276789Sahrens 	if (ds->ds_prev)
1277789Sahrens 		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
1278789Sahrens 	ds->ds_prev = dsl_dataset_open_obj(dp,
1279789Sahrens 	    ds->ds_phys->ds_prev_snap_obj, snapname, DS_MODE_NONE, ds);
1280789Sahrens 
1281789Sahrens 	rw_exit(&dp->dp_config_rwlock);
1282789Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
1283789Sahrens 
1284789Sahrens 	return (0);
1285789Sahrens }
1286789Sahrens 
1287789Sahrens void
1288789Sahrens dsl_dataset_sync(dsl_dataset_t *ds, dmu_tx_t *tx)
1289789Sahrens {
1290789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1291789Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
1292789Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
1293789Sahrens 
1294789Sahrens 	dmu_objset_sync(ds->ds_user_ptr, tx);
1295789Sahrens 	dsl_dir_dirty(ds->ds_dir, tx);
1296789Sahrens 	bplist_close(&ds->ds_deadlist);
1297789Sahrens 
1298789Sahrens 	dmu_buf_remove_ref(ds->ds_dbuf, ds);
1299789Sahrens }
1300789Sahrens 
1301789Sahrens void
1302789Sahrens dsl_dataset_stats(dsl_dataset_t *ds, dmu_objset_stats_t *dds)
1303789Sahrens {
1304789Sahrens 	/* fill in properties crap */
1305789Sahrens 	dsl_dir_stats(ds->ds_dir, dds);
1306789Sahrens 
1307789Sahrens 	if (ds->ds_phys->ds_num_children != 0) {
1308789Sahrens 		dds->dds_is_snapshot = TRUE;
1309789Sahrens 		dds->dds_num_clones = ds->ds_phys->ds_num_children - 1;
1310789Sahrens 	}
1311789Sahrens 
1312789Sahrens 	dds->dds_last_txg = ds->ds_phys->ds_bp.blk_birth;
1313789Sahrens 
1314789Sahrens 	dds->dds_objects_used = ds->ds_phys->ds_bp.blk_fill;
1315789Sahrens 	dds->dds_objects_avail = DN_MAX_OBJECT - dds->dds_objects_used;
1316789Sahrens 
1317789Sahrens 	/* We override the dataset's creation time... they should be the same */
1318789Sahrens 	dds->dds_creation_time = ds->ds_phys->ds_creation_time;
1319789Sahrens 	dds->dds_creation_txg = ds->ds_phys->ds_creation_txg;
1320789Sahrens 	dds->dds_space_refd = ds->ds_phys->ds_used_bytes;
1321789Sahrens 	dds->dds_fsid_guid = ds->ds_phys->ds_fsid_guid;
1322789Sahrens 	dds->dds_guid = ds->ds_phys->ds_guid;
1323789Sahrens 
1324789Sahrens 	if (ds->ds_phys->ds_next_snap_obj) {
1325789Sahrens 		/*
1326789Sahrens 		 * This is a snapshot; override the dd's space used with
1327789Sahrens 		 * our unique space
1328789Sahrens 		 */
1329789Sahrens 		dds->dds_space_used = ds->ds_phys->ds_unique_bytes;
1330789Sahrens 		dds->dds_compressed_bytes =
1331789Sahrens 		    ds->ds_phys->ds_compressed_bytes;
1332789Sahrens 		dds->dds_uncompressed_bytes =
1333789Sahrens 		    ds->ds_phys->ds_uncompressed_bytes;
1334789Sahrens 	}
1335789Sahrens 
1336789Sahrens 	dds->dds_objset_obj = ds->ds_object;
1337789Sahrens }
1338789Sahrens 
1339789Sahrens dsl_pool_t *
1340789Sahrens dsl_dataset_pool(dsl_dataset_t *ds)
1341789Sahrens {
1342789Sahrens 	return (ds->ds_dir->dd_pool);
1343789Sahrens }
1344789Sahrens 
1345789Sahrens struct osrenamearg {
1346789Sahrens 	const char *oldname;
1347789Sahrens 	const char *newname;
1348789Sahrens };
1349789Sahrens 
1350789Sahrens static int
1351789Sahrens dsl_dataset_snapshot_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
1352789Sahrens {
1353789Sahrens 	struct osrenamearg *ora = arg;
1354789Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
1355789Sahrens 	dsl_dir_t *nds;
1356789Sahrens 	const char *tail;
1357789Sahrens 	int err;
1358789Sahrens 	dsl_dataset_t *snds, *fsds;
1359789Sahrens 	uint64_t val;
1360789Sahrens 
1361789Sahrens 	err = dsl_dataset_open_spa(dd->dd_pool->dp_spa, ora->oldname,
1362789Sahrens 	    DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &snds);
1363789Sahrens 	if (err)
1364789Sahrens 		return (err);
1365789Sahrens 
1366789Sahrens 	if (snds->ds_dir != dd) {
1367789Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1368789Sahrens 		return (EINVAL);
1369789Sahrens 	}
1370789Sahrens 
1371789Sahrens 	/* better be changing a snapshot */
1372789Sahrens 	if (snds->ds_phys->ds_next_snap_obj == 0) {
1373789Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1374789Sahrens 		return (EINVAL);
1375789Sahrens 	}
1376789Sahrens 
1377789Sahrens 	/* new fs better exist */
1378789Sahrens 	nds = dsl_dir_open_spa(dd->dd_pool->dp_spa, ora->newname, FTAG, &tail);
1379789Sahrens 	if (nds == NULL) {
1380789Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1381789Sahrens 		return (ENOENT);
1382789Sahrens 	}
1383789Sahrens 
1384789Sahrens 	dsl_dir_close(nds, FTAG);
1385789Sahrens 
1386789Sahrens 	/* new name better be in same fs */
1387789Sahrens 	if (nds != dd) {
1388789Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1389789Sahrens 		return (EINVAL);
1390789Sahrens 	}
1391789Sahrens 
1392789Sahrens 	/* new name better be a snapshot */
1393789Sahrens 	if (tail == NULL || tail[0] != '@') {
1394789Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1395789Sahrens 		return (EINVAL);
1396789Sahrens 	}
1397789Sahrens 
1398789Sahrens 	tail++;
1399789Sahrens 
1400789Sahrens 	fsds = dsl_dataset_open_obj(dd->dd_pool,
1401789Sahrens 	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG);
1402789Sahrens 
1403789Sahrens 	/* new name better not be in use */
1404789Sahrens 	err = zap_lookup(mos, fsds->ds_phys->ds_snapnames_zapobj,
1405789Sahrens 	    tail, 8, 1, &val);
1406789Sahrens 	if (err != ENOENT) {
1407789Sahrens 		if (err == 0)
1408789Sahrens 			err = EEXIST;
1409789Sahrens 		dsl_dataset_close(fsds, DS_MODE_NONE, FTAG);
1410789Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1411789Sahrens 		return (EEXIST);
1412789Sahrens 	}
1413789Sahrens 
1414789Sahrens 	/* The point of no (unsuccessful) return */
1415789Sahrens 
1416789Sahrens 	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_WRITER);
1417789Sahrens 	dsl_dataset_get_snapname(snds);
1418789Sahrens 	err = zap_remove(mos, fsds->ds_phys->ds_snapnames_zapobj,
1419789Sahrens 	    snds->ds_snapname, tx);
1420789Sahrens 	ASSERT3U(err, ==, 0);
1421789Sahrens 	mutex_enter(&snds->ds_lock);
1422789Sahrens 	(void) strcpy(snds->ds_snapname, tail);
1423789Sahrens 	mutex_exit(&snds->ds_lock);
1424789Sahrens 	err = zap_add(mos, fsds->ds_phys->ds_snapnames_zapobj,
1425789Sahrens 	    snds->ds_snapname, 8, 1, &snds->ds_object, tx);
1426789Sahrens 	ASSERT3U(err, ==, 0);
1427789Sahrens 	rw_exit(&dd->dd_pool->dp_config_rwlock);
1428789Sahrens 
1429789Sahrens 	dsl_dataset_close(fsds, DS_MODE_NONE, FTAG);
1430789Sahrens 	dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1431789Sahrens 	return (0);
1432789Sahrens }
1433789Sahrens 
1434789Sahrens #pragma weak dmu_objset_rename = dsl_dataset_rename
1435789Sahrens int
1436789Sahrens dsl_dataset_rename(const char *osname, const char *newname)
1437789Sahrens {
1438789Sahrens 	dsl_dir_t *dd;
1439789Sahrens 	const char *tail;
1440789Sahrens 	struct osrenamearg ora;
1441789Sahrens 	int err;
1442789Sahrens 
1443789Sahrens 	dd = dsl_dir_open(osname, FTAG, &tail);
1444789Sahrens 	if (dd == NULL)
1445789Sahrens 		return (ENOENT);
1446789Sahrens 	if (tail == NULL) {
1447789Sahrens 		err = dsl_dir_sync_task(dd,
1448789Sahrens 		    dsl_dir_rename_sync, (void*)newname, 1<<12);
1449789Sahrens 		dsl_dir_close(dd, FTAG);
1450789Sahrens 		return (err);
1451789Sahrens 	}
1452789Sahrens 	if (tail[0] != '@') {
1453789Sahrens 		/* the name ended in a nonexistant component */
1454789Sahrens 		dsl_dir_close(dd, FTAG);
1455789Sahrens 		return (ENOENT);
1456789Sahrens 	}
1457789Sahrens 
1458789Sahrens 	ora.oldname = osname;
1459789Sahrens 	ora.newname = newname;
1460789Sahrens 
1461789Sahrens 	err = dsl_dir_sync_task(dd,
1462789Sahrens 	    dsl_dataset_snapshot_rename_sync, &ora, 1<<12);
1463789Sahrens 	dsl_dir_close(dd, FTAG);
1464789Sahrens 	return (err);
1465789Sahrens }
1466