xref: /onnv-gate/usr/src/uts/common/fs/zfs/dsl_dataset.c (revision 789:b348f31ed315)
1*789Sahrens /*
2*789Sahrens  * CDDL HEADER START
3*789Sahrens  *
4*789Sahrens  * The contents of this file are subject to the terms of the
5*789Sahrens  * Common Development and Distribution License, Version 1.0 only
6*789Sahrens  * (the "License").  You may not use this file except in compliance
7*789Sahrens  * with the License.
8*789Sahrens  *
9*789Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*789Sahrens  * or http://www.opensolaris.org/os/licensing.
11*789Sahrens  * See the License for the specific language governing permissions
12*789Sahrens  * and limitations under the License.
13*789Sahrens  *
14*789Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
15*789Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*789Sahrens  * If applicable, add the following below this CDDL HEADER, with the
17*789Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
18*789Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
19*789Sahrens  *
20*789Sahrens  * CDDL HEADER END
21*789Sahrens  */
22*789Sahrens /*
23*789Sahrens  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*789Sahrens  * Use is subject to license terms.
25*789Sahrens  */
26*789Sahrens 
27*789Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*789Sahrens 
29*789Sahrens #include <sys/dmu_objset.h>
30*789Sahrens #include <sys/dsl_dataset.h>
31*789Sahrens #include <sys/dsl_dir.h>
32*789Sahrens #include <sys/dmu_traverse.h>
33*789Sahrens #include <sys/dmu_tx.h>
34*789Sahrens #include <sys/arc.h>
35*789Sahrens #include <sys/zio.h>
36*789Sahrens #include <sys/zap.h>
37*789Sahrens #include <sys/unique.h>
38*789Sahrens #include <sys/zfs_context.h>
39*789Sahrens 
/*
 * Weight of an EXCLUSIVE open, and the ceiling on a dataset's total open
 * refcount.
 */
#define	DOS_REF_MAX	(1ULL << 62)

/* Block size handed to bplist_create() for each dataset's deadlist. */
#define	DSL_DEADLIST_BLOCKSIZE	SPA_MAXBLOCKSIZE

/*
 * Size charged as "uncompressed" for a block pointer: the physical size for
 * indirect blocks (level > 0) and for object types flagged ot_metadata, the
 * logical size for everything else.
 *
 * The expansion is a single parenthesized expression with no trailing
 * semicolon, so it can be used inside larger expressions and as a function
 * argument.  Note that bp is evaluated more than once.
 */
#define	BP_GET_UCSIZE(bp) \
	((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
	BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
47*789Sahrens 
48*789Sahrens /*
49*789Sahrens  * We use weighted reference counts to express the various forms of exclusion
50*789Sahrens  * between different open modes.  A STANDARD open is 1 point, an EXCLUSIVE open
51*789Sahrens  * is DOS_REF_MAX, and a PRIMARY open is little more than half of an EXCLUSIVE.
52*789Sahrens  * This makes the exclusion logic simple: the total refcnt for all opens cannot
53*789Sahrens  * exceed DOS_REF_MAX.  For example, EXCLUSIVE opens are exclusive because their
54*789Sahrens  * weight (DOS_REF_MAX) consumes the entire refcnt space.  PRIMARY opens consume
55*789Sahrens  * just over half of the refcnt space, so there can't be more than one, but it
56*789Sahrens  * can peacefully coexist with any number of STANDARD opens.
57*789Sahrens  */
58*789Sahrens static uint64_t ds_refcnt_weight[DS_MODE_LEVELS] = {
59*789Sahrens 	0,			/* DOS_MODE_NONE - invalid		*/
60*789Sahrens 	1,			/* DOS_MODE_STANDARD - unlimited number	*/
61*789Sahrens 	(DOS_REF_MAX >> 1) + 1,	/* DOS_MODE_PRIMARY - only one of these	*/
62*789Sahrens 	DOS_REF_MAX		/* DOS_MODE_EXCLUSIVE - no other opens	*/
63*789Sahrens };
64*789Sahrens 
65*789Sahrens 
66*789Sahrens void
67*789Sahrens dsl_dataset_block_born(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
68*789Sahrens {
69*789Sahrens 	int used = BP_GET_ASIZE(bp);
70*789Sahrens 	int compressed = BP_GET_PSIZE(bp);
71*789Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
72*789Sahrens 
73*789Sahrens 	dprintf_bp(bp, "born, ds=%p\n", ds);
74*789Sahrens 
75*789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
76*789Sahrens 	/* It could have been compressed away to nothing */
77*789Sahrens 	if (BP_IS_HOLE(bp))
78*789Sahrens 		return;
79*789Sahrens 	ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
80*789Sahrens 	ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
81*789Sahrens 	if (ds == NULL) {
82*789Sahrens 		/*
83*789Sahrens 		 * Account for the meta-objset space in its placeholder
84*789Sahrens 		 * dsl_dir.
85*789Sahrens 		 */
86*789Sahrens 		ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
87*789Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
88*789Sahrens 		    used, compressed, uncompressed, tx);
89*789Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
90*789Sahrens 		return;
91*789Sahrens 	}
92*789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
93*789Sahrens 	mutex_enter(&ds->ds_lock);
94*789Sahrens 	ds->ds_phys->ds_used_bytes += used;
95*789Sahrens 	ds->ds_phys->ds_compressed_bytes += compressed;
96*789Sahrens 	ds->ds_phys->ds_uncompressed_bytes += uncompressed;
97*789Sahrens 	ds->ds_phys->ds_unique_bytes += used;
98*789Sahrens 	mutex_exit(&ds->ds_lock);
99*789Sahrens 	dsl_dir_diduse_space(ds->ds_dir,
100*789Sahrens 	    used, compressed, uncompressed, tx);
101*789Sahrens }
102*789Sahrens 
103*789Sahrens void
104*789Sahrens dsl_dataset_block_kill(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
105*789Sahrens {
106*789Sahrens 	int used = BP_GET_ASIZE(bp);
107*789Sahrens 	int compressed = BP_GET_PSIZE(bp);
108*789Sahrens 	int uncompressed = BP_GET_UCSIZE(bp);
109*789Sahrens 
110*789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
111*789Sahrens 	if (BP_IS_HOLE(bp))
112*789Sahrens 		return;
113*789Sahrens 
114*789Sahrens 	ASSERT(used > 0);
115*789Sahrens 	if (ds == NULL) {
116*789Sahrens 		/*
117*789Sahrens 		 * Account for the meta-objset space in its placeholder
118*789Sahrens 		 * dataset.
119*789Sahrens 		 */
120*789Sahrens 		/* XXX this can fail, what do we do when it does? */
121*789Sahrens 		(void) arc_free(NULL, tx->tx_pool->dp_spa,
122*789Sahrens 		    tx->tx_txg, bp, NULL, NULL, ARC_WAIT);
123*789Sahrens 		bzero(bp, sizeof (blkptr_t));
124*789Sahrens 
125*789Sahrens 		dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir,
126*789Sahrens 		    -used, -compressed, -uncompressed, tx);
127*789Sahrens 		dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
128*789Sahrens 		return;
129*789Sahrens 	}
130*789Sahrens 	ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);
131*789Sahrens 
132*789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
133*789Sahrens 
134*789Sahrens 	if (bp->blk_birth > ds->ds_phys->ds_prev_snap_txg) {
135*789Sahrens 		dprintf_bp(bp, "freeing: %s", "");
136*789Sahrens 		/* XXX check return code? */
137*789Sahrens 		(void) arc_free(NULL, tx->tx_pool->dp_spa,
138*789Sahrens 		    tx->tx_txg, bp, NULL, NULL, ARC_WAIT);
139*789Sahrens 
140*789Sahrens 		mutex_enter(&ds->ds_lock);
141*789Sahrens 		/* XXX unique_bytes is not accurate for head datasets */
142*789Sahrens 		/* ASSERT3U(ds->ds_phys->ds_unique_bytes, >=, used); */
143*789Sahrens 		ds->ds_phys->ds_unique_bytes -= used;
144*789Sahrens 		mutex_exit(&ds->ds_lock);
145*789Sahrens 		dsl_dir_diduse_space(ds->ds_dir,
146*789Sahrens 		    -used, -compressed, -uncompressed, tx);
147*789Sahrens 	} else {
148*789Sahrens 		dprintf_bp(bp, "putting on dead list: %s", "");
149*789Sahrens 		bplist_enqueue(&ds->ds_deadlist, bp, tx);
150*789Sahrens 		/* if (bp->blk_birth > prev prev snap txg) prev unique += bs */
151*789Sahrens 		if (ds->ds_phys->ds_prev_snap_obj != 0) {
152*789Sahrens 			ASSERT3U(ds->ds_prev->ds_object, ==,
153*789Sahrens 			    ds->ds_phys->ds_prev_snap_obj);
154*789Sahrens 			ASSERT(ds->ds_prev->ds_phys->ds_num_children > 0);
155*789Sahrens 			if (ds->ds_prev->ds_phys->ds_next_snap_obj ==
156*789Sahrens 			    ds->ds_object &&
157*789Sahrens 			    bp->blk_birth >
158*789Sahrens 			    ds->ds_prev->ds_phys->ds_prev_snap_txg) {
159*789Sahrens 				dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
160*789Sahrens 				mutex_enter(&ds->ds_prev->ds_lock);
161*789Sahrens 				ds->ds_prev->ds_phys->ds_unique_bytes +=
162*789Sahrens 				    used;
163*789Sahrens 				mutex_exit(&ds->ds_prev->ds_lock);
164*789Sahrens 			}
165*789Sahrens 		}
166*789Sahrens 	}
167*789Sahrens 	bzero(bp, sizeof (blkptr_t));
168*789Sahrens 	mutex_enter(&ds->ds_lock);
169*789Sahrens 	ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
170*789Sahrens 	ds->ds_phys->ds_used_bytes -= used;
171*789Sahrens 	ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
172*789Sahrens 	ds->ds_phys->ds_compressed_bytes -= compressed;
173*789Sahrens 	ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
174*789Sahrens 	ds->ds_phys->ds_uncompressed_bytes -= uncompressed;
175*789Sahrens 	mutex_exit(&ds->ds_lock);
176*789Sahrens }
177*789Sahrens 
178*789Sahrens int
179*789Sahrens dsl_dataset_block_freeable(dsl_dataset_t *ds, uint64_t blk_birth, dmu_tx_t *tx)
180*789Sahrens {
181*789Sahrens 	uint64_t prev_snap_txg;
182*789Sahrens 	dsl_dir_t *dd;
183*789Sahrens 	/* ASSERT that it is not a snapshot */
184*789Sahrens 	if (ds == NULL)
185*789Sahrens 		return (TRUE);
186*789Sahrens 	/*
187*789Sahrens 	 * The snapshot creation could fail, but that would cause an
188*789Sahrens 	 * incorrect FALSE return, which would only result in an
189*789Sahrens 	 * overestimation of the amount of space that an operation would
190*789Sahrens 	 * consume, which is OK.
191*789Sahrens 	 *
192*789Sahrens 	 * There's also a small window where we could miss a pending
193*789Sahrens 	 * snapshot, because we could set the sync task in the quiescing
194*789Sahrens 	 * phase.  So this should only be used as a guess.
195*789Sahrens 	 */
196*789Sahrens 	dd = ds->ds_dir;
197*789Sahrens 	mutex_enter(&dd->dd_lock);
198*789Sahrens 	if (dd->dd_sync_func == dsl_dataset_snapshot_sync &&
199*789Sahrens 	    dd->dd_sync_txg < tx->tx_txg)
200*789Sahrens 		prev_snap_txg = dd->dd_sync_txg;
201*789Sahrens 	else
202*789Sahrens 		prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
203*789Sahrens 	mutex_exit(&dd->dd_lock);
204*789Sahrens 	return (blk_birth > prev_snap_txg);
205*789Sahrens }
206*789Sahrens 
207*789Sahrens /* ARGSUSED */
208*789Sahrens static void
209*789Sahrens dsl_dataset_evict(dmu_buf_t *db, void *dsv)
210*789Sahrens {
211*789Sahrens 	dsl_dataset_t *ds = dsv;
212*789Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
213*789Sahrens 
214*789Sahrens 	/* open_refcount == DOS_REF_MAX when deleting */
215*789Sahrens 	ASSERT(ds->ds_open_refcount == 0 ||
216*789Sahrens 	    ds->ds_open_refcount == DOS_REF_MAX);
217*789Sahrens 
218*789Sahrens 	dprintf_ds(ds, "evicting %s\n", "");
219*789Sahrens 
220*789Sahrens 	unique_remove(ds->ds_phys->ds_fsid_guid);
221*789Sahrens 
222*789Sahrens 	if (ds->ds_user_ptr != NULL)
223*789Sahrens 		ds->ds_user_evict_func(ds, ds->ds_user_ptr);
224*789Sahrens 
225*789Sahrens 	if (ds->ds_prev) {
226*789Sahrens 		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
227*789Sahrens 		ds->ds_prev = NULL;
228*789Sahrens 	}
229*789Sahrens 
230*789Sahrens 	bplist_close(&ds->ds_deadlist);
231*789Sahrens 	dsl_dir_close(ds->ds_dir, ds);
232*789Sahrens 
233*789Sahrens 	if (list_link_active(&ds->ds_synced_link))
234*789Sahrens 		list_remove(&dp->dp_synced_objsets, ds);
235*789Sahrens 
236*789Sahrens 	kmem_free(ds, sizeof (dsl_dataset_t));
237*789Sahrens }
238*789Sahrens 
239*789Sahrens static void
240*789Sahrens dsl_dataset_get_snapname(dsl_dataset_t *ds)
241*789Sahrens {
242*789Sahrens 	dsl_dataset_phys_t *headphys;
243*789Sahrens 	int err;
244*789Sahrens 	dmu_buf_t *headdbuf;
245*789Sahrens 	dsl_pool_t *dp = ds->ds_dir->dd_pool;
246*789Sahrens 	objset_t *mos = dp->dp_meta_objset;
247*789Sahrens 
248*789Sahrens 	if (ds->ds_snapname[0])
249*789Sahrens 		return;
250*789Sahrens 	if (ds->ds_phys->ds_next_snap_obj == 0)
251*789Sahrens 		return;
252*789Sahrens 
253*789Sahrens 	headdbuf = dmu_bonus_hold_tag(mos,
254*789Sahrens 	    ds->ds_dir->dd_phys->dd_head_dataset_obj, FTAG);
255*789Sahrens 	dmu_buf_read(headdbuf);
256*789Sahrens 	headphys = headdbuf->db_data;
257*789Sahrens 	err = zap_value_search(dp->dp_meta_objset,
258*789Sahrens 	    headphys->ds_snapnames_zapobj, ds->ds_object, ds->ds_snapname);
259*789Sahrens 	ASSERT(err == 0);
260*789Sahrens 	dmu_buf_rele_tag(headdbuf, FTAG);
261*789Sahrens }
262*789Sahrens 
263*789Sahrens dsl_dataset_t *
264*789Sahrens dsl_dataset_open_obj(dsl_pool_t *dp, uint64_t dsobj, const char *snapname,
265*789Sahrens     int mode, void *tag)
266*789Sahrens {
267*789Sahrens 	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
268*789Sahrens 	objset_t *mos = dp->dp_meta_objset;
269*789Sahrens 	dmu_buf_t *dbuf;
270*789Sahrens 	dsl_dataset_t *ds;
271*789Sahrens 
272*789Sahrens 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
273*789Sahrens 	    dsl_pool_sync_context(dp));
274*789Sahrens 
275*789Sahrens 	dbuf = dmu_bonus_hold_tag(mos, dsobj, tag);
276*789Sahrens 	dmu_buf_read(dbuf);
277*789Sahrens 	ds = dmu_buf_get_user(dbuf);
278*789Sahrens 	if (ds == NULL) {
279*789Sahrens 		dsl_dataset_t *winner;
280*789Sahrens 
281*789Sahrens 		ds = kmem_zalloc(sizeof (dsl_dataset_t), KM_SLEEP);
282*789Sahrens 		ds->ds_dbuf = dbuf;
283*789Sahrens 		ds->ds_object = dsobj;
284*789Sahrens 		ds->ds_phys = dbuf->db_data;
285*789Sahrens 		ds->ds_dir = dsl_dir_open_obj(dp,
286*789Sahrens 		    ds->ds_phys->ds_dir_obj, NULL, ds);
287*789Sahrens 
288*789Sahrens 		bplist_open(&ds->ds_deadlist,
289*789Sahrens 		    mos, ds->ds_phys->ds_deadlist_obj);
290*789Sahrens 
291*789Sahrens 		if (ds->ds_dir->dd_phys->dd_head_dataset_obj == dsobj) {
292*789Sahrens 			ds->ds_snapname[0] = '\0';
293*789Sahrens 			if (ds->ds_phys->ds_prev_snap_obj) {
294*789Sahrens 				ds->ds_prev =
295*789Sahrens 				    dsl_dataset_open_obj(dp,
296*789Sahrens 				    ds->ds_phys->ds_prev_snap_obj, NULL,
297*789Sahrens 				    DS_MODE_NONE, ds);
298*789Sahrens 			}
299*789Sahrens 		} else {
300*789Sahrens 			if (snapname) {
301*789Sahrens #ifdef ZFS_DEBUG
302*789Sahrens 				dsl_dataset_phys_t *headphys;
303*789Sahrens 				int err;
304*789Sahrens 				dmu_buf_t *headdbuf = dmu_bonus_hold_tag(mos,
305*789Sahrens 				    ds->ds_dir->dd_phys->
306*789Sahrens 				    dd_head_dataset_obj, FTAG);
307*789Sahrens 				dmu_buf_read(headdbuf);
308*789Sahrens 				headphys = headdbuf->db_data;
309*789Sahrens 				uint64_t foundobj;
310*789Sahrens 				err = zap_lookup(dp->dp_meta_objset,
311*789Sahrens 				    headphys->ds_snapnames_zapobj,
312*789Sahrens 				    snapname, sizeof (foundobj), 1, &foundobj);
313*789Sahrens 				ASSERT3U(err, ==, 0);
314*789Sahrens 				ASSERT3U(foundobj, ==, dsobj);
315*789Sahrens 				dmu_buf_rele_tag(headdbuf, FTAG);
316*789Sahrens #endif
317*789Sahrens 				(void) strcat(ds->ds_snapname, snapname);
318*789Sahrens 			} else if (zfs_flags & ZFS_DEBUG_SNAPNAMES) {
319*789Sahrens 				dsl_dataset_get_snapname(ds);
320*789Sahrens 			}
321*789Sahrens 		}
322*789Sahrens 
323*789Sahrens 		winner = dmu_buf_set_user_ie(dbuf, ds, &ds->ds_phys,
324*789Sahrens 		    dsl_dataset_evict);
325*789Sahrens 		if (winner) {
326*789Sahrens 			bplist_close(&ds->ds_deadlist);
327*789Sahrens 			if (ds->ds_prev) {
328*789Sahrens 				dsl_dataset_close(ds->ds_prev,
329*789Sahrens 				    DS_MODE_NONE, ds);
330*789Sahrens 			}
331*789Sahrens 			dsl_dir_close(ds->ds_dir, ds);
332*789Sahrens 			kmem_free(ds, sizeof (dsl_dataset_t));
333*789Sahrens 			ds = winner;
334*789Sahrens 		} else {
335*789Sahrens 			uint64_t new =
336*789Sahrens 			    unique_insert(ds->ds_phys->ds_fsid_guid);
337*789Sahrens 			if (new != ds->ds_phys->ds_fsid_guid) {
338*789Sahrens 				/* XXX it won't necessarily be synced... */
339*789Sahrens 				ds->ds_phys->ds_fsid_guid = new;
340*789Sahrens 			}
341*789Sahrens 		}
342*789Sahrens 	}
343*789Sahrens 	ASSERT3P(ds->ds_dbuf, ==, dbuf);
344*789Sahrens 	ASSERT3P(ds->ds_phys, ==, dbuf->db_data);
345*789Sahrens 
346*789Sahrens 	mutex_enter(&ds->ds_lock);
347*789Sahrens 	if ((DS_MODE_LEVEL(mode) == DS_MODE_PRIMARY &&
348*789Sahrens 	    ds->ds_phys->ds_restoring && !DS_MODE_IS_RESTORE(mode)) ||
349*789Sahrens 	    (ds->ds_open_refcount + weight > DOS_REF_MAX)) {
350*789Sahrens 		mutex_exit(&ds->ds_lock);
351*789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
352*789Sahrens 		return (NULL);
353*789Sahrens 	}
354*789Sahrens 	ds->ds_open_refcount += weight;
355*789Sahrens 	mutex_exit(&ds->ds_lock);
356*789Sahrens 
357*789Sahrens 	return (ds);
358*789Sahrens }
359*789Sahrens 
360*789Sahrens int
361*789Sahrens dsl_dataset_open_spa(spa_t *spa, const char *name, int mode,
362*789Sahrens     void *tag, dsl_dataset_t **dsp)
363*789Sahrens {
364*789Sahrens 	dsl_dir_t *dd;
365*789Sahrens 	dsl_pool_t *dp;
366*789Sahrens 	const char *tail;
367*789Sahrens 	uint64_t obj;
368*789Sahrens 	dsl_dataset_t *ds = NULL;
369*789Sahrens 	int err = 0;
370*789Sahrens 
371*789Sahrens 	dd = dsl_dir_open_spa(spa, name, FTAG, &tail);
372*789Sahrens 	if (dd == NULL)
373*789Sahrens 		return (ENOENT);
374*789Sahrens 
375*789Sahrens 	dp = dd->dd_pool;
376*789Sahrens 	obj = dd->dd_phys->dd_head_dataset_obj;
377*789Sahrens 	rw_enter(&dp->dp_config_rwlock, RW_READER);
378*789Sahrens 	if (obj == 0) {
379*789Sahrens 		/* A dataset with no associated objset */
380*789Sahrens 		err = ENOENT;
381*789Sahrens 		goto out;
382*789Sahrens 	}
383*789Sahrens 
384*789Sahrens 	if (tail != NULL) {
385*789Sahrens 		objset_t *mos = dp->dp_meta_objset;
386*789Sahrens 
387*789Sahrens 		ds = dsl_dataset_open_obj(dp, obj, NULL, DS_MODE_NONE, tag);
388*789Sahrens 		obj = ds->ds_phys->ds_snapnames_zapobj;
389*789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, tag);
390*789Sahrens 		ds = NULL;
391*789Sahrens 
392*789Sahrens 		if (tail[0] != '@') {
393*789Sahrens 			err = ENOENT;
394*789Sahrens 			goto out;
395*789Sahrens 		}
396*789Sahrens 		tail++;
397*789Sahrens 
398*789Sahrens 		/* Look for a snapshot */
399*789Sahrens 		if (!DS_MODE_IS_READONLY(mode)) {
400*789Sahrens 			err = EROFS;
401*789Sahrens 			goto out;
402*789Sahrens 		}
403*789Sahrens 		dprintf("looking for snapshot '%s'\n", tail);
404*789Sahrens 		err = zap_lookup(mos, obj, tail, 8, 1, &obj);
405*789Sahrens 		if (err)
406*789Sahrens 			goto out;
407*789Sahrens 	}
408*789Sahrens 	ds = dsl_dataset_open_obj(dp, obj, tail, mode, tag);
409*789Sahrens 	if (ds == NULL)
410*789Sahrens 		err = EBUSY;
411*789Sahrens 
412*789Sahrens out:
413*789Sahrens 	rw_exit(&dp->dp_config_rwlock);
414*789Sahrens 	dsl_dir_close(dd, FTAG);
415*789Sahrens 
416*789Sahrens 	ASSERT3U((err == 0), ==, (ds != NULL));
417*789Sahrens 	/* ASSERT(ds == NULL || strcmp(name, ds->ds_name) == 0); */
418*789Sahrens 
419*789Sahrens 	*dsp = ds;
420*789Sahrens 	return (err);
421*789Sahrens }
422*789Sahrens 
423*789Sahrens int
424*789Sahrens dsl_dataset_open(const char *name, int mode, void *tag, dsl_dataset_t **dsp)
425*789Sahrens {
426*789Sahrens 	return (dsl_dataset_open_spa(NULL, name, mode, tag, dsp));
427*789Sahrens }
428*789Sahrens 
429*789Sahrens void
430*789Sahrens dsl_dataset_name(dsl_dataset_t *ds, char *name)
431*789Sahrens {
432*789Sahrens 	if (ds == NULL) {
433*789Sahrens 		(void) strcpy(name, "mos");
434*789Sahrens 	} else {
435*789Sahrens 		dsl_dir_name(ds->ds_dir, name);
436*789Sahrens 		dsl_dataset_get_snapname(ds);
437*789Sahrens 		if (ds->ds_snapname[0]) {
438*789Sahrens 			(void) strcat(name, "@");
439*789Sahrens 			if (!MUTEX_HELD(&ds->ds_lock)) {
440*789Sahrens 				/*
441*789Sahrens 				 * We use a "recursive" mutex so that we
442*789Sahrens 				 * can call dprintf_ds() with ds_lock held.
443*789Sahrens 				 */
444*789Sahrens 				mutex_enter(&ds->ds_lock);
445*789Sahrens 				(void) strcat(name, ds->ds_snapname);
446*789Sahrens 				mutex_exit(&ds->ds_lock);
447*789Sahrens 			} else {
448*789Sahrens 				(void) strcat(name, ds->ds_snapname);
449*789Sahrens 			}
450*789Sahrens 		}
451*789Sahrens 	}
452*789Sahrens }
453*789Sahrens 
454*789Sahrens void
455*789Sahrens dsl_dataset_close(dsl_dataset_t *ds, int mode, void *tag)
456*789Sahrens {
457*789Sahrens 	uint64_t weight = ds_refcnt_weight[DS_MODE_LEVEL(mode)];
458*789Sahrens 	mutex_enter(&ds->ds_lock);
459*789Sahrens 	ASSERT3U(ds->ds_open_refcount, >=, weight);
460*789Sahrens 	ds->ds_open_refcount -= weight;
461*789Sahrens 	dprintf_ds(ds, "closing mode %u refcount now 0x%llx\n",
462*789Sahrens 	    mode, ds->ds_open_refcount);
463*789Sahrens 	mutex_exit(&ds->ds_lock);
464*789Sahrens 
465*789Sahrens 	dmu_buf_rele_tag(ds->ds_dbuf, tag);
466*789Sahrens }
467*789Sahrens 
468*789Sahrens void
469*789Sahrens dsl_dataset_create_root(dsl_pool_t *dp, uint64_t *ddobjp, dmu_tx_t *tx)
470*789Sahrens {
471*789Sahrens 	objset_t *mos = dp->dp_meta_objset;
472*789Sahrens 	dmu_buf_t *dbuf;
473*789Sahrens 	dsl_dataset_phys_t *dsphys;
474*789Sahrens 	dsl_dataset_t *ds;
475*789Sahrens 	uint64_t dsobj;
476*789Sahrens 	dsl_dir_t *dd;
477*789Sahrens 
478*789Sahrens 	dsl_dir_create_root(mos, ddobjp, tx);
479*789Sahrens 	dd = dsl_dir_open_obj(dp, *ddobjp, NULL, FTAG);
480*789Sahrens 	ASSERT(dd != NULL);
481*789Sahrens 
482*789Sahrens 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_OBJSET, 0,
483*789Sahrens 	    DMU_OT_DSL_OBJSET, sizeof (dsl_dataset_phys_t), tx);
484*789Sahrens 	dbuf = dmu_bonus_hold(mos, dsobj);
485*789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
486*789Sahrens 	dsphys = dbuf->db_data;
487*789Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
488*789Sahrens 	dsphys->ds_fsid_guid = unique_create();
489*789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
490*789Sahrens 	    sizeof (dsphys->ds_guid));
491*789Sahrens 	dsphys->ds_snapnames_zapobj =
492*789Sahrens 	    zap_create(mos, DMU_OT_DSL_OBJSET_SNAP_MAP, DMU_OT_NONE, 0, tx);
493*789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
494*789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
495*789Sahrens 	dsphys->ds_deadlist_obj =
496*789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
497*789Sahrens 	dmu_buf_rele(dbuf);
498*789Sahrens 
499*789Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
500*789Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
501*789Sahrens 	dsl_dir_close(dd, FTAG);
502*789Sahrens 
503*789Sahrens 	ds = dsl_dataset_open_obj(dp, dsobj, NULL, DS_MODE_NONE, FTAG);
504*789Sahrens 	(void) dmu_objset_create_impl(dp->dp_spa, ds, DMU_OST_ZFS, tx);
505*789Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
506*789Sahrens }
507*789Sahrens 
508*789Sahrens int
509*789Sahrens dsl_dataset_create_sync(dsl_dir_t *pds, const char *fullname,
510*789Sahrens     const char *lastname, dsl_dataset_t *clone_parent, dmu_tx_t *tx)
511*789Sahrens {
512*789Sahrens 	int err;
513*789Sahrens 	dsl_pool_t *dp = pds->dd_pool;
514*789Sahrens 	dmu_buf_t *dbuf;
515*789Sahrens 	dsl_dataset_phys_t *dsphys;
516*789Sahrens 	uint64_t dsobj;
517*789Sahrens 	objset_t *mos = dp->dp_meta_objset;
518*789Sahrens 	dsl_dir_t *dd;
519*789Sahrens 
520*789Sahrens 	if (clone_parent != NULL) {
521*789Sahrens 		/*
522*789Sahrens 		 * You can't clone across pools.
523*789Sahrens 		 */
524*789Sahrens 		if (clone_parent->ds_dir->dd_pool != dp)
525*789Sahrens 			return (EXDEV);
526*789Sahrens 
527*789Sahrens 		/*
528*789Sahrens 		 * You can only clone snapshots, not the head datasets.
529*789Sahrens 		 */
530*789Sahrens 		if (clone_parent->ds_phys->ds_num_children == 0)
531*789Sahrens 			return (EINVAL);
532*789Sahrens 	}
533*789Sahrens 
534*789Sahrens 	ASSERT(lastname[0] != '@');
535*789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
536*789Sahrens 
537*789Sahrens 	err = dsl_dir_create_sync(pds, lastname, tx);
538*789Sahrens 	if (err)
539*789Sahrens 		return (err);
540*789Sahrens 	dd = dsl_dir_open_spa(dp->dp_spa, fullname, FTAG, NULL);
541*789Sahrens 	ASSERT(dd != NULL);
542*789Sahrens 
543*789Sahrens 	/* This is the point of no (unsuccessful) return */
544*789Sahrens 
545*789Sahrens 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_OBJSET, 0,
546*789Sahrens 	    DMU_OT_DSL_OBJSET, sizeof (dsl_dataset_phys_t), tx);
547*789Sahrens 	dbuf = dmu_bonus_hold(mos, dsobj);
548*789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
549*789Sahrens 	dsphys = dbuf->db_data;
550*789Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
551*789Sahrens 	dsphys->ds_fsid_guid = unique_create();
552*789Sahrens 	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
553*789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
554*789Sahrens 	    sizeof (dsphys->ds_guid));
555*789Sahrens 	dsphys->ds_snapnames_zapobj =
556*789Sahrens 	    zap_create(mos, DMU_OT_DSL_OBJSET_SNAP_MAP, DMU_OT_NONE, 0, tx);
557*789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
558*789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
559*789Sahrens 	dsphys->ds_deadlist_obj =
560*789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
561*789Sahrens 	if (clone_parent) {
562*789Sahrens 		dsphys->ds_prev_snap_obj = clone_parent->ds_object;
563*789Sahrens 		dsphys->ds_prev_snap_txg =
564*789Sahrens 		    clone_parent->ds_phys->ds_creation_txg;
565*789Sahrens 		dsphys->ds_used_bytes =
566*789Sahrens 		    clone_parent->ds_phys->ds_used_bytes;
567*789Sahrens 		dsphys->ds_compressed_bytes =
568*789Sahrens 		    clone_parent->ds_phys->ds_compressed_bytes;
569*789Sahrens 		dsphys->ds_uncompressed_bytes =
570*789Sahrens 		    clone_parent->ds_phys->ds_uncompressed_bytes;
571*789Sahrens 		dsphys->ds_bp = clone_parent->ds_phys->ds_bp;
572*789Sahrens 
573*789Sahrens 		dmu_buf_will_dirty(clone_parent->ds_dbuf, tx);
574*789Sahrens 		clone_parent->ds_phys->ds_num_children++;
575*789Sahrens 
576*789Sahrens 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
577*789Sahrens 		dd->dd_phys->dd_clone_parent_obj = clone_parent->ds_object;
578*789Sahrens 	}
579*789Sahrens 	dmu_buf_rele(dbuf);
580*789Sahrens 
581*789Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
582*789Sahrens 	dd->dd_phys->dd_head_dataset_obj = dsobj;
583*789Sahrens 	dsl_dir_close(dd, FTAG);
584*789Sahrens 
585*789Sahrens 	return (0);
586*789Sahrens }
587*789Sahrens 
588*789Sahrens 
589*789Sahrens int
590*789Sahrens dsl_dataset_destroy(const char *name)
591*789Sahrens {
592*789Sahrens 	int err;
593*789Sahrens 	dsl_pool_t *dp;
594*789Sahrens 	dsl_dir_t *dd;
595*789Sahrens 	const char *tail;
596*789Sahrens 
597*789Sahrens 	dd = dsl_dir_open(name, FTAG, &tail);
598*789Sahrens 	if (dd == NULL)
599*789Sahrens 		return (ENOENT);
600*789Sahrens 
601*789Sahrens 	dp = dd->dd_pool;
602*789Sahrens 	if (tail != NULL) {
603*789Sahrens 		if (tail[0] != '@') {
604*789Sahrens 			dsl_dir_close(dd, FTAG);
605*789Sahrens 			return (ENOENT);
606*789Sahrens 		}
607*789Sahrens 		tail++;
608*789Sahrens 		/* Just blow away the snapshot */
609*789Sahrens 		do {
610*789Sahrens 			txg_wait_synced(dp, 0);
611*789Sahrens 			err = dsl_dir_sync_task(dd,
612*789Sahrens 			    dsl_dataset_destroy_sync, (void*)tail, 0);
613*789Sahrens 		} while (err == EAGAIN);
614*789Sahrens 		dsl_dir_close(dd, FTAG);
615*789Sahrens 	} else {
616*789Sahrens 		char buf[MAXNAMELEN];
617*789Sahrens 		char *cp;
618*789Sahrens 
619*789Sahrens 		dsl_dir_t *pds;
620*789Sahrens 		if (dd->dd_phys->dd_parent_obj == 0) {
621*789Sahrens 			dsl_dir_close(dd, FTAG);
622*789Sahrens 			return (EINVAL);
623*789Sahrens 		}
624*789Sahrens 		/*
625*789Sahrens 		 * Make sure it's not dirty before we destroy it.
626*789Sahrens 		 */
627*789Sahrens 		txg_wait_synced(dd->dd_pool, 0);
628*789Sahrens 		/*
629*789Sahrens 		 * Blow away the dsl_dir + head dataset.
630*789Sahrens 		 * dsl_dir_destroy_sync() will call
631*789Sahrens 		 * dsl_dataset_destroy_sync() to destroy the head dataset.
632*789Sahrens 		 */
633*789Sahrens 		rw_enter(&dp->dp_config_rwlock, RW_READER);
634*789Sahrens 		pds = dsl_dir_open_obj(dd->dd_pool,
635*789Sahrens 		    dd->dd_phys->dd_parent_obj, NULL, FTAG);
636*789Sahrens 		dsl_dir_close(dd, FTAG);
637*789Sahrens 		rw_exit(&dp->dp_config_rwlock);
638*789Sahrens 
639*789Sahrens 		(void) strcpy(buf, name);
640*789Sahrens 		cp = strrchr(buf, '/') + 1;
641*789Sahrens 		ASSERT(cp[0] != '\0');
642*789Sahrens 		do {
643*789Sahrens 			txg_wait_synced(dp, 0);
644*789Sahrens 			err = dsl_dir_sync_task(pds,
645*789Sahrens 			    dsl_dir_destroy_sync, cp, 0);
646*789Sahrens 		} while (err == EAGAIN);
647*789Sahrens 		dsl_dir_close(pds, FTAG);
648*789Sahrens 	}
649*789Sahrens 
650*789Sahrens 	return (err);
651*789Sahrens }
652*789Sahrens 
653*789Sahrens int
654*789Sahrens dsl_dataset_rollback(const char *name)
655*789Sahrens {
656*789Sahrens 	int err;
657*789Sahrens 	dsl_dir_t *dd;
658*789Sahrens 	const char *tail;
659*789Sahrens 
660*789Sahrens 	dd = dsl_dir_open(name, FTAG, &tail);
661*789Sahrens 	if (dd == NULL)
662*789Sahrens 		return (ENOENT);
663*789Sahrens 
664*789Sahrens 	if (tail != NULL) {
665*789Sahrens 		dsl_dir_close(dd, FTAG);
666*789Sahrens 		return (EINVAL);
667*789Sahrens 	}
668*789Sahrens 	do {
669*789Sahrens 		txg_wait_synced(dd->dd_pool, 0);
670*789Sahrens 		err = dsl_dir_sync_task(dd,
671*789Sahrens 		    dsl_dataset_rollback_sync, NULL, 0);
672*789Sahrens 	} while (err == EAGAIN);
673*789Sahrens 	dsl_dir_close(dd, FTAG);
674*789Sahrens 
675*789Sahrens 	return (err);
676*789Sahrens }
677*789Sahrens 
678*789Sahrens void *
679*789Sahrens dsl_dataset_set_user_ptr(dsl_dataset_t *ds,
680*789Sahrens     void *p, dsl_dataset_evict_func_t func)
681*789Sahrens {
682*789Sahrens 	void *old;
683*789Sahrens 
684*789Sahrens 	mutex_enter(&ds->ds_lock);
685*789Sahrens 	old = ds->ds_user_ptr;
686*789Sahrens 	if (old == NULL) {
687*789Sahrens 		ds->ds_user_ptr = p;
688*789Sahrens 		ds->ds_user_evict_func = func;
689*789Sahrens 	}
690*789Sahrens 	mutex_exit(&ds->ds_lock);
691*789Sahrens 	return (old);
692*789Sahrens }
693*789Sahrens 
694*789Sahrens void *
695*789Sahrens dsl_dataset_get_user_ptr(dsl_dataset_t *ds)
696*789Sahrens {
697*789Sahrens 	return (ds->ds_user_ptr);
698*789Sahrens }
699*789Sahrens 
700*789Sahrens 
701*789Sahrens void
702*789Sahrens dsl_dataset_get_blkptr(dsl_dataset_t *ds, blkptr_t *bp)
703*789Sahrens {
704*789Sahrens 	*bp = ds->ds_phys->ds_bp;
705*789Sahrens }
706*789Sahrens 
707*789Sahrens void
708*789Sahrens dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
709*789Sahrens {
710*789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
711*789Sahrens 	/* If it's the meta-objset, set dp_meta_rootbp */
712*789Sahrens 	if (ds == NULL) {
713*789Sahrens 		tx->tx_pool->dp_meta_rootbp = *bp;
714*789Sahrens 	} else {
715*789Sahrens 		dmu_buf_will_dirty(ds->ds_dbuf, tx);
716*789Sahrens 		ds->ds_phys->ds_bp = *bp;
717*789Sahrens 	}
718*789Sahrens }
719*789Sahrens 
720*789Sahrens spa_t *
721*789Sahrens dsl_dataset_get_spa(dsl_dataset_t *ds)
722*789Sahrens {
723*789Sahrens 	return (ds->ds_dir->dd_pool->dp_spa);
724*789Sahrens }
725*789Sahrens 
726*789Sahrens void
727*789Sahrens dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
728*789Sahrens {
729*789Sahrens 	dsl_pool_t *dp;
730*789Sahrens 
731*789Sahrens 	if (ds == NULL) /* this is the meta-objset */
732*789Sahrens 		return;
733*789Sahrens 
734*789Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
735*789Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
736*789Sahrens 
737*789Sahrens 	dp = ds->ds_dir->dd_pool;
738*789Sahrens 
739*789Sahrens 	if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg) == 0) {
740*789Sahrens 		/* up the hold count until we can be written out */
741*789Sahrens 		dmu_buf_add_ref(ds->ds_dbuf, ds);
742*789Sahrens 	}
743*789Sahrens }
744*789Sahrens 
745*789Sahrens struct killarg {
746*789Sahrens 	uint64_t *usedp;
747*789Sahrens 	uint64_t *compressedp;
748*789Sahrens 	uint64_t *uncompressedp;
749*789Sahrens 	zio_t *zio;
750*789Sahrens 	dmu_tx_t *tx;
751*789Sahrens };
752*789Sahrens 
753*789Sahrens static int
754*789Sahrens kill_blkptr(traverse_blk_cache_t *bc, spa_t *spa, void *arg)
755*789Sahrens {
756*789Sahrens 	struct killarg *ka = arg;
757*789Sahrens 	blkptr_t *bp = &bc->bc_blkptr;
758*789Sahrens 
759*789Sahrens 	ASSERT3U(bc->bc_errno, ==, 0);
760*789Sahrens 
761*789Sahrens 	/*
762*789Sahrens 	 * Since this callback is not called concurrently, no lock is
763*789Sahrens 	 * needed on the accounting values.
764*789Sahrens 	 */
765*789Sahrens 	*ka->usedp += BP_GET_ASIZE(bp);
766*789Sahrens 	*ka->compressedp += BP_GET_PSIZE(bp);
767*789Sahrens 	*ka->uncompressedp += BP_GET_UCSIZE(bp);
768*789Sahrens 	/* XXX check for EIO? */
769*789Sahrens 	(void) arc_free(ka->zio, spa, ka->tx->tx_txg, bp, NULL, NULL,
770*789Sahrens 	    ARC_NOWAIT);
771*789Sahrens 	return (0);
772*789Sahrens }
773*789Sahrens 
774*789Sahrens /* ARGSUSED */
775*789Sahrens int
776*789Sahrens dsl_dataset_rollback_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
777*789Sahrens {
778*789Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
779*789Sahrens 	dsl_dataset_t *ds;
780*789Sahrens 
781*789Sahrens 	if (dd->dd_phys->dd_head_dataset_obj == 0)
782*789Sahrens 		return (EINVAL);
783*789Sahrens 	ds = dsl_dataset_open_obj(dd->dd_pool,
784*789Sahrens 	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG);
785*789Sahrens 
786*789Sahrens 	if (ds->ds_phys->ds_prev_snap_txg == 0) {
787*789Sahrens 		/*
788*789Sahrens 		 * There's no previous snapshot.  I suppose we could
789*789Sahrens 		 * roll it back to being empty (and re-initialize the
790*789Sahrens 		 * upper (ZPL) layer).  But for now there's no way to do
791*789Sahrens 		 * this via the user interface.
792*789Sahrens 		 */
793*789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
794*789Sahrens 		return (EINVAL);
795*789Sahrens 	}
796*789Sahrens 
797*789Sahrens 	mutex_enter(&ds->ds_lock);
798*789Sahrens 	if (ds->ds_open_refcount > 0) {
799*789Sahrens 		mutex_exit(&ds->ds_lock);
800*789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
801*789Sahrens 		return (EBUSY);
802*789Sahrens 	}
803*789Sahrens 
804*789Sahrens 	/*
805*789Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
806*789Sahrens 	 * them.  Try again.
807*789Sahrens 	 */
808*789Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) {
809*789Sahrens 		mutex_exit(&ds->ds_lock);
810*789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
811*789Sahrens 		return (EAGAIN);
812*789Sahrens 	}
813*789Sahrens 
814*789Sahrens 	/* THE POINT OF NO (unsuccessful) RETURN */
815*789Sahrens 	ds->ds_open_refcount = DOS_REF_MAX;
816*789Sahrens 	mutex_exit(&ds->ds_lock);
817*789Sahrens 
818*789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
819*789Sahrens 
820*789Sahrens 	/* Zero out the deadlist. */
821*789Sahrens 	dprintf("old deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj);
822*789Sahrens 	bplist_close(&ds->ds_deadlist);
823*789Sahrens 	bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
824*789Sahrens 	ds->ds_phys->ds_deadlist_obj =
825*789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
826*789Sahrens 	bplist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
827*789Sahrens 	dprintf("new deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj);
828*789Sahrens 
829*789Sahrens 	{
830*789Sahrens 		/* Free blkptrs that we gave birth to */
831*789Sahrens 		zio_t *zio;
832*789Sahrens 		uint64_t used = 0, compressed = 0, uncompressed = 0;
833*789Sahrens 		struct killarg ka;
834*789Sahrens 
835*789Sahrens 		zio = zio_root(tx->tx_pool->dp_spa, NULL, NULL,
836*789Sahrens 		    ZIO_FLAG_MUSTSUCCEED);
837*789Sahrens 		ka.usedp = &used;
838*789Sahrens 		ka.compressedp = &compressed;
839*789Sahrens 		ka.uncompressedp = &uncompressed;
840*789Sahrens 		ka.zio = zio;
841*789Sahrens 		ka.tx = tx;
842*789Sahrens 		(void) traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
843*789Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
844*789Sahrens 		(void) zio_wait(zio);
845*789Sahrens 
846*789Sahrens 		dsl_dir_diduse_space(dd,
847*789Sahrens 		    -used, -compressed, -uncompressed, tx);
848*789Sahrens 	}
849*789Sahrens 
850*789Sahrens 	/* Change our contents to that of the prev snapshot (finally!) */
851*789Sahrens 	ASSERT3U(ds->ds_prev->ds_object, ==, ds->ds_phys->ds_prev_snap_obj);
852*789Sahrens 	ds->ds_phys->ds_bp = ds->ds_prev->ds_phys->ds_bp;
853*789Sahrens 	ds->ds_phys->ds_used_bytes = ds->ds_prev->ds_phys->ds_used_bytes;
854*789Sahrens 	ds->ds_phys->ds_compressed_bytes =
855*789Sahrens 	    ds->ds_prev->ds_phys->ds_compressed_bytes;
856*789Sahrens 	ds->ds_phys->ds_uncompressed_bytes =
857*789Sahrens 	    ds->ds_prev->ds_phys->ds_uncompressed_bytes;
858*789Sahrens 	ds->ds_phys->ds_restoring = ds->ds_prev->ds_phys->ds_restoring;
859*789Sahrens 	ds->ds_phys->ds_unique_bytes = 0;
860*789Sahrens 
861*789Sahrens 	dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
862*789Sahrens 	ds->ds_prev->ds_phys->ds_unique_bytes = 0;
863*789Sahrens 
864*789Sahrens 	dprintf("new deadlist obj = %llx\n", ds->ds_phys->ds_deadlist_obj);
865*789Sahrens 	ds->ds_open_refcount = 0;
866*789Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
867*789Sahrens 
868*789Sahrens 	return (0);
869*789Sahrens }
870*789Sahrens 
871*789Sahrens int
872*789Sahrens dsl_dataset_destroy_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
873*789Sahrens {
874*789Sahrens 	const char *snapname = arg;
875*789Sahrens 	uint64_t used = 0, compressed = 0, uncompressed = 0;
876*789Sahrens 	blkptr_t bp;
877*789Sahrens 	zio_t *zio;
878*789Sahrens 	int err;
879*789Sahrens 	int after_branch_point = FALSE;
880*789Sahrens 	int drop_lock = FALSE;
881*789Sahrens 	dsl_pool_t *dp = dd->dd_pool;
882*789Sahrens 	objset_t *mos = dp->dp_meta_objset;
883*789Sahrens 	dsl_dataset_t *ds, *ds_prev = NULL;
884*789Sahrens 	uint64_t obj;
885*789Sahrens 
886*789Sahrens 	if (dd->dd_phys->dd_head_dataset_obj == 0)
887*789Sahrens 		return (EINVAL);
888*789Sahrens 
889*789Sahrens 	if (!RW_WRITE_HELD(&dp->dp_config_rwlock)) {
890*789Sahrens 		rw_enter(&dp->dp_config_rwlock, RW_WRITER);
891*789Sahrens 		drop_lock = TRUE;
892*789Sahrens 	}
893*789Sahrens 
894*789Sahrens 	ds = dsl_dataset_open_obj(dd->dd_pool,
895*789Sahrens 	    dd->dd_phys->dd_head_dataset_obj, NULL,
896*789Sahrens 	    snapname ? DS_MODE_NONE : DS_MODE_EXCLUSIVE, FTAG);
897*789Sahrens 
898*789Sahrens 	if (snapname) {
899*789Sahrens 		err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
900*789Sahrens 		    snapname, 8, 1, &obj);
901*789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
902*789Sahrens 		if (err) {
903*789Sahrens 			if (drop_lock)
904*789Sahrens 				rw_exit(&dp->dp_config_rwlock);
905*789Sahrens 			return (err);
906*789Sahrens 		}
907*789Sahrens 
908*789Sahrens 		ds = dsl_dataset_open_obj(dd->dd_pool, obj, NULL,
909*789Sahrens 		    DS_MODE_EXCLUSIVE, FTAG);
910*789Sahrens 	}
911*789Sahrens 	if (ds == NULL) {
912*789Sahrens 		if (drop_lock)
913*789Sahrens 			rw_exit(&dp->dp_config_rwlock);
914*789Sahrens 		return (EBUSY);
915*789Sahrens 	}
916*789Sahrens 
917*789Sahrens 	obj = ds->ds_object;
918*789Sahrens 
919*789Sahrens 	/* Can't delete a branch point. */
920*789Sahrens 	if (ds->ds_phys->ds_num_children > 1) {
921*789Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
922*789Sahrens 		if (drop_lock)
923*789Sahrens 			rw_exit(&dp->dp_config_rwlock);
924*789Sahrens 		return (EINVAL);
925*789Sahrens 	}
926*789Sahrens 
927*789Sahrens 	/*
928*789Sahrens 	 * Can't delete a head dataset if there are snapshots of it.
929*789Sahrens 	 * (Except if the only snapshots are from the branch we cloned
930*789Sahrens 	 * from.)
931*789Sahrens 	 */
932*789Sahrens 	if (ds->ds_prev != NULL &&
933*789Sahrens 	    ds->ds_prev->ds_phys->ds_next_snap_obj == obj) {
934*789Sahrens 		dsl_dataset_close(ds, DS_MODE_EXCLUSIVE, FTAG);
935*789Sahrens 		if (drop_lock)
936*789Sahrens 			rw_exit(&dp->dp_config_rwlock);
937*789Sahrens 		return (EINVAL);
938*789Sahrens 	}
939*789Sahrens 
940*789Sahrens 	/*
941*789Sahrens 	 * If we made changes this txg, traverse_dsl_dataset won't find
942*789Sahrens 	 * them.  Try again.
943*789Sahrens 	 */
944*789Sahrens 	if (ds->ds_phys->ds_bp.blk_birth >= tx->tx_txg) {
945*789Sahrens 		mutex_exit(&ds->ds_lock);
946*789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
947*789Sahrens 		return (EAGAIN);
948*789Sahrens 	}
949*789Sahrens 
950*789Sahrens 	/* THE POINT OF NO (unsuccessful) RETURN */
951*789Sahrens 
952*789Sahrens 	if (ds->ds_phys->ds_prev_snap_obj != 0) {
953*789Sahrens 		if (ds->ds_prev) {
954*789Sahrens 			ds_prev = ds->ds_prev;
955*789Sahrens 		} else {
956*789Sahrens 			ds_prev = dsl_dataset_open_obj(dd->dd_pool,
957*789Sahrens 			    ds->ds_phys->ds_prev_snap_obj, NULL,
958*789Sahrens 			    DS_MODE_NONE, FTAG);
959*789Sahrens 		}
960*789Sahrens 		after_branch_point =
961*789Sahrens 		    (ds_prev->ds_phys->ds_next_snap_obj != obj);
962*789Sahrens 
963*789Sahrens 		dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
964*789Sahrens 		if (after_branch_point &&
965*789Sahrens 		    ds->ds_phys->ds_next_snap_obj == 0) {
966*789Sahrens 			/* This clone is toast. */
967*789Sahrens 			ASSERT(ds_prev->ds_phys->ds_num_children > 1);
968*789Sahrens 			ds_prev->ds_phys->ds_num_children--;
969*789Sahrens 		} else if (!after_branch_point) {
970*789Sahrens 			ds_prev->ds_phys->ds_next_snap_obj =
971*789Sahrens 			    ds->ds_phys->ds_next_snap_obj;
972*789Sahrens 		}
973*789Sahrens 	}
974*789Sahrens 
975*789Sahrens 	ASSERT3P(tx->tx_pool, ==, dd->dd_pool);
976*789Sahrens 	zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
977*789Sahrens 
978*789Sahrens 	if (ds->ds_phys->ds_next_snap_obj != 0) {
979*789Sahrens 		dsl_dataset_t *ds_next;
980*789Sahrens 		uint64_t itor = 0;
981*789Sahrens 
982*789Sahrens 		spa_scrub_restart(dp->dp_spa, tx->tx_txg);
983*789Sahrens 
984*789Sahrens 		ds_next = dsl_dataset_open_obj(dd->dd_pool,
985*789Sahrens 		    ds->ds_phys->ds_next_snap_obj, NULL, DS_MODE_NONE, FTAG);
986*789Sahrens 		ASSERT3U(ds_next->ds_phys->ds_prev_snap_obj, ==, obj);
987*789Sahrens 
988*789Sahrens 		dmu_buf_will_dirty(ds_next->ds_dbuf, tx);
989*789Sahrens 		ds_next->ds_phys->ds_prev_snap_obj =
990*789Sahrens 		    ds->ds_phys->ds_prev_snap_obj;
991*789Sahrens 		ds_next->ds_phys->ds_prev_snap_txg =
992*789Sahrens 		    ds->ds_phys->ds_prev_snap_txg;
993*789Sahrens 		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
994*789Sahrens 		    ds_prev ? ds_prev->ds_phys->ds_creation_txg : 0);
995*789Sahrens 
996*789Sahrens 		/*
997*789Sahrens 		 * Transfer to our deadlist (which will become next's
998*789Sahrens 		 * new deadlist) any entries from next's current
999*789Sahrens 		 * deadlist which were born before prev, and free the
1000*789Sahrens 		 * other entries.
1001*789Sahrens 		 *
1002*789Sahrens 		 * XXX we're doing this long task with the config lock held
1003*789Sahrens 		 */
1004*789Sahrens 		while (bplist_iterate(&ds_next->ds_deadlist, &itor,
1005*789Sahrens 		    &bp) == 0) {
1006*789Sahrens 			if (bp.blk_birth <= ds->ds_phys->ds_prev_snap_txg) {
1007*789Sahrens 				bplist_enqueue(&ds->ds_deadlist, &bp, tx);
1008*789Sahrens 				if (ds_prev && !after_branch_point &&
1009*789Sahrens 				    bp.blk_birth >
1010*789Sahrens 				    ds_prev->ds_phys->ds_prev_snap_txg) {
1011*789Sahrens 					ds_prev->ds_phys->ds_unique_bytes +=
1012*789Sahrens 					    BP_GET_ASIZE(&bp);
1013*789Sahrens 				}
1014*789Sahrens 			} else {
1015*789Sahrens 				used += BP_GET_ASIZE(&bp);
1016*789Sahrens 				compressed += BP_GET_PSIZE(&bp);
1017*789Sahrens 				uncompressed += BP_GET_UCSIZE(&bp);
1018*789Sahrens 				/* XXX check return value? */
1019*789Sahrens 				(void) arc_free(zio, dp->dp_spa, tx->tx_txg,
1020*789Sahrens 				    &bp, NULL, NULL, ARC_NOWAIT);
1021*789Sahrens 			}
1022*789Sahrens 		}
1023*789Sahrens 
1024*789Sahrens 		/* free next's deadlist */
1025*789Sahrens 		bplist_close(&ds_next->ds_deadlist);
1026*789Sahrens 		bplist_destroy(mos, ds_next->ds_phys->ds_deadlist_obj, tx);
1027*789Sahrens 
1028*789Sahrens 		/* set next's deadlist to our deadlist */
1029*789Sahrens 		ds_next->ds_phys->ds_deadlist_obj =
1030*789Sahrens 		    ds->ds_phys->ds_deadlist_obj;
1031*789Sahrens 		bplist_open(&ds_next->ds_deadlist, mos,
1032*789Sahrens 		    ds_next->ds_phys->ds_deadlist_obj);
1033*789Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1034*789Sahrens 
1035*789Sahrens 		if (ds_next->ds_phys->ds_next_snap_obj != 0) {
1036*789Sahrens 			/*
1037*789Sahrens 			 * Update next's unique to include blocks which
1038*789Sahrens 			 * were previously shared by only this snapshot
1039*789Sahrens 			 * and it.  Those blocks will be born after the
1040*789Sahrens 			 * prev snap and before this snap, and will have
1041*789Sahrens 			 * died after the next snap and before the one
1042*789Sahrens 			 * after that (ie. be on the snap after next's
1043*789Sahrens 			 * deadlist).
1044*789Sahrens 			 *
1045*789Sahrens 			 * XXX we're doing this long task with the
1046*789Sahrens 			 * config lock held
1047*789Sahrens 			 */
1048*789Sahrens 			dsl_dataset_t *ds_after_next;
1049*789Sahrens 
1050*789Sahrens 			ds_after_next = dsl_dataset_open_obj(dd->dd_pool,
1051*789Sahrens 			    ds_next->ds_phys->ds_next_snap_obj, NULL,
1052*789Sahrens 			    DS_MODE_NONE, FTAG);
1053*789Sahrens 			itor = 0;
1054*789Sahrens 			while (bplist_iterate(&ds_after_next->ds_deadlist,
1055*789Sahrens 			    &itor, &bp) == 0) {
1056*789Sahrens 				if (bp.blk_birth >
1057*789Sahrens 				    ds->ds_phys->ds_prev_snap_txg &&
1058*789Sahrens 				    bp.blk_birth <=
1059*789Sahrens 				    ds->ds_phys->ds_creation_txg) {
1060*789Sahrens 					ds_next->ds_phys->ds_unique_bytes +=
1061*789Sahrens 					    BP_GET_ASIZE(&bp);
1062*789Sahrens 				}
1063*789Sahrens 			}
1064*789Sahrens 
1065*789Sahrens 			dsl_dataset_close(ds_after_next, DS_MODE_NONE, FTAG);
1066*789Sahrens 			ASSERT3P(ds_next->ds_prev, ==, NULL);
1067*789Sahrens 		} else {
1068*789Sahrens 			/*
1069*789Sahrens 			 * It would be nice to update the head dataset's
1070*789Sahrens 			 * unique.  To do so we would have to traverse
1071*789Sahrens 			 * it for blocks born after ds_prev, which is
1072*789Sahrens 			 * pretty expensive just to maintain something
1073*789Sahrens 			 * for debugging purposes.
1074*789Sahrens 			 */
1075*789Sahrens 			ASSERT3P(ds_next->ds_prev, ==, ds);
1076*789Sahrens 			dsl_dataset_close(ds_next->ds_prev, DS_MODE_NONE,
1077*789Sahrens 			    ds_next);
1078*789Sahrens 			if (ds_prev) {
1079*789Sahrens 				ds_next->ds_prev = dsl_dataset_open_obj(
1080*789Sahrens 				    dd->dd_pool, ds->ds_phys->ds_prev_snap_obj,
1081*789Sahrens 				    NULL, DS_MODE_NONE, ds_next);
1082*789Sahrens 			} else {
1083*789Sahrens 				ds_next->ds_prev = NULL;
1084*789Sahrens 			}
1085*789Sahrens 		}
1086*789Sahrens 		dsl_dataset_close(ds_next, DS_MODE_NONE, FTAG);
1087*789Sahrens 
1088*789Sahrens 		/*
1089*789Sahrens 		 * NB: unique_bytes is not accurate for head objsets
1090*789Sahrens 		 * because we don't update it when we delete the most
1091*789Sahrens 		 * recent snapshot -- see above comment.
1092*789Sahrens 		 */
1093*789Sahrens 		ASSERT3U(used, ==, ds->ds_phys->ds_unique_bytes);
1094*789Sahrens 	} else {
1095*789Sahrens 		/*
1096*789Sahrens 		 * There's no next snapshot, so this is a head dataset.
1097*789Sahrens 		 * Destroy the deadlist.  Unless it's a clone, the
1098*789Sahrens 		 * deadlist should be empty.  (If it's a clone, it's
1099*789Sahrens 		 * safe to ignore the deadlist contents.)
1100*789Sahrens 		 */
1101*789Sahrens 		struct killarg ka;
1102*789Sahrens 
1103*789Sahrens 		ASSERT(after_branch_point || bplist_empty(&ds->ds_deadlist));
1104*789Sahrens 		bplist_close(&ds->ds_deadlist);
1105*789Sahrens 		bplist_destroy(mos, ds->ds_phys->ds_deadlist_obj, tx);
1106*789Sahrens 		ds->ds_phys->ds_deadlist_obj = 0;
1107*789Sahrens 
1108*789Sahrens 		/*
1109*789Sahrens 		 * Free everything that we point to (that's born after
1110*789Sahrens 		 * the previous snapshot, if we are a clone)
1111*789Sahrens 		 *
1112*789Sahrens 		 * XXX we're doing this long task with the config lock held
1113*789Sahrens 		 */
1114*789Sahrens 		ka.usedp = &used;
1115*789Sahrens 		ka.compressedp = &compressed;
1116*789Sahrens 		ka.uncompressedp = &uncompressed;
1117*789Sahrens 		ka.zio = zio;
1118*789Sahrens 		ka.tx = tx;
1119*789Sahrens 		err = traverse_dsl_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
1120*789Sahrens 		    ADVANCE_POST, kill_blkptr, &ka);
1121*789Sahrens 		ASSERT3U(err, ==, 0);
1122*789Sahrens 	}
1123*789Sahrens 
1124*789Sahrens 	err = zio_wait(zio);
1125*789Sahrens 	ASSERT3U(err, ==, 0);
1126*789Sahrens 
1127*789Sahrens 	dsl_dir_diduse_space(dd, -used, -compressed, -uncompressed, tx);
1128*789Sahrens 
1129*789Sahrens 	if (ds->ds_phys->ds_snapnames_zapobj) {
1130*789Sahrens 		err = zap_destroy(mos, ds->ds_phys->ds_snapnames_zapobj, tx);
1131*789Sahrens 		ASSERT(err == 0);
1132*789Sahrens 	}
1133*789Sahrens 
1134*789Sahrens 	if (dd->dd_phys->dd_head_dataset_obj == ds->ds_object) {
1135*789Sahrens 		/* Erase the link in the dataset */
1136*789Sahrens 		dmu_buf_will_dirty(dd->dd_dbuf, tx);
1137*789Sahrens 		dd->dd_phys->dd_head_dataset_obj = 0;
1138*789Sahrens 		/*
1139*789Sahrens 		 * dsl_dir_sync_destroy() called us, they'll destroy
1140*789Sahrens 		 * the dataset.
1141*789Sahrens 		 */
1142*789Sahrens 	} else {
1143*789Sahrens 		/* remove from snapshot namespace */
1144*789Sahrens 		dsl_dataset_t *ds_head;
1145*789Sahrens 		ds_head = dsl_dataset_open_obj(dd->dd_pool,
1146*789Sahrens 		    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG);
1147*789Sahrens #ifdef ZFS_DEBUG
1148*789Sahrens 		{
1149*789Sahrens 			uint64_t val;
1150*789Sahrens 			err = zap_lookup(mos,
1151*789Sahrens 			    ds_head->ds_phys->ds_snapnames_zapobj,
1152*789Sahrens 			    snapname, 8, 1, &val);
1153*789Sahrens 			ASSERT3U(err, ==, 0);
1154*789Sahrens 			ASSERT3U(val, ==, obj);
1155*789Sahrens 		}
1156*789Sahrens #endif
1157*789Sahrens 		err = zap_remove(mos, ds_head->ds_phys->ds_snapnames_zapobj,
1158*789Sahrens 		    snapname, tx);
1159*789Sahrens 		ASSERT(err == 0);
1160*789Sahrens 		dsl_dataset_close(ds_head, DS_MODE_NONE, FTAG);
1161*789Sahrens 	}
1162*789Sahrens 
1163*789Sahrens 	if (ds_prev && ds->ds_prev != ds_prev)
1164*789Sahrens 		dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);
1165*789Sahrens 
1166*789Sahrens 	err = dmu_object_free(mos, obj, tx);
1167*789Sahrens 	ASSERT(err == 0);
1168*789Sahrens 
1169*789Sahrens 	/*
1170*789Sahrens 	 * Close the objset with mode NONE, thus leaving it with
1171*789Sahrens 	 * DOS_REF_MAX set, so that noone can access it.
1172*789Sahrens 	 */
1173*789Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
1174*789Sahrens 
1175*789Sahrens 	if (drop_lock)
1176*789Sahrens 		rw_exit(&dp->dp_config_rwlock);
1177*789Sahrens 	return (0);
1178*789Sahrens }
1179*789Sahrens 
1180*789Sahrens int
1181*789Sahrens dsl_dataset_snapshot_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
1182*789Sahrens {
1183*789Sahrens 	const char *snapname = arg;
1184*789Sahrens 	dsl_pool_t *dp = dd->dd_pool;
1185*789Sahrens 	dmu_buf_t *dbuf;
1186*789Sahrens 	dsl_dataset_phys_t *dsphys;
1187*789Sahrens 	uint64_t dsobj, value;
1188*789Sahrens 	objset_t *mos = dp->dp_meta_objset;
1189*789Sahrens 	dsl_dataset_t *ds;
1190*789Sahrens 	int err;
1191*789Sahrens 
1192*789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1193*789Sahrens 
1194*789Sahrens 	if (dd->dd_phys->dd_head_dataset_obj == 0)
1195*789Sahrens 		return (EINVAL);
1196*789Sahrens 	ds = dsl_dataset_open_obj(dp, dd->dd_phys->dd_head_dataset_obj, NULL,
1197*789Sahrens 	    DS_MODE_NONE, FTAG);
1198*789Sahrens 
1199*789Sahrens 	err = zap_lookup(mos, ds->ds_phys->ds_snapnames_zapobj,
1200*789Sahrens 	    snapname, 8, 1, &value);
1201*789Sahrens 	if (err == 0) {
1202*789Sahrens 		dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
1203*789Sahrens 		return (EEXIST);
1204*789Sahrens 	}
1205*789Sahrens 	ASSERT(err == ENOENT);
1206*789Sahrens 
1207*789Sahrens 	/* The point of no (unsuccessful) return */
1208*789Sahrens 
1209*789Sahrens 	dprintf_dd(dd, "taking snapshot %s in txg %llu\n",
1210*789Sahrens 	    snapname, tx->tx_txg);
1211*789Sahrens 
1212*789Sahrens 	spa_scrub_restart(dp->dp_spa, tx->tx_txg);
1213*789Sahrens 
1214*789Sahrens 	rw_enter(&dp->dp_config_rwlock, RW_WRITER);
1215*789Sahrens 
1216*789Sahrens 	dsobj = dmu_object_alloc(mos, DMU_OT_DSL_OBJSET, 0,
1217*789Sahrens 	    DMU_OT_DSL_OBJSET, sizeof (dsl_dataset_phys_t), tx);
1218*789Sahrens 	dbuf = dmu_bonus_hold(mos, dsobj);
1219*789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
1220*789Sahrens 	dsphys = dbuf->db_data;
1221*789Sahrens 	dsphys->ds_dir_obj = dd->dd_object;
1222*789Sahrens 	dsphys->ds_fsid_guid = unique_create();
1223*789Sahrens 	unique_remove(dsphys->ds_fsid_guid); /* it isn't open yet */
1224*789Sahrens 	(void) random_get_pseudo_bytes((void*)&dsphys->ds_guid,
1225*789Sahrens 	    sizeof (dsphys->ds_guid));
1226*789Sahrens 	dsphys->ds_prev_snap_obj = ds->ds_phys->ds_prev_snap_obj;
1227*789Sahrens 	dsphys->ds_prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
1228*789Sahrens 	dsphys->ds_next_snap_obj = ds->ds_object;
1229*789Sahrens 	dsphys->ds_num_children = 1;
1230*789Sahrens 	dsphys->ds_creation_time = gethrestime_sec();
1231*789Sahrens 	dsphys->ds_creation_txg = tx->tx_txg;
1232*789Sahrens 	dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
1233*789Sahrens 	dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
1234*789Sahrens 	dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
1235*789Sahrens 	dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
1236*789Sahrens 	dsphys->ds_restoring = ds->ds_phys->ds_restoring;
1237*789Sahrens 	dsphys->ds_bp = ds->ds_phys->ds_bp;
1238*789Sahrens 	dmu_buf_rele(dbuf);
1239*789Sahrens 
1240*789Sahrens 	if (ds->ds_phys->ds_prev_snap_obj != 0) {
1241*789Sahrens 		dsl_dataset_t *ds_prev;
1242*789Sahrens 
1243*789Sahrens 		ds_prev = dsl_dataset_open_obj(dp,
1244*789Sahrens 		    ds->ds_phys->ds_prev_snap_obj, NULL, DS_MODE_NONE, FTAG);
1245*789Sahrens 		ASSERT(ds_prev->ds_phys->ds_next_snap_obj ==
1246*789Sahrens 		    ds->ds_object ||
1247*789Sahrens 		    ds_prev->ds_phys->ds_num_children > 1);
1248*789Sahrens 		if (ds_prev->ds_phys->ds_next_snap_obj == ds->ds_object) {
1249*789Sahrens 			dmu_buf_will_dirty(ds_prev->ds_dbuf, tx);
1250*789Sahrens 			ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==,
1251*789Sahrens 			    ds_prev->ds_phys->ds_creation_txg);
1252*789Sahrens 			ds_prev->ds_phys->ds_next_snap_obj = dsobj;
1253*789Sahrens 		}
1254*789Sahrens 		dsl_dataset_close(ds_prev, DS_MODE_NONE, FTAG);
1255*789Sahrens 	} else {
1256*789Sahrens 		ASSERT3U(ds->ds_phys->ds_prev_snap_txg, ==, 0);
1257*789Sahrens 	}
1258*789Sahrens 
1259*789Sahrens 	bplist_close(&ds->ds_deadlist);
1260*789Sahrens 	dmu_buf_will_dirty(ds->ds_dbuf, tx);
1261*789Sahrens 	ASSERT3U(ds->ds_phys->ds_prev_snap_txg, <, dsphys->ds_creation_txg);
1262*789Sahrens 	ds->ds_phys->ds_prev_snap_obj = dsobj;
1263*789Sahrens 	ds->ds_phys->ds_prev_snap_txg = dsphys->ds_creation_txg;
1264*789Sahrens 	ds->ds_phys->ds_unique_bytes = 0;
1265*789Sahrens 	ds->ds_phys->ds_deadlist_obj =
1266*789Sahrens 	    bplist_create(mos, DSL_DEADLIST_BLOCKSIZE, tx);
1267*789Sahrens 	bplist_open(&ds->ds_deadlist, mos, ds->ds_phys->ds_deadlist_obj);
1268*789Sahrens 
1269*789Sahrens 	dprintf("snap '%s' -> obj %llu\n", snapname, dsobj);
1270*789Sahrens 	err = zap_add(mos, ds->ds_phys->ds_snapnames_zapobj,
1271*789Sahrens 	    snapname, 8, 1, &dsobj, tx);
1272*789Sahrens 	ASSERT(err == 0);
1273*789Sahrens 
1274*789Sahrens 	if (ds->ds_prev)
1275*789Sahrens 		dsl_dataset_close(ds->ds_prev, DS_MODE_NONE, ds);
1276*789Sahrens 	ds->ds_prev = dsl_dataset_open_obj(dp,
1277*789Sahrens 	    ds->ds_phys->ds_prev_snap_obj, snapname, DS_MODE_NONE, ds);
1278*789Sahrens 
1279*789Sahrens 	rw_exit(&dp->dp_config_rwlock);
1280*789Sahrens 	dsl_dataset_close(ds, DS_MODE_NONE, FTAG);
1281*789Sahrens 
1282*789Sahrens 	return (0);
1283*789Sahrens }
1284*789Sahrens 
1285*789Sahrens void
1286*789Sahrens dsl_dataset_sync(dsl_dataset_t *ds, dmu_tx_t *tx)
1287*789Sahrens {
1288*789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
1289*789Sahrens 	ASSERT(ds->ds_user_ptr != NULL);
1290*789Sahrens 	ASSERT(ds->ds_phys->ds_next_snap_obj == 0);
1291*789Sahrens 
1292*789Sahrens 	dmu_objset_sync(ds->ds_user_ptr, tx);
1293*789Sahrens 	dsl_dir_dirty(ds->ds_dir, tx);
1294*789Sahrens 	bplist_close(&ds->ds_deadlist);
1295*789Sahrens 
1296*789Sahrens 	dmu_buf_remove_ref(ds->ds_dbuf, ds);
1297*789Sahrens }
1298*789Sahrens 
1299*789Sahrens void
1300*789Sahrens dsl_dataset_stats(dsl_dataset_t *ds, dmu_objset_stats_t *dds)
1301*789Sahrens {
1302*789Sahrens 	/* fill in properties crap */
1303*789Sahrens 	dsl_dir_stats(ds->ds_dir, dds);
1304*789Sahrens 
1305*789Sahrens 	if (ds->ds_phys->ds_num_children != 0) {
1306*789Sahrens 		dds->dds_is_snapshot = TRUE;
1307*789Sahrens 		dds->dds_num_clones = ds->ds_phys->ds_num_children - 1;
1308*789Sahrens 	}
1309*789Sahrens 
1310*789Sahrens 	dds->dds_last_txg = ds->ds_phys->ds_bp.blk_birth;
1311*789Sahrens 
1312*789Sahrens 	dds->dds_objects_used = ds->ds_phys->ds_bp.blk_fill;
1313*789Sahrens 	dds->dds_objects_avail = DN_MAX_OBJECT - dds->dds_objects_used;
1314*789Sahrens 
1315*789Sahrens 	/* We override the dataset's creation time... they should be the same */
1316*789Sahrens 	dds->dds_creation_time = ds->ds_phys->ds_creation_time;
1317*789Sahrens 	dds->dds_creation_txg = ds->ds_phys->ds_creation_txg;
1318*789Sahrens 	dds->dds_space_refd = ds->ds_phys->ds_used_bytes;
1319*789Sahrens 	dds->dds_fsid_guid = ds->ds_phys->ds_fsid_guid;
1320*789Sahrens 	dds->dds_guid = ds->ds_phys->ds_guid;
1321*789Sahrens 
1322*789Sahrens 	if (ds->ds_phys->ds_next_snap_obj) {
1323*789Sahrens 		/*
1324*789Sahrens 		 * This is a snapshot; override the dd's space used with
1325*789Sahrens 		 * our unique space
1326*789Sahrens 		 */
1327*789Sahrens 		dds->dds_space_used = ds->ds_phys->ds_unique_bytes;
1328*789Sahrens 		dds->dds_compressed_bytes =
1329*789Sahrens 		    ds->ds_phys->ds_compressed_bytes;
1330*789Sahrens 		dds->dds_uncompressed_bytes =
1331*789Sahrens 		    ds->ds_phys->ds_uncompressed_bytes;
1332*789Sahrens 	}
1333*789Sahrens 
1334*789Sahrens 	dds->dds_objset_obj = ds->ds_object;
1335*789Sahrens }
1336*789Sahrens 
1337*789Sahrens dsl_pool_t *
1338*789Sahrens dsl_dataset_pool(dsl_dataset_t *ds)
1339*789Sahrens {
1340*789Sahrens 	return (ds->ds_dir->dd_pool);
1341*789Sahrens }
1342*789Sahrens 
/*
 * Argument bundle passed from dsl_dataset_rename() to
 * dsl_dataset_snapshot_rename_sync().
 */
struct osrenamearg {
	const char *oldname;	/* current full name of the snapshot */
	const char *newname;	/* requested full name */
};
1347*789Sahrens 
1348*789Sahrens static int
1349*789Sahrens dsl_dataset_snapshot_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx)
1350*789Sahrens {
1351*789Sahrens 	struct osrenamearg *ora = arg;
1352*789Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
1353*789Sahrens 	dsl_dir_t *nds;
1354*789Sahrens 	const char *tail;
1355*789Sahrens 	int err;
1356*789Sahrens 	dsl_dataset_t *snds, *fsds;
1357*789Sahrens 	uint64_t val;
1358*789Sahrens 
1359*789Sahrens 	err = dsl_dataset_open_spa(dd->dd_pool->dp_spa, ora->oldname,
1360*789Sahrens 	    DS_MODE_READONLY | DS_MODE_STANDARD, FTAG, &snds);
1361*789Sahrens 	if (err)
1362*789Sahrens 		return (err);
1363*789Sahrens 
1364*789Sahrens 	if (snds->ds_dir != dd) {
1365*789Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1366*789Sahrens 		return (EINVAL);
1367*789Sahrens 	}
1368*789Sahrens 
1369*789Sahrens 	/* better be changing a snapshot */
1370*789Sahrens 	if (snds->ds_phys->ds_next_snap_obj == 0) {
1371*789Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1372*789Sahrens 		return (EINVAL);
1373*789Sahrens 	}
1374*789Sahrens 
1375*789Sahrens 	/* new fs better exist */
1376*789Sahrens 	nds = dsl_dir_open_spa(dd->dd_pool->dp_spa, ora->newname, FTAG, &tail);
1377*789Sahrens 	if (nds == NULL) {
1378*789Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1379*789Sahrens 		return (ENOENT);
1380*789Sahrens 	}
1381*789Sahrens 
1382*789Sahrens 	dsl_dir_close(nds, FTAG);
1383*789Sahrens 
1384*789Sahrens 	/* new name better be in same fs */
1385*789Sahrens 	if (nds != dd) {
1386*789Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1387*789Sahrens 		return (EINVAL);
1388*789Sahrens 	}
1389*789Sahrens 
1390*789Sahrens 	/* new name better be a snapshot */
1391*789Sahrens 	if (tail == NULL || tail[0] != '@') {
1392*789Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1393*789Sahrens 		return (EINVAL);
1394*789Sahrens 	}
1395*789Sahrens 
1396*789Sahrens 	tail++;
1397*789Sahrens 
1398*789Sahrens 	fsds = dsl_dataset_open_obj(dd->dd_pool,
1399*789Sahrens 	    dd->dd_phys->dd_head_dataset_obj, NULL, DS_MODE_NONE, FTAG);
1400*789Sahrens 
1401*789Sahrens 	/* new name better not be in use */
1402*789Sahrens 	err = zap_lookup(mos, fsds->ds_phys->ds_snapnames_zapobj,
1403*789Sahrens 	    tail, 8, 1, &val);
1404*789Sahrens 	if (err != ENOENT) {
1405*789Sahrens 		if (err == 0)
1406*789Sahrens 			err = EEXIST;
1407*789Sahrens 		dsl_dataset_close(fsds, DS_MODE_NONE, FTAG);
1408*789Sahrens 		dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1409*789Sahrens 		return (EEXIST);
1410*789Sahrens 	}
1411*789Sahrens 
1412*789Sahrens 	/* The point of no (unsuccessful) return */
1413*789Sahrens 
1414*789Sahrens 	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_WRITER);
1415*789Sahrens 	dsl_dataset_get_snapname(snds);
1416*789Sahrens 	err = zap_remove(mos, fsds->ds_phys->ds_snapnames_zapobj,
1417*789Sahrens 	    snds->ds_snapname, tx);
1418*789Sahrens 	ASSERT3U(err, ==, 0);
1419*789Sahrens 	mutex_enter(&snds->ds_lock);
1420*789Sahrens 	(void) strcpy(snds->ds_snapname, tail);
1421*789Sahrens 	mutex_exit(&snds->ds_lock);
1422*789Sahrens 	err = zap_add(mos, fsds->ds_phys->ds_snapnames_zapobj,
1423*789Sahrens 	    snds->ds_snapname, 8, 1, &snds->ds_object, tx);
1424*789Sahrens 	ASSERT3U(err, ==, 0);
1425*789Sahrens 	rw_exit(&dd->dd_pool->dp_config_rwlock);
1426*789Sahrens 
1427*789Sahrens 	dsl_dataset_close(fsds, DS_MODE_NONE, FTAG);
1428*789Sahrens 	dsl_dataset_close(snds, DS_MODE_STANDARD, FTAG);
1429*789Sahrens 	return (0);
1430*789Sahrens }
1431*789Sahrens 
1432*789Sahrens #pragma weak dmu_objset_rename = dsl_dataset_rename
1433*789Sahrens int
1434*789Sahrens dsl_dataset_rename(const char *osname, const char *newname)
1435*789Sahrens {
1436*789Sahrens 	dsl_dir_t *dd;
1437*789Sahrens 	const char *tail;
1438*789Sahrens 	struct osrenamearg ora;
1439*789Sahrens 	int err;
1440*789Sahrens 
1441*789Sahrens 	dd = dsl_dir_open(osname, FTAG, &tail);
1442*789Sahrens 	if (dd == NULL)
1443*789Sahrens 		return (ENOENT);
1444*789Sahrens 	if (tail == NULL) {
1445*789Sahrens 		err = dsl_dir_sync_task(dd,
1446*789Sahrens 		    dsl_dir_rename_sync, (void*)newname, 1<<12);
1447*789Sahrens 		dsl_dir_close(dd, FTAG);
1448*789Sahrens 		return (err);
1449*789Sahrens 	}
1450*789Sahrens 	if (tail[0] != '@') {
1451*789Sahrens 		/* the name ended in a nonexistant component */
1452*789Sahrens 		dsl_dir_close(dd, FTAG);
1453*789Sahrens 		return (ENOENT);
1454*789Sahrens 	}
1455*789Sahrens 
1456*789Sahrens 	ora.oldname = osname;
1457*789Sahrens 	ora.newname = newname;
1458*789Sahrens 
1459*789Sahrens 	err = dsl_dir_sync_task(dd,
1460*789Sahrens 	    dsl_dataset_snapshot_rename_sync, &ora, 1<<12);
1461*789Sahrens 	dsl_dir_close(dd, FTAG);
1462*789Sahrens 	return (err);
1463*789Sahrens }
1464