xref: /onnv-gate/usr/src/uts/common/fs/zfs/dsl_dir.c (revision 12470:54258108784b)
1789Sahrens /*
2789Sahrens  * CDDL HEADER START
3789Sahrens  *
4789Sahrens  * The contents of this file are subject to the terms of the
51544Seschrock  * Common Development and Distribution License (the "License").
61544Seschrock  * You may not use this file except in compliance with the License.
7789Sahrens  *
8789Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9789Sahrens  * or http://www.opensolaris.org/os/licensing.
10789Sahrens  * See the License for the specific language governing permissions
11789Sahrens  * and limitations under the License.
12789Sahrens  *
13789Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14789Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15789Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16789Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17789Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18789Sahrens  *
19789Sahrens  * CDDL HEADER END
20789Sahrens  */
21789Sahrens /*
2212296SLin.Ling@Sun.COM  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23789Sahrens  */
24789Sahrens 
25789Sahrens #include <sys/dmu.h>
265378Sck153898 #include <sys/dmu_objset.h>
27789Sahrens #include <sys/dmu_tx.h>
28789Sahrens #include <sys/dsl_dataset.h>
29789Sahrens #include <sys/dsl_dir.h>
30789Sahrens #include <sys/dsl_prop.h>
312199Sahrens #include <sys/dsl_synctask.h>
324543Smarks #include <sys/dsl_deleg.h>
33789Sahrens #include <sys/spa.h>
3410922SJeff.Bonwick@Sun.COM #include <sys/metaslab.h>
35789Sahrens #include <sys/zap.h>
36789Sahrens #include <sys/zio.h>
37789Sahrens #include <sys/arc.h>
384543Smarks #include <sys/sunddi.h>
39789Sahrens #include "zfs_namecheck.h"
40789Sahrens 
415378Sck153898 static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
4212296SLin.Ling@Sun.COM static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx);
43789Sahrens 
44789Sahrens 
45789Sahrens /* ARGSUSED */
46789Sahrens static void
dsl_dir_evict(dmu_buf_t * db,void * arg)47789Sahrens dsl_dir_evict(dmu_buf_t *db, void *arg)
48789Sahrens {
49789Sahrens 	dsl_dir_t *dd = arg;
50789Sahrens 	dsl_pool_t *dp = dd->dd_pool;
51789Sahrens 	int t;
52789Sahrens 
53789Sahrens 	for (t = 0; t < TXG_SIZE; t++) {
54789Sahrens 		ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t));
55789Sahrens 		ASSERT(dd->dd_tempreserved[t] == 0);
56789Sahrens 		ASSERT(dd->dd_space_towrite[t] == 0);
57789Sahrens 	}
58789Sahrens 
59789Sahrens 	if (dd->dd_parent)
60789Sahrens 		dsl_dir_close(dd->dd_parent, dd);
61789Sahrens 
62789Sahrens 	spa_close(dd->dd_pool->dp_spa, dd);
63789Sahrens 
64789Sahrens 	/*
6512296SLin.Ling@Sun.COM 	 * The props callback list should have been cleaned up by
6612296SLin.Ling@Sun.COM 	 * objset_evict().
67789Sahrens 	 */
68789Sahrens 	list_destroy(&dd->dd_prop_cbs);
692856Snd150628 	mutex_destroy(&dd->dd_lock);
70789Sahrens 	kmem_free(dd, sizeof (dsl_dir_t));
71789Sahrens }
72789Sahrens 
731544Seschrock int
dsl_dir_open_obj(dsl_pool_t * dp,uint64_t ddobj,const char * tail,void * tag,dsl_dir_t ** ddp)74789Sahrens dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
751544Seschrock     const char *tail, void *tag, dsl_dir_t **ddp)
76789Sahrens {
77789Sahrens 	dmu_buf_t *dbuf;
78789Sahrens 	dsl_dir_t *dd;
791544Seschrock 	int err;
80789Sahrens 
81789Sahrens 	ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) ||
82789Sahrens 	    dsl_pool_sync_context(dp));
83789Sahrens 
841544Seschrock 	err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf);
851544Seschrock 	if (err)
861544Seschrock 		return (err);
87789Sahrens 	dd = dmu_buf_get_user(dbuf);
88789Sahrens #ifdef ZFS_DEBUG
89789Sahrens 	{
90789Sahrens 		dmu_object_info_t doi;
91789Sahrens 		dmu_object_info_from_db(dbuf, &doi);
92928Stabriz 		ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR);
937390SMatthew.Ahrens@Sun.COM 		ASSERT3U(doi.doi_bonus_size, >=, sizeof (dsl_dir_phys_t));
94789Sahrens 	}
95789Sahrens #endif
96789Sahrens 	if (dd == NULL) {
97789Sahrens 		dsl_dir_t *winner;
98789Sahrens 
99789Sahrens 		dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP);
100789Sahrens 		dd->dd_object = ddobj;
101789Sahrens 		dd->dd_dbuf = dbuf;
102789Sahrens 		dd->dd_pool = dp;
103789Sahrens 		dd->dd_phys = dbuf->db_data;
1042856Snd150628 		mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
105789Sahrens 
106789Sahrens 		list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t),
107789Sahrens 		    offsetof(dsl_prop_cb_record_t, cbr_node));
108789Sahrens 
10910373Schris.kirby@sun.com 		dsl_dir_snap_cmtime_update(dd);
11010373Schris.kirby@sun.com 
111789Sahrens 		if (dd->dd_phys->dd_parent_obj) {
1121544Seschrock 			err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj,
1131544Seschrock 			    NULL, dd, &dd->dd_parent);
1147390SMatthew.Ahrens@Sun.COM 			if (err)
1157390SMatthew.Ahrens@Sun.COM 				goto errout;
116789Sahrens 			if (tail) {
117789Sahrens #ifdef ZFS_DEBUG
118789Sahrens 				uint64_t foundobj;
119789Sahrens 
120789Sahrens 				err = zap_lookup(dp->dp_meta_objset,
1214577Sahrens 				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
122789Sahrens 				    tail, sizeof (foundobj), 1, &foundobj);
1231544Seschrock 				ASSERT(err || foundobj == ddobj);
124789Sahrens #endif
125789Sahrens 				(void) strcpy(dd->dd_myname, tail);
126789Sahrens 			} else {
127789Sahrens 				err = zap_value_search(dp->dp_meta_objset,
1284577Sahrens 				    dd->dd_parent->dd_phys->dd_child_dir_zapobj,
1294577Sahrens 				    ddobj, 0, dd->dd_myname);
1301544Seschrock 			}
1317390SMatthew.Ahrens@Sun.COM 			if (err)
1327390SMatthew.Ahrens@Sun.COM 				goto errout;
133789Sahrens 		} else {
134789Sahrens 			(void) strcpy(dd->dd_myname, spa_name(dp->dp_spa));
135789Sahrens 		}
136789Sahrens 
13712296SLin.Ling@Sun.COM 		if (dsl_dir_is_clone(dd)) {
13812296SLin.Ling@Sun.COM 			dmu_buf_t *origin_bonus;
13912296SLin.Ling@Sun.COM 			dsl_dataset_phys_t *origin_phys;
14012296SLin.Ling@Sun.COM 
14112296SLin.Ling@Sun.COM 			/*
14212296SLin.Ling@Sun.COM 			 * We can't open the origin dataset, because
14312296SLin.Ling@Sun.COM 			 * that would require opening this dsl_dir.
14412296SLin.Ling@Sun.COM 			 * Just look at its phys directly instead.
14512296SLin.Ling@Sun.COM 			 */
14612296SLin.Ling@Sun.COM 			err = dmu_bonus_hold(dp->dp_meta_objset,
14712296SLin.Ling@Sun.COM 			    dd->dd_phys->dd_origin_obj, FTAG, &origin_bonus);
14812296SLin.Ling@Sun.COM 			if (err)
14912296SLin.Ling@Sun.COM 				goto errout;
15012296SLin.Ling@Sun.COM 			origin_phys = origin_bonus->db_data;
15112296SLin.Ling@Sun.COM 			dd->dd_origin_txg =
15212296SLin.Ling@Sun.COM 			    origin_phys->ds_creation_txg;
15312296SLin.Ling@Sun.COM 			dmu_buf_rele(origin_bonus, FTAG);
15412296SLin.Ling@Sun.COM 		}
15512296SLin.Ling@Sun.COM 
156789Sahrens 		winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys,
157789Sahrens 		    dsl_dir_evict);
158789Sahrens 		if (winner) {
159789Sahrens 			if (dd->dd_parent)
160789Sahrens 				dsl_dir_close(dd->dd_parent, dd);
1612856Snd150628 			mutex_destroy(&dd->dd_lock);
162789Sahrens 			kmem_free(dd, sizeof (dsl_dir_t));
163789Sahrens 			dd = winner;
164789Sahrens 		} else {
165789Sahrens 			spa_open_ref(dp->dp_spa, dd);
166789Sahrens 		}
167789Sahrens 	}
168789Sahrens 
169789Sahrens 	/*
170789Sahrens 	 * The dsl_dir_t has both open-to-close and instantiate-to-evict
171789Sahrens 	 * holds on the spa.  We need the open-to-close holds because
172789Sahrens 	 * otherwise the spa_refcnt wouldn't change when we open a
173789Sahrens 	 * dir which the spa also has open, so we could incorrectly
174789Sahrens 	 * think it was OK to unload/export/destroy the pool.  We need
175789Sahrens 	 * the instantiate-to-evict hold because the dsl_dir_t has a
176789Sahrens 	 * pointer to the dd_pool, which has a pointer to the spa_t.
177789Sahrens 	 */
178789Sahrens 	spa_open_ref(dp->dp_spa, tag);
179789Sahrens 	ASSERT3P(dd->dd_pool, ==, dp);
180789Sahrens 	ASSERT3U(dd->dd_object, ==, ddobj);
181789Sahrens 	ASSERT3P(dd->dd_dbuf, ==, dbuf);
1821544Seschrock 	*ddp = dd;
1831544Seschrock 	return (0);
1847390SMatthew.Ahrens@Sun.COM 
1857390SMatthew.Ahrens@Sun.COM errout:
1867390SMatthew.Ahrens@Sun.COM 	if (dd->dd_parent)
1877390SMatthew.Ahrens@Sun.COM 		dsl_dir_close(dd->dd_parent, dd);
1887390SMatthew.Ahrens@Sun.COM 	mutex_destroy(&dd->dd_lock);
1897390SMatthew.Ahrens@Sun.COM 	kmem_free(dd, sizeof (dsl_dir_t));
1907390SMatthew.Ahrens@Sun.COM 	dmu_buf_rele(dbuf, tag);
1917390SMatthew.Ahrens@Sun.COM 	return (err);
1927390SMatthew.Ahrens@Sun.COM 
193789Sahrens }
194789Sahrens 
195789Sahrens void
dsl_dir_close(dsl_dir_t * dd,void * tag)196789Sahrens dsl_dir_close(dsl_dir_t *dd, void *tag)
197789Sahrens {
198789Sahrens 	dprintf_dd(dd, "%s\n", "");
199789Sahrens 	spa_close(dd->dd_pool->dp_spa, tag);
2001544Seschrock 	dmu_buf_rele(dd->dd_dbuf, tag);
201789Sahrens }
202789Sahrens 
2032467Sek110237 /* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */
204789Sahrens void
dsl_dir_name(dsl_dir_t * dd,char * buf)205789Sahrens dsl_dir_name(dsl_dir_t *dd, char *buf)
206789Sahrens {
207789Sahrens 	if (dd->dd_parent) {
208789Sahrens 		dsl_dir_name(dd->dd_parent, buf);
209789Sahrens 		(void) strcat(buf, "/");
210789Sahrens 	} else {
211789Sahrens 		buf[0] = '\0';
212789Sahrens 	}
213789Sahrens 	if (!MUTEX_HELD(&dd->dd_lock)) {
214789Sahrens 		/*
215789Sahrens 		 * recursive mutex so that we can use
216789Sahrens 		 * dprintf_dd() with dd_lock held
217789Sahrens 		 */
218789Sahrens 		mutex_enter(&dd->dd_lock);
219789Sahrens 		(void) strcat(buf, dd->dd_myname);
220789Sahrens 		mutex_exit(&dd->dd_lock);
221789Sahrens 	} else {
222789Sahrens 		(void) strcat(buf, dd->dd_myname);
223789Sahrens 	}
224789Sahrens }
225789Sahrens 
2263978Smmusante /* Calculate name legnth, avoiding all the strcat calls of dsl_dir_name */
2273978Smmusante int
dsl_dir_namelen(dsl_dir_t * dd)2283978Smmusante dsl_dir_namelen(dsl_dir_t *dd)
2293978Smmusante {
2303978Smmusante 	int result = 0;
2313978Smmusante 
2323978Smmusante 	if (dd->dd_parent) {
2333978Smmusante 		/* parent's name + 1 for the "/" */
2343978Smmusante 		result = dsl_dir_namelen(dd->dd_parent) + 1;
2353978Smmusante 	}
2363978Smmusante 
2373978Smmusante 	if (!MUTEX_HELD(&dd->dd_lock)) {
2383978Smmusante 		/* see dsl_dir_name */
2393978Smmusante 		mutex_enter(&dd->dd_lock);
2403978Smmusante 		result += strlen(dd->dd_myname);
2413978Smmusante 		mutex_exit(&dd->dd_lock);
2423978Smmusante 	} else {
2433978Smmusante 		result += strlen(dd->dd_myname);
2443978Smmusante 	}
2453978Smmusante 
2463978Smmusante 	return (result);
2473978Smmusante }
2483978Smmusante 
249789Sahrens static int
getcomponent(const char * path,char * component,const char ** nextp)250789Sahrens getcomponent(const char *path, char *component, const char **nextp)
251789Sahrens {
252789Sahrens 	char *p;
2538924SRichard.Morris@Sun.COM 	if ((path == NULL) || (path[0] == '\0'))
2542731Snd150628 		return (ENOENT);
255789Sahrens 	/* This would be a good place to reserve some namespace... */
256789Sahrens 	p = strpbrk(path, "/@");
257789Sahrens 	if (p && (p[1] == '/' || p[1] == '@')) {
258789Sahrens 		/* two separators in a row */
259789Sahrens 		return (EINVAL);
260789Sahrens 	}
261789Sahrens 	if (p == NULL || p == path) {
262789Sahrens 		/*
263789Sahrens 		 * if the first thing is an @ or /, it had better be an
264789Sahrens 		 * @ and it had better not have any more ats or slashes,
265789Sahrens 		 * and it had better have something after the @.
266789Sahrens 		 */
267789Sahrens 		if (p != NULL &&
268789Sahrens 		    (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0'))
269789Sahrens 			return (EINVAL);
270789Sahrens 		if (strlen(path) >= MAXNAMELEN)
271789Sahrens 			return (ENAMETOOLONG);
272789Sahrens 		(void) strcpy(component, path);
273789Sahrens 		p = NULL;
274789Sahrens 	} else if (p[0] == '/') {
275789Sahrens 		if (p-path >= MAXNAMELEN)
276789Sahrens 			return (ENAMETOOLONG);
277789Sahrens 		(void) strncpy(component, path, p - path);
278789Sahrens 		component[p-path] = '\0';
279789Sahrens 		p++;
280789Sahrens 	} else if (p[0] == '@') {
281789Sahrens 		/*
282789Sahrens 		 * if the next separator is an @, there better not be
283789Sahrens 		 * any more slashes.
284789Sahrens 		 */
285789Sahrens 		if (strchr(path, '/'))
286789Sahrens 			return (EINVAL);
287789Sahrens 		if (p-path >= MAXNAMELEN)
288789Sahrens 			return (ENAMETOOLONG);
289789Sahrens 		(void) strncpy(component, path, p - path);
290789Sahrens 		component[p-path] = '\0';
291789Sahrens 	} else {
292789Sahrens 		ASSERT(!"invalid p");
293789Sahrens 	}
294789Sahrens 	*nextp = p;
295789Sahrens 	return (0);
296789Sahrens }
297789Sahrens 
298789Sahrens /*
299789Sahrens  * same as dsl_open_dir, ignore the first component of name and use the
300789Sahrens  * spa instead
301789Sahrens  */
3021544Seschrock int
dsl_dir_open_spa(spa_t * spa,const char * name,void * tag,dsl_dir_t ** ddp,const char ** tailp)3031544Seschrock dsl_dir_open_spa(spa_t *spa, const char *name, void *tag,
3041544Seschrock     dsl_dir_t **ddp, const char **tailp)
305789Sahrens {
306789Sahrens 	char buf[MAXNAMELEN];
307789Sahrens 	const char *next, *nextnext = NULL;
308789Sahrens 	int err;
309789Sahrens 	dsl_dir_t *dd;
310789Sahrens 	dsl_pool_t *dp;
311789Sahrens 	uint64_t ddobj;
312789Sahrens 	int openedspa = FALSE;
313789Sahrens 
314789Sahrens 	dprintf("%s\n", name);
315789Sahrens 
316789Sahrens 	err = getcomponent(name, buf, &next);
317789Sahrens 	if (err)
3181544Seschrock 		return (err);
319789Sahrens 	if (spa == NULL) {
320789Sahrens 		err = spa_open(buf, &spa, FTAG);
321789Sahrens 		if (err) {
322789Sahrens 			dprintf("spa_open(%s) failed\n", buf);
3231544Seschrock 			return (err);
324789Sahrens 		}
325789Sahrens 		openedspa = TRUE;
326789Sahrens 
327789Sahrens 		/* XXX this assertion belongs in spa_open */
328789Sahrens 		ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa)));
329789Sahrens 	}
330789Sahrens 
331789Sahrens 	dp = spa_get_dsl(spa);
332789Sahrens 
333789Sahrens 	rw_enter(&dp->dp_config_rwlock, RW_READER);
3341544Seschrock 	err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd);
3351544Seschrock 	if (err) {
3361544Seschrock 		rw_exit(&dp->dp_config_rwlock);
3371544Seschrock 		if (openedspa)
3381544Seschrock 			spa_close(spa, FTAG);
3391544Seschrock 		return (err);
3401544Seschrock 	}
3411544Seschrock 
342789Sahrens 	while (next != NULL) {
343789Sahrens 		dsl_dir_t *child_ds;
344789Sahrens 		err = getcomponent(next, buf, &nextnext);
3451544Seschrock 		if (err)
3461544Seschrock 			break;
347789Sahrens 		ASSERT(next[0] != '\0');
348789Sahrens 		if (next[0] == '@')
349789Sahrens 			break;
350789Sahrens 		dprintf("looking up %s in obj%lld\n",
351789Sahrens 		    buf, dd->dd_phys->dd_child_dir_zapobj);
352789Sahrens 
353789Sahrens 		err = zap_lookup(dp->dp_meta_objset,
354789Sahrens 		    dd->dd_phys->dd_child_dir_zapobj,
355789Sahrens 		    buf, sizeof (ddobj), 1, &ddobj);
3561544Seschrock 		if (err) {
3571544Seschrock 			if (err == ENOENT)
3581544Seschrock 				err = 0;
359789Sahrens 			break;
360789Sahrens 		}
361789Sahrens 
3621544Seschrock 		err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds);
3631544Seschrock 		if (err)
3641544Seschrock 			break;
365789Sahrens 		dsl_dir_close(dd, tag);
366789Sahrens 		dd = child_ds;
367789Sahrens 		next = nextnext;
368789Sahrens 	}
369789Sahrens 	rw_exit(&dp->dp_config_rwlock);
370789Sahrens 
3711544Seschrock 	if (err) {
3721544Seschrock 		dsl_dir_close(dd, tag);
3731544Seschrock 		if (openedspa)
3741544Seschrock 			spa_close(spa, FTAG);
3751544Seschrock 		return (err);
3761544Seschrock 	}
3771544Seschrock 
378789Sahrens 	/*
379789Sahrens 	 * It's an error if there's more than one component left, or
380789Sahrens 	 * tailp==NULL and there's any component left.
381789Sahrens 	 */
382789Sahrens 	if (next != NULL &&
383789Sahrens 	    (tailp == NULL || (nextnext && nextnext[0] != '\0'))) {
384789Sahrens 		/* bad path name */
385789Sahrens 		dsl_dir_close(dd, tag);
386789Sahrens 		dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp);
3871544Seschrock 		err = ENOENT;
388789Sahrens 	}
389789Sahrens 	if (tailp)
390789Sahrens 		*tailp = next;
391789Sahrens 	if (openedspa)
392789Sahrens 		spa_close(spa, FTAG);
3931544Seschrock 	*ddp = dd;
3941544Seschrock 	return (err);
395789Sahrens }
396789Sahrens 
397789Sahrens /*
398789Sahrens  * Return the dsl_dir_t, and possibly the last component which couldn't
399789Sahrens  * be found in *tail.  Return NULL if the path is bogus, or if
400789Sahrens  * tail==NULL and we couldn't parse the whole name.  (*tail)[0] == '@'
401789Sahrens  * means that the last component is a snapshot.
402789Sahrens  */
4031544Seschrock int
dsl_dir_open(const char * name,void * tag,dsl_dir_t ** ddp,const char ** tailp)4041544Seschrock dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp)
405789Sahrens {
4061544Seschrock 	return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp));
407789Sahrens }
408789Sahrens 
4092199Sahrens uint64_t
dsl_dir_create_sync(dsl_pool_t * dp,dsl_dir_t * pds,const char * name,dmu_tx_t * tx)4107046Sahrens dsl_dir_create_sync(dsl_pool_t *dp, dsl_dir_t *pds, const char *name,
4117046Sahrens     dmu_tx_t *tx)
412789Sahrens {
4137046Sahrens 	objset_t *mos = dp->dp_meta_objset;
414789Sahrens 	uint64_t ddobj;
415*12470SMatthew.Ahrens@Sun.COM 	dsl_dir_phys_t *ddphys;
416789Sahrens 	dmu_buf_t *dbuf;
417789Sahrens 
418928Stabriz 	ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0,
419928Stabriz 	    DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx);
4207046Sahrens 	if (pds) {
4217046Sahrens 		VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj,
4227046Sahrens 		    name, sizeof (uint64_t), 1, &ddobj, tx));
4237046Sahrens 	} else {
4247046Sahrens 		/* it's the root dir */
4257046Sahrens 		VERIFY(0 == zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
4267046Sahrens 		    DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1, &ddobj, tx));
4277046Sahrens 	}
4281544Seschrock 	VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf));
429789Sahrens 	dmu_buf_will_dirty(dbuf, tx);
430*12470SMatthew.Ahrens@Sun.COM 	ddphys = dbuf->db_data;
431789Sahrens 
432*12470SMatthew.Ahrens@Sun.COM 	ddphys->dd_creation_time = gethrestime_sec();
4337046Sahrens 	if (pds)
434*12470SMatthew.Ahrens@Sun.COM 		ddphys->dd_parent_obj = pds->dd_object;
435*12470SMatthew.Ahrens@Sun.COM 	ddphys->dd_props_zapobj = zap_create(mos,
436789Sahrens 	    DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx);
437*12470SMatthew.Ahrens@Sun.COM 	ddphys->dd_child_dir_zapobj = zap_create(mos,
438885Sahrens 	    DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx);
4397390SMatthew.Ahrens@Sun.COM 	if (spa_version(dp->dp_spa) >= SPA_VERSION_USED_BREAKDOWN)
440*12470SMatthew.Ahrens@Sun.COM 		ddphys->dd_flags |= DD_FLAG_USED_BREAKDOWN;
4411544Seschrock 	dmu_buf_rele(dbuf, FTAG);
442789Sahrens 
4432199Sahrens 	return (ddobj);
4442199Sahrens }
4452199Sahrens 
4462199Sahrens /* ARGSUSED */
4472199Sahrens int
dsl_dir_destroy_check(void * arg1,void * arg2,dmu_tx_t * tx)4482199Sahrens dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx)
4492199Sahrens {
45011022STom.Erickson@Sun.COM 	dsl_dataset_t *ds = arg1;
45111022STom.Erickson@Sun.COM 	dsl_dir_t *dd = ds->ds_dir;
4522199Sahrens 	dsl_pool_t *dp = dd->dd_pool;
4532199Sahrens 	objset_t *mos = dp->dp_meta_objset;
4542199Sahrens 	int err;
4552199Sahrens 	uint64_t count;
4562199Sahrens 
4572199Sahrens 	/*
4582199Sahrens 	 * There should be exactly two holds, both from
4592199Sahrens 	 * dsl_dataset_destroy: one on the dd directory, and one on its
4602199Sahrens 	 * head ds.  Otherwise, someone is trying to lookup something
4612199Sahrens 	 * inside this dir while we want to destroy it.  The
4622199Sahrens 	 * config_rwlock ensures that nobody else opens it after we
4632199Sahrens 	 * check.
4642199Sahrens 	 */
4652199Sahrens 	if (dmu_buf_refcount(dd->dd_dbuf) > 2)
4662199Sahrens 		return (EBUSY);
4672199Sahrens 
4682199Sahrens 	err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count);
4692199Sahrens 	if (err)
4702199Sahrens 		return (err);
4712199Sahrens 	if (count != 0)
4722199Sahrens 		return (EEXIST);
473789Sahrens 
474789Sahrens 	return (0);
475789Sahrens }
476789Sahrens 
4772199Sahrens void
dsl_dir_destroy_sync(void * arg1,void * tag,dmu_tx_t * tx)47812296SLin.Ling@Sun.COM dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
479789Sahrens {
48011022STom.Erickson@Sun.COM 	dsl_dataset_t *ds = arg1;
48111022STom.Erickson@Sun.COM 	dsl_dir_t *dd = ds->ds_dir;
4822199Sahrens 	objset_t *mos = dd->dd_pool->dp_meta_objset;
48311022STom.Erickson@Sun.COM 	dsl_prop_setarg_t psa;
48411022STom.Erickson@Sun.COM 	uint64_t value = 0;
48511022STom.Erickson@Sun.COM 	uint64_t obj;
4867390SMatthew.Ahrens@Sun.COM 	dd_used_t t;
487789Sahrens 
4882199Sahrens 	ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock));
489789Sahrens 	ASSERT(dd->dd_phys->dd_head_dataset_obj == 0);
490789Sahrens 
4912199Sahrens 	/* Remove our reservation. */
49211022STom.Erickson@Sun.COM 	dsl_prop_setarg_init_uint64(&psa, "reservation",
49311022STom.Erickson@Sun.COM 	    (ZPROP_SRC_NONE | ZPROP_SRC_LOCAL | ZPROP_SRC_RECEIVED),
49411022STom.Erickson@Sun.COM 	    &value);
49511022STom.Erickson@Sun.COM 	psa.psa_effective_value = 0;	/* predict default value */
49611022STom.Erickson@Sun.COM 
49712296SLin.Ling@Sun.COM 	dsl_dir_set_reservation_sync(ds, &psa, tx);
49811022STom.Erickson@Sun.COM 
4997390SMatthew.Ahrens@Sun.COM 	ASSERT3U(dd->dd_phys->dd_used_bytes, ==, 0);
500789Sahrens 	ASSERT3U(dd->dd_phys->dd_reserved, ==, 0);
5017390SMatthew.Ahrens@Sun.COM 	for (t = 0; t < DD_USED_NUM; t++)
5027390SMatthew.Ahrens@Sun.COM 		ASSERT3U(dd->dd_phys->dd_used_breakdown[t], ==, 0);
503789Sahrens 
5042199Sahrens 	VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx));
5052199Sahrens 	VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx));
5064543Smarks 	VERIFY(0 == dsl_deleg_destroy(mos, dd->dd_phys->dd_deleg_zapobj, tx));
5072199Sahrens 	VERIFY(0 == zap_remove(mos,
5082199Sahrens 	    dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx));
509789Sahrens 
5102199Sahrens 	obj = dd->dd_object;
5112199Sahrens 	dsl_dir_close(dd, tag);
5122199Sahrens 	VERIFY(0 == dmu_object_free(mos, obj, tx));
513789Sahrens }
514789Sahrens 
5157046Sahrens boolean_t
dsl_dir_is_clone(dsl_dir_t * dd)5167046Sahrens dsl_dir_is_clone(dsl_dir_t *dd)
517789Sahrens {
5187046Sahrens 	return (dd->dd_phys->dd_origin_obj &&
5197046Sahrens 	    (dd->dd_pool->dp_origin_snap == NULL ||
5207046Sahrens 	    dd->dd_phys->dd_origin_obj !=
5217046Sahrens 	    dd->dd_pool->dp_origin_snap->ds_object));
522789Sahrens }
523789Sahrens 
524789Sahrens void
dsl_dir_stats(dsl_dir_t * dd,nvlist_t * nv)5252885Sahrens dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
526789Sahrens {
527789Sahrens 	mutex_enter(&dd->dd_lock);
5287390SMatthew.Ahrens@Sun.COM 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
5297390SMatthew.Ahrens@Sun.COM 	    dd->dd_phys->dd_used_bytes);
5305378Sck153898 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_QUOTA, dd->dd_phys->dd_quota);
5312885Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_RESERVATION,
5322885Sahrens 	    dd->dd_phys->dd_reserved);
5332885Sahrens 	dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
5342885Sahrens 	    dd->dd_phys->dd_compressed_bytes == 0 ? 100 :
5352885Sahrens 	    (dd->dd_phys->dd_uncompressed_bytes * 100 /
5362885Sahrens 	    dd->dd_phys->dd_compressed_bytes));
5377390SMatthew.Ahrens@Sun.COM 	if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
5387390SMatthew.Ahrens@Sun.COM 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDSNAP,
5397390SMatthew.Ahrens@Sun.COM 		    dd->dd_phys->dd_used_breakdown[DD_USED_SNAP]);
5407390SMatthew.Ahrens@Sun.COM 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDDS,
5417390SMatthew.Ahrens@Sun.COM 		    dd->dd_phys->dd_used_breakdown[DD_USED_HEAD]);
5427390SMatthew.Ahrens@Sun.COM 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDREFRESERV,
5437390SMatthew.Ahrens@Sun.COM 		    dd->dd_phys->dd_used_breakdown[DD_USED_REFRSRV]);
5447390SMatthew.Ahrens@Sun.COM 		dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USEDCHILD,
5457390SMatthew.Ahrens@Sun.COM 		    dd->dd_phys->dd_used_breakdown[DD_USED_CHILD] +
5467390SMatthew.Ahrens@Sun.COM 		    dd->dd_phys->dd_used_breakdown[DD_USED_CHILD_RSRV]);
5477390SMatthew.Ahrens@Sun.COM 	}
548789Sahrens 	mutex_exit(&dd->dd_lock);
549789Sahrens 
5505446Sahrens 	rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
5517046Sahrens 	if (dsl_dir_is_clone(dd)) {
552789Sahrens 		dsl_dataset_t *ds;
5532885Sahrens 		char buf[MAXNAMELEN];
554789Sahrens 
5556689Smaybee 		VERIFY(0 == dsl_dataset_hold_obj(dd->dd_pool,
5566689Smaybee 		    dd->dd_phys->dd_origin_obj, FTAG, &ds));
5572885Sahrens 		dsl_dataset_name(ds, buf);
5586689Smaybee 		dsl_dataset_rele(ds, FTAG);
5592885Sahrens 		dsl_prop_nvlist_add_string(nv, ZFS_PROP_ORIGIN, buf);
560789Sahrens 	}
5615446Sahrens 	rw_exit(&dd->dd_pool->dp_config_rwlock);
562789Sahrens }
563789Sahrens 
564789Sahrens void
dsl_dir_dirty(dsl_dir_t * dd,dmu_tx_t * tx)565789Sahrens dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx)
566789Sahrens {
567789Sahrens 	dsl_pool_t *dp = dd->dd_pool;
568789Sahrens 
569789Sahrens 	ASSERT(dd->dd_phys);
570789Sahrens 
571789Sahrens 	if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) {
572789Sahrens 		/* up the hold count until we can be written out */
573789Sahrens 		dmu_buf_add_ref(dd->dd_dbuf, dd);
574789Sahrens 	}
575789Sahrens }
576789Sahrens 
577789Sahrens static int64_t
parent_delta(dsl_dir_t * dd,uint64_t used,int64_t delta)578789Sahrens parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta)
579789Sahrens {
580789Sahrens 	uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved);
581789Sahrens 	uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved);
582789Sahrens 	return (new_accounted - old_accounted);
583789Sahrens }
584789Sahrens 
585789Sahrens void
dsl_dir_sync(dsl_dir_t * dd,dmu_tx_t * tx)586789Sahrens dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
587789Sahrens {
588789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
589789Sahrens 
590789Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
591789Sahrens 
592789Sahrens 	mutex_enter(&dd->dd_lock);
593789Sahrens 	ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0);
594789Sahrens 	dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,
595789Sahrens 	    dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024);
596789Sahrens 	dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0;
597789Sahrens 	mutex_exit(&dd->dd_lock);
598789Sahrens 
599789Sahrens 	/* release the hold from dsl_dir_dirty */
6001544Seschrock 	dmu_buf_rele(dd->dd_dbuf, dd);
601789Sahrens }
602789Sahrens 
603789Sahrens static uint64_t
dsl_dir_space_towrite(dsl_dir_t * dd)6045378Sck153898 dsl_dir_space_towrite(dsl_dir_t *dd)
605789Sahrens {
6065378Sck153898 	uint64_t space = 0;
607789Sahrens 	int i;
608789Sahrens 
609789Sahrens 	ASSERT(MUTEX_HELD(&dd->dd_lock));
610789Sahrens 
611789Sahrens 	for (i = 0; i < TXG_SIZE; i++) {
612789Sahrens 		space += dd->dd_space_towrite[i&TXG_MASK];
613789Sahrens 		ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0);
614789Sahrens 	}
615789Sahrens 	return (space);
616789Sahrens }
617789Sahrens 
618789Sahrens /*
619789Sahrens  * How much space would dd have available if ancestor had delta applied
620789Sahrens  * to it?  If ondiskonly is set, we're only interested in what's
621789Sahrens  * on-disk, not estimated pending changes.
622789Sahrens  */
6232885Sahrens uint64_t
dsl_dir_space_available(dsl_dir_t * dd,dsl_dir_t * ancestor,int64_t delta,int ondiskonly)624789Sahrens dsl_dir_space_available(dsl_dir_t *dd,
625789Sahrens     dsl_dir_t *ancestor, int64_t delta, int ondiskonly)
626789Sahrens {
627789Sahrens 	uint64_t parentspace, myspace, quota, used;
628789Sahrens 
629789Sahrens 	/*
630789Sahrens 	 * If there are no restrictions otherwise, assume we have
631789Sahrens 	 * unlimited space available.
632789Sahrens 	 */
633789Sahrens 	quota = UINT64_MAX;
634789Sahrens 	parentspace = UINT64_MAX;
635789Sahrens 
636789Sahrens 	if (dd->dd_parent != NULL) {
637789Sahrens 		parentspace = dsl_dir_space_available(dd->dd_parent,
638789Sahrens 		    ancestor, delta, ondiskonly);
639789Sahrens 	}
640789Sahrens 
641789Sahrens 	mutex_enter(&dd->dd_lock);
642789Sahrens 	if (dd->dd_phys->dd_quota != 0)
643789Sahrens 		quota = dd->dd_phys->dd_quota;
6447390SMatthew.Ahrens@Sun.COM 	used = dd->dd_phys->dd_used_bytes;
6455378Sck153898 	if (!ondiskonly)
6465378Sck153898 		used += dsl_dir_space_towrite(dd);
647789Sahrens 
648789Sahrens 	if (dd->dd_parent == NULL) {
6492082Seschrock 		uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE);
650789Sahrens 		quota = MIN(quota, poolsize);
651789Sahrens 	}
652789Sahrens 
653789Sahrens 	if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) {
654789Sahrens 		/*
655789Sahrens 		 * We have some space reserved, in addition to what our
656789Sahrens 		 * parent gave us.
657789Sahrens 		 */
658789Sahrens 		parentspace += dd->dd_phys->dd_reserved - used;
659789Sahrens 	}
660789Sahrens 
6617390SMatthew.Ahrens@Sun.COM 	if (dd == ancestor) {
6627390SMatthew.Ahrens@Sun.COM 		ASSERT(delta <= 0);
6637390SMatthew.Ahrens@Sun.COM 		ASSERT(used >= -delta);
6647390SMatthew.Ahrens@Sun.COM 		used += delta;
6657390SMatthew.Ahrens@Sun.COM 		if (parentspace != UINT64_MAX)
6667390SMatthew.Ahrens@Sun.COM 			parentspace -= delta;
6677390SMatthew.Ahrens@Sun.COM 	}
6687390SMatthew.Ahrens@Sun.COM 
669789Sahrens 	if (used > quota) {
670789Sahrens 		/* over quota */
671789Sahrens 		myspace = 0;
672789Sahrens 	} else {
673789Sahrens 		/*
6742082Seschrock 		 * the lesser of the space provided by our parent and
6752082Seschrock 		 * the space left in our quota
676789Sahrens 		 */
677789Sahrens 		myspace = MIN(parentspace, quota - used);
678789Sahrens 	}
679789Sahrens 
680789Sahrens 	mutex_exit(&dd->dd_lock);
681789Sahrens 
682789Sahrens 	return (myspace);
683789Sahrens }
684789Sahrens 
685789Sahrens struct tempreserve {
686789Sahrens 	list_node_t tr_node;
6876245Smaybee 	dsl_pool_t *tr_dp;
688789Sahrens 	dsl_dir_t *tr_ds;
689789Sahrens 	uint64_t tr_size;
690789Sahrens };
691789Sahrens 
692789Sahrens static int
dsl_dir_tempreserve_impl(dsl_dir_t * dd,uint64_t asize,boolean_t netfree,boolean_t ignorequota,boolean_t checkrefquota,list_t * tr_list,dmu_tx_t * tx,boolean_t first)6935378Sck153898 dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
6945378Sck153898     boolean_t ignorequota, boolean_t checkrefquota, list_t *tr_list,
6956300Sck153898     dmu_tx_t *tx, boolean_t first)
696789Sahrens {
697789Sahrens 	uint64_t txg = tx->tx_txg;
6985378Sck153898 	uint64_t est_inflight, used_on_disk, quota, parent_rsrv;
69910921STim.Haley@Sun.COM 	uint64_t deferred = 0;
7005378Sck153898 	struct tempreserve *tr;
70110921STim.Haley@Sun.COM 	int retval = EDQUOT;
702789Sahrens 	int txgidx = txg & TXG_MASK;
703789Sahrens 	int i;
7045831Sck153898 	uint64_t ref_rsrv = 0;
705789Sahrens 
706789Sahrens 	ASSERT3U(txg, !=, 0);
7075378Sck153898 	ASSERT3S(asize, >, 0);
708789Sahrens 
709789Sahrens 	mutex_enter(&dd->dd_lock);
7105378Sck153898 
711789Sahrens 	/*
712789Sahrens 	 * Check against the dsl_dir's quota.  We don't add in the delta
713789Sahrens 	 * when checking for over-quota because they get one free hit.
714789Sahrens 	 */
7155378Sck153898 	est_inflight = dsl_dir_space_towrite(dd);
716789Sahrens 	for (i = 0; i < TXG_SIZE; i++)
7175378Sck153898 		est_inflight += dd->dd_tempreserved[i];
7187390SMatthew.Ahrens@Sun.COM 	used_on_disk = dd->dd_phys->dd_used_bytes;
719789Sahrens 
7204709Smaybee 	/*
7216300Sck153898 	 * On the first iteration, fetch the dataset's used-on-disk and
7226300Sck153898 	 * refreservation values. Also, if checkrefquota is set, test if
7236300Sck153898 	 * allocating this space would exceed the dataset's refquota.
7244709Smaybee 	 */
7256300Sck153898 	if (first && tx->tx_objset) {
7265392Smaybee 		int error;
72710298SMatthew.Ahrens@Sun.COM 		dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset;
7285392Smaybee 
7295378Sck153898 		error = dsl_dataset_check_quota(ds, checkrefquota,
7305831Sck153898 		    asize, est_inflight, &used_on_disk, &ref_rsrv);
7315378Sck153898 		if (error) {
7325378Sck153898 			mutex_exit(&dd->dd_lock);
7335378Sck153898 			return (error);
7345378Sck153898 		}
7355378Sck153898 	}
7365378Sck153898 
7375378Sck153898 	/*
7385378Sck153898 	 * If this transaction will result in a net free of space,
7395378Sck153898 	 * we want to let it through.
7405378Sck153898 	 */
7415378Sck153898 	if (ignorequota || netfree || dd->dd_phys->dd_quota == 0)
7424709Smaybee 		quota = UINT64_MAX;
7434709Smaybee 	else
744789Sahrens 		quota = dd->dd_phys->dd_quota;
745789Sahrens 
746789Sahrens 	/*
74710921STim.Haley@Sun.COM 	 * Adjust the quota against the actual pool size at the root
74810921STim.Haley@Sun.COM 	 * minus any outstanding deferred frees.
7494709Smaybee 	 * To ensure that it's possible to remove files from a full
7504709Smaybee 	 * pool without inducing transient overcommits, we throttle
751789Sahrens 	 * netfree transactions against a quota that is slightly larger,
752789Sahrens 	 * but still within the pool's allocation slop.  In cases where
753789Sahrens 	 * we're very close to full, this will allow a steady trickle of
754789Sahrens 	 * removes to get through.
755789Sahrens 	 */
7564944Smaybee 	if (dd->dd_parent == NULL) {
75710922SJeff.Bonwick@Sun.COM 		spa_t *spa = dd->dd_pool->dp_spa;
758789Sahrens 		uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree);
75910922SJeff.Bonwick@Sun.COM 		deferred = metaslab_class_get_deferred(spa_normal_class(spa));
76010921STim.Haley@Sun.COM 		if (poolsize - deferred < quota) {
76110921STim.Haley@Sun.COM 			quota = poolsize - deferred;
76210921STim.Haley@Sun.COM 			retval = ENOSPC;
763789Sahrens 		}
764789Sahrens 	}
765789Sahrens 
766789Sahrens 	/*
767789Sahrens 	 * If they are requesting more space, and our current estimate
7685378Sck153898 	 * is over quota, they get to try again unless the actual
7691544Seschrock 	 * on-disk is over quota and there are no pending changes (which
7701544Seschrock 	 * may free up space for us).
771789Sahrens 	 */
77210921STim.Haley@Sun.COM 	if (used_on_disk + est_inflight >= quota) {
77310921STim.Haley@Sun.COM 		if (est_inflight > 0 || used_on_disk < quota ||
77410921STim.Haley@Sun.COM 		    (retval == ENOSPC && used_on_disk < quota + deferred))
77510921STim.Haley@Sun.COM 			retval = ERESTART;
7765378Sck153898 		dprintf_dd(dd, "failing: used=%lluK inflight = %lluK "
777789Sahrens 		    "quota=%lluK tr=%lluK err=%d\n",
7785378Sck153898 		    used_on_disk>>10, est_inflight>>10,
77910921STim.Haley@Sun.COM 		    quota>>10, asize>>10, retval);
780789Sahrens 		mutex_exit(&dd->dd_lock);
78110921STim.Haley@Sun.COM 		return (retval);
782789Sahrens 	}
783789Sahrens 
784789Sahrens 	/* We need to up our estimated delta before dropping dd_lock */
785789Sahrens 	dd->dd_tempreserved[txgidx] += asize;
786789Sahrens 
7875831Sck153898 	parent_rsrv = parent_delta(dd, used_on_disk + est_inflight,
7885831Sck153898 	    asize - ref_rsrv);
789789Sahrens 	mutex_exit(&dd->dd_lock);
790789Sahrens 
7916245Smaybee 	tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
792789Sahrens 	tr->tr_ds = dd;
793789Sahrens 	tr->tr_size = asize;
794789Sahrens 	list_insert_tail(tr_list, tr);
795789Sahrens 
796789Sahrens 	/* see if it's OK with our parent */
7974944Smaybee 	if (dd->dd_parent && parent_rsrv) {
7984944Smaybee 		boolean_t ismos = (dd->dd_phys->dd_head_dataset_obj == 0);
7994944Smaybee 
800789Sahrens 		return (dsl_dir_tempreserve_impl(dd->dd_parent,
8016300Sck153898 		    parent_rsrv, netfree, ismos, TRUE, tr_list, tx, FALSE));
802789Sahrens 	} else {
803789Sahrens 		return (0);
804789Sahrens 	}
805789Sahrens }
806789Sahrens 
807789Sahrens /*
808789Sahrens  * Reserve space in this dsl_dir, to be used in this tx's txg.
8095378Sck153898  * After the space has been dirtied (and dsl_dir_willuse_space()
8105378Sck153898  * has been called), the reservation should be canceled, using
8115378Sck153898  * dsl_dir_tempreserve_clear().
812789Sahrens  */
813789Sahrens int
dsl_dir_tempreserve_space(dsl_dir_t * dd,uint64_t lsize,uint64_t asize,uint64_t fsize,uint64_t usize,void ** tr_cookiep,dmu_tx_t * tx)8145378Sck153898 dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
8155378Sck153898     uint64_t fsize, uint64_t usize, void **tr_cookiep, dmu_tx_t *tx)
816789Sahrens {
8176245Smaybee 	int err;
818789Sahrens 	list_t *tr_list;
819789Sahrens 
8205378Sck153898 	if (asize == 0) {
8215378Sck153898 		*tr_cookiep = NULL;
8225378Sck153898 		return (0);
8235378Sck153898 	}
8245378Sck153898 
825789Sahrens 	tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
826789Sahrens 	list_create(tr_list, sizeof (struct tempreserve),
827789Sahrens 	    offsetof(struct tempreserve, tr_node));
8285378Sck153898 	ASSERT3S(asize, >, 0);
8291544Seschrock 	ASSERT3S(fsize, >=, 0);
830789Sahrens 
8316245Smaybee 	err = arc_tempreserve_space(lsize, tx->tx_txg);
8326245Smaybee 	if (err == 0) {
8336245Smaybee 		struct tempreserve *tr;
8346245Smaybee 
8356245Smaybee 		tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
8366245Smaybee 		tr->tr_size = lsize;
8376245Smaybee 		list_insert_tail(tr_list, tr);
8386245Smaybee 
8396245Smaybee 		err = dsl_pool_tempreserve_space(dd->dd_pool, asize, tx);
8406245Smaybee 	} else {
8416245Smaybee 		if (err == EAGAIN) {
8426245Smaybee 			txg_delay(dd->dd_pool, tx->tx_txg, 1);
8436245Smaybee 			err = ERESTART;
8446245Smaybee 		}
8456245Smaybee 		dsl_pool_memory_pressure(dd->dd_pool);
8466245Smaybee 	}
847789Sahrens 
848789Sahrens 	if (err == 0) {
849789Sahrens 		struct tempreserve *tr;
850789Sahrens 
8516245Smaybee 		tr = kmem_zalloc(sizeof (struct tempreserve), KM_SLEEP);
8526245Smaybee 		tr->tr_dp = dd->dd_pool;
8536245Smaybee 		tr->tr_size = asize;
8546245Smaybee 		list_insert_tail(tr_list, tr);
8556245Smaybee 
8566245Smaybee 		err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize,
8576300Sck153898 		    FALSE, asize > usize, tr_list, tx, TRUE);
858789Sahrens 	}
859789Sahrens 
860789Sahrens 	if (err)
861789Sahrens 		dsl_dir_tempreserve_clear(tr_list, tx);
862789Sahrens 	else
863789Sahrens 		*tr_cookiep = tr_list;
8646245Smaybee 
865789Sahrens 	return (err);
866789Sahrens }
867789Sahrens 
868789Sahrens /*
869789Sahrens  * Clear a temporary reservation that we previously made with
870789Sahrens  * dsl_dir_tempreserve_space().
871789Sahrens  */
872789Sahrens void
dsl_dir_tempreserve_clear(void * tr_cookie,dmu_tx_t * tx)873789Sahrens dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx)
874789Sahrens {
875789Sahrens 	int txgidx = tx->tx_txg & TXG_MASK;
876789Sahrens 	list_t *tr_list = tr_cookie;
877789Sahrens 	struct tempreserve *tr;
878789Sahrens 
879789Sahrens 	ASSERT3U(tx->tx_txg, !=, 0);
880789Sahrens 
8815378Sck153898 	if (tr_cookie == NULL)
8825378Sck153898 		return;
8835378Sck153898 
884789Sahrens 	while (tr = list_head(tr_list)) {
8856245Smaybee 		if (tr->tr_dp) {
8866245Smaybee 			dsl_pool_tempreserve_clear(tr->tr_dp, tr->tr_size, tx);
8876245Smaybee 		} else if (tr->tr_ds) {
888789Sahrens 			mutex_enter(&tr->tr_ds->dd_lock);
889789Sahrens 			ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=,
890789Sahrens 			    tr->tr_size);
891789Sahrens 			tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size;
892789Sahrens 			mutex_exit(&tr->tr_ds->dd_lock);
8936245Smaybee 		} else {
8946245Smaybee 			arc_tempreserve_clear(tr->tr_size);
895789Sahrens 		}
896789Sahrens 		list_remove(tr_list, tr);
897789Sahrens 		kmem_free(tr, sizeof (struct tempreserve));
898789Sahrens 	}
899789Sahrens 
900789Sahrens 	kmem_free(tr_list, sizeof (list_t));
901789Sahrens }
902789Sahrens 
9036245Smaybee static void
dsl_dir_willuse_space_impl(dsl_dir_t * dd,int64_t space,dmu_tx_t * tx)9046245Smaybee dsl_dir_willuse_space_impl(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
905789Sahrens {
906789Sahrens 	int64_t parent_space;
907789Sahrens 	uint64_t est_used;
908789Sahrens 
909789Sahrens 	mutex_enter(&dd->dd_lock);
910789Sahrens 	if (space > 0)
911789Sahrens 		dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space;
912789Sahrens 
9137390SMatthew.Ahrens@Sun.COM 	est_used = dsl_dir_space_towrite(dd) + dd->dd_phys->dd_used_bytes;
914789Sahrens 	parent_space = parent_delta(dd, est_used, space);
915789Sahrens 	mutex_exit(&dd->dd_lock);
916789Sahrens 
917789Sahrens 	/* Make sure that we clean up dd_space_to* */
918789Sahrens 	dsl_dir_dirty(dd, tx);
919789Sahrens 
920789Sahrens 	/* XXX this is potentially expensive and unnecessary... */
921789Sahrens 	if (parent_space && dd->dd_parent)
9226245Smaybee 		dsl_dir_willuse_space_impl(dd->dd_parent, parent_space, tx);
9236245Smaybee }
9246245Smaybee 
9256245Smaybee /*
9266245Smaybee  * Call in open context when we think we're going to write/free space,
9276245Smaybee  * eg. when dirtying data.  Be conservative (ie. OK to write less than
9286245Smaybee  * this or free more than this, but don't write more or free less).
9296245Smaybee  */
9306245Smaybee void
dsl_dir_willuse_space(dsl_dir_t * dd,int64_t space,dmu_tx_t * tx)9316245Smaybee dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx)
9326245Smaybee {
9336245Smaybee 	dsl_pool_willuse_space(dd->dd_pool, space, tx);
9346245Smaybee 	dsl_dir_willuse_space_impl(dd, space, tx);
935789Sahrens }
936789Sahrens 
937789Sahrens /* call from syncing context when we actually write/free space for this dd */
938789Sahrens void
dsl_dir_diduse_space(dsl_dir_t * dd,dd_used_t type,int64_t used,int64_t compressed,int64_t uncompressed,dmu_tx_t * tx)9397390SMatthew.Ahrens@Sun.COM dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
940789Sahrens     int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx)
941789Sahrens {
942789Sahrens 	int64_t accounted_delta;
9437595SMatthew.Ahrens@Sun.COM 	boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);
944789Sahrens 
945789Sahrens 	ASSERT(dmu_tx_is_syncing(tx));
9467390SMatthew.Ahrens@Sun.COM 	ASSERT(type < DD_USED_NUM);
947789Sahrens 
948789Sahrens 	dsl_dir_dirty(dd, tx);
949789Sahrens 
9507595SMatthew.Ahrens@Sun.COM 	if (needlock)
9517595SMatthew.Ahrens@Sun.COM 		mutex_enter(&dd->dd_lock);
9527390SMatthew.Ahrens@Sun.COM 	accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used);
9537390SMatthew.Ahrens@Sun.COM 	ASSERT(used >= 0 || dd->dd_phys->dd_used_bytes >= -used);
954789Sahrens 	ASSERT(compressed >= 0 ||
955789Sahrens 	    dd->dd_phys->dd_compressed_bytes >= -compressed);
956789Sahrens 	ASSERT(uncompressed >= 0 ||
957789Sahrens 	    dd->dd_phys->dd_uncompressed_bytes >= -uncompressed);
9587390SMatthew.Ahrens@Sun.COM 	dd->dd_phys->dd_used_bytes += used;
959789Sahrens 	dd->dd_phys->dd_uncompressed_bytes += uncompressed;
960789Sahrens 	dd->dd_phys->dd_compressed_bytes += compressed;
9617390SMatthew.Ahrens@Sun.COM 
9627390SMatthew.Ahrens@Sun.COM 	if (dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN) {
9637390SMatthew.Ahrens@Sun.COM 		ASSERT(used > 0 ||
9647390SMatthew.Ahrens@Sun.COM 		    dd->dd_phys->dd_used_breakdown[type] >= -used);
9657390SMatthew.Ahrens@Sun.COM 		dd->dd_phys->dd_used_breakdown[type] += used;
9667390SMatthew.Ahrens@Sun.COM #ifdef DEBUG
9677390SMatthew.Ahrens@Sun.COM 		dd_used_t t;
9687390SMatthew.Ahrens@Sun.COM 		uint64_t u = 0;
9697390SMatthew.Ahrens@Sun.COM 		for (t = 0; t < DD_USED_NUM; t++)
9707390SMatthew.Ahrens@Sun.COM 			u += dd->dd_phys->dd_used_breakdown[t];
9717390SMatthew.Ahrens@Sun.COM 		ASSERT3U(u, ==, dd->dd_phys->dd_used_bytes);
9727390SMatthew.Ahrens@Sun.COM #endif
9737390SMatthew.Ahrens@Sun.COM 	}
9747595SMatthew.Ahrens@Sun.COM 	if (needlock)
9757595SMatthew.Ahrens@Sun.COM 		mutex_exit(&dd->dd_lock);
976789Sahrens 
977789Sahrens 	if (dd->dd_parent != NULL) {
9787390SMatthew.Ahrens@Sun.COM 		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
979789Sahrens 		    accounted_delta, compressed, uncompressed, tx);
9807390SMatthew.Ahrens@Sun.COM 		dsl_dir_transfer_space(dd->dd_parent,
9817390SMatthew.Ahrens@Sun.COM 		    used - accounted_delta,
9827390SMatthew.Ahrens@Sun.COM 		    DD_USED_CHILD_RSRV, DD_USED_CHILD, tx);
983789Sahrens 	}
984789Sahrens }
985789Sahrens 
9867390SMatthew.Ahrens@Sun.COM void
dsl_dir_transfer_space(dsl_dir_t * dd,int64_t delta,dd_used_t oldtype,dd_used_t newtype,dmu_tx_t * tx)9877390SMatthew.Ahrens@Sun.COM dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
9887390SMatthew.Ahrens@Sun.COM     dd_used_t oldtype, dd_used_t newtype, dmu_tx_t *tx)
9897390SMatthew.Ahrens@Sun.COM {
9907595SMatthew.Ahrens@Sun.COM 	boolean_t needlock = !MUTEX_HELD(&dd->dd_lock);
9917595SMatthew.Ahrens@Sun.COM 
9927390SMatthew.Ahrens@Sun.COM 	ASSERT(dmu_tx_is_syncing(tx));
9937390SMatthew.Ahrens@Sun.COM 	ASSERT(oldtype < DD_USED_NUM);
9947390SMatthew.Ahrens@Sun.COM 	ASSERT(newtype < DD_USED_NUM);
9957390SMatthew.Ahrens@Sun.COM 
9967390SMatthew.Ahrens@Sun.COM 	if (delta == 0 || !(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN))
9977390SMatthew.Ahrens@Sun.COM 		return;
9987390SMatthew.Ahrens@Sun.COM 
9997390SMatthew.Ahrens@Sun.COM 	dsl_dir_dirty(dd, tx);
10007595SMatthew.Ahrens@Sun.COM 	if (needlock)
10017595SMatthew.Ahrens@Sun.COM 		mutex_enter(&dd->dd_lock);
10027390SMatthew.Ahrens@Sun.COM 	ASSERT(delta > 0 ?
10037390SMatthew.Ahrens@Sun.COM 	    dd->dd_phys->dd_used_breakdown[oldtype] >= delta :
10047390SMatthew.Ahrens@Sun.COM 	    dd->dd_phys->dd_used_breakdown[newtype] >= -delta);
10057390SMatthew.Ahrens@Sun.COM 	ASSERT(dd->dd_phys->dd_used_bytes >= ABS(delta));
10067390SMatthew.Ahrens@Sun.COM 	dd->dd_phys->dd_used_breakdown[oldtype] -= delta;
10077390SMatthew.Ahrens@Sun.COM 	dd->dd_phys->dd_used_breakdown[newtype] += delta;
10087595SMatthew.Ahrens@Sun.COM 	if (needlock)
10097595SMatthew.Ahrens@Sun.COM 		mutex_exit(&dd->dd_lock);
10107390SMatthew.Ahrens@Sun.COM }
10117390SMatthew.Ahrens@Sun.COM 
1012789Sahrens static int
dsl_dir_set_quota_check(void * arg1,void * arg2,dmu_tx_t * tx)10132199Sahrens dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
1014789Sahrens {
101511022STom.Erickson@Sun.COM 	dsl_dataset_t *ds = arg1;
101611022STom.Erickson@Sun.COM 	dsl_dir_t *dd = ds->ds_dir;
101711022STom.Erickson@Sun.COM 	dsl_prop_setarg_t *psa = arg2;
101811022STom.Erickson@Sun.COM 	int err;
10192199Sahrens 	uint64_t towrite;
10202199Sahrens 
102111022STom.Erickson@Sun.COM 	if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
102211022STom.Erickson@Sun.COM 		return (err);
102311022STom.Erickson@Sun.COM 
102411022STom.Erickson@Sun.COM 	if (psa->psa_effective_value == 0)
10252199Sahrens 		return (0);
10262199Sahrens 
10272199Sahrens 	mutex_enter(&dd->dd_lock);
10282199Sahrens 	/*
10292199Sahrens 	 * If we are doing the preliminary check in open context, and
10302199Sahrens 	 * there are pending changes, then don't fail it, since the
10315378Sck153898 	 * pending changes could under-estimate the amount of space to be
10322199Sahrens 	 * freed up.
10332199Sahrens 	 */
10345378Sck153898 	towrite = dsl_dir_space_towrite(dd);
10352199Sahrens 	if ((dmu_tx_is_syncing(tx) || towrite == 0) &&
103611022STom.Erickson@Sun.COM 	    (psa->psa_effective_value < dd->dd_phys->dd_reserved ||
103711022STom.Erickson@Sun.COM 	    psa->psa_effective_value < dd->dd_phys->dd_used_bytes + towrite)) {
10382199Sahrens 		err = ENOSPC;
10392199Sahrens 	}
10402199Sahrens 	mutex_exit(&dd->dd_lock);
10412199Sahrens 	return (err);
10422199Sahrens }
10432199Sahrens 
104412296SLin.Ling@Sun.COM extern dsl_syncfunc_t dsl_prop_set_sync;
104511022STom.Erickson@Sun.COM 
10462199Sahrens static void
dsl_dir_set_quota_sync(void * arg1,void * arg2,dmu_tx_t * tx)104712296SLin.Ling@Sun.COM dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
10482199Sahrens {
104911022STom.Erickson@Sun.COM 	dsl_dataset_t *ds = arg1;
105011022STom.Erickson@Sun.COM 	dsl_dir_t *dd = ds->ds_dir;
105111022STom.Erickson@Sun.COM 	dsl_prop_setarg_t *psa = arg2;
105211022STom.Erickson@Sun.COM 	uint64_t effective_value = psa->psa_effective_value;
105311022STom.Erickson@Sun.COM 
105412296SLin.Ling@Sun.COM 	dsl_prop_set_sync(ds, psa, tx);
105511022STom.Erickson@Sun.COM 	DSL_PROP_CHECK_PREDICTION(dd, psa);
1056789Sahrens 
1057789Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
1058789Sahrens 
1059789Sahrens 	mutex_enter(&dd->dd_lock);
106011022STom.Erickson@Sun.COM 	dd->dd_phys->dd_quota = effective_value;
1061789Sahrens 	mutex_exit(&dd->dd_lock);
10624543Smarks 
106312296SLin.Ling@Sun.COM 	spa_history_log_internal(LOG_DS_QUOTA, dd->dd_pool->dp_spa,
106412296SLin.Ling@Sun.COM 	    tx, "%lld dataset = %llu ",
106511022STom.Erickson@Sun.COM 	    (longlong_t)effective_value, dd->dd_phys->dd_head_dataset_obj);
1066789Sahrens }
1067789Sahrens 
1068789Sahrens int
dsl_dir_set_quota(const char * ddname,zprop_source_t source,uint64_t quota)106911022STom.Erickson@Sun.COM dsl_dir_set_quota(const char *ddname, zprop_source_t source, uint64_t quota)
1070789Sahrens {
1071789Sahrens 	dsl_dir_t *dd;
107211022STom.Erickson@Sun.COM 	dsl_dataset_t *ds;
107311022STom.Erickson@Sun.COM 	dsl_prop_setarg_t psa;
1074789Sahrens 	int err;
1075789Sahrens 
107611022STom.Erickson@Sun.COM 	dsl_prop_setarg_init_uint64(&psa, "quota", source, &quota);
107711022STom.Erickson@Sun.COM 
107811022STom.Erickson@Sun.COM 	err = dsl_dataset_hold(ddname, FTAG, &ds);
10791544Seschrock 	if (err)
10801544Seschrock 		return (err);
1081789Sahrens 
108211022STom.Erickson@Sun.COM 	err = dsl_dir_open(ddname, FTAG, &dd, NULL);
108311022STom.Erickson@Sun.COM 	if (err) {
108411022STom.Erickson@Sun.COM 		dsl_dataset_rele(ds, FTAG);
108511022STom.Erickson@Sun.COM 		return (err);
108611022STom.Erickson@Sun.COM 	}
108711022STom.Erickson@Sun.COM 
108811022STom.Erickson@Sun.COM 	ASSERT(ds->ds_dir == dd);
10895481Sck153898 
109011022STom.Erickson@Sun.COM 	/*
109111022STom.Erickson@Sun.COM 	 * If someone removes a file, then tries to set the quota, we want to
109211022STom.Erickson@Sun.COM 	 * make sure the file freeing takes effect.
109311022STom.Erickson@Sun.COM 	 */
109411022STom.Erickson@Sun.COM 	txg_wait_open(dd->dd_pool, 0);
109511022STom.Erickson@Sun.COM 
109611022STom.Erickson@Sun.COM 	err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check,
109711022STom.Erickson@Sun.COM 	    dsl_dir_set_quota_sync, ds, &psa, 0);
109811022STom.Erickson@Sun.COM 
1099789Sahrens 	dsl_dir_close(dd, FTAG);
110011022STom.Erickson@Sun.COM 	dsl_dataset_rele(ds, FTAG);
1101789Sahrens 	return (err);
1102789Sahrens }
1103789Sahrens 
11045378Sck153898 int
dsl_dir_set_reservation_check(void * arg1,void * arg2,dmu_tx_t * tx)11052199Sahrens dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx)
1106789Sahrens {
110711022STom.Erickson@Sun.COM 	dsl_dataset_t *ds = arg1;
110811022STom.Erickson@Sun.COM 	dsl_dir_t *dd = ds->ds_dir;
110911022STom.Erickson@Sun.COM 	dsl_prop_setarg_t *psa = arg2;
111011022STom.Erickson@Sun.COM 	uint64_t effective_value;
1111789Sahrens 	uint64_t used, avail;
111211022STom.Erickson@Sun.COM 	int err;
111311022STom.Erickson@Sun.COM 
111411022STom.Erickson@Sun.COM 	if ((err = dsl_prop_predict_sync(ds->ds_dir, psa)) != 0)
111511022STom.Erickson@Sun.COM 		return (err);
111611022STom.Erickson@Sun.COM 
111711022STom.Erickson@Sun.COM 	effective_value = psa->psa_effective_value;
1118789Sahrens 
11192199Sahrens 	/*
11202199Sahrens 	 * If we are doing the preliminary check in open context, the
11212199Sahrens 	 * space estimates may be inaccurate.
11222199Sahrens 	 */
11232199Sahrens 	if (!dmu_tx_is_syncing(tx))
11242199Sahrens 		return (0);
11252199Sahrens 
1126789Sahrens 	mutex_enter(&dd->dd_lock);
11277390SMatthew.Ahrens@Sun.COM 	used = dd->dd_phys->dd_used_bytes;
1128789Sahrens 	mutex_exit(&dd->dd_lock);
1129789Sahrens 
1130789Sahrens 	if (dd->dd_parent) {
1131789Sahrens 		avail = dsl_dir_space_available(dd->dd_parent,
1132789Sahrens 		    NULL, 0, FALSE);
1133789Sahrens 	} else {
1134789Sahrens 		avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used;
1135789Sahrens 	}
1136789Sahrens 
113711022STom.Erickson@Sun.COM 	if (MAX(used, effective_value) > MAX(used, dd->dd_phys->dd_reserved)) {
113811022STom.Erickson@Sun.COM 		uint64_t delta = MAX(used, effective_value) -
11398525SEric.Schrock@Sun.COM 		    MAX(used, dd->dd_phys->dd_reserved);
11408525SEric.Schrock@Sun.COM 
11418525SEric.Schrock@Sun.COM 		if (delta > avail)
11428525SEric.Schrock@Sun.COM 			return (ENOSPC);
11438525SEric.Schrock@Sun.COM 		if (dd->dd_phys->dd_quota > 0 &&
114411022STom.Erickson@Sun.COM 		    effective_value > dd->dd_phys->dd_quota)
11458525SEric.Schrock@Sun.COM 			return (ENOSPC);
11468525SEric.Schrock@Sun.COM 	}
11478525SEric.Schrock@Sun.COM 
11482199Sahrens 	return (0);
11492199Sahrens }
11502199Sahrens 
11512199Sahrens static void
dsl_dir_set_reservation_sync(void * arg1,void * arg2,dmu_tx_t * tx)115212296SLin.Ling@Sun.COM dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
11532199Sahrens {
115411022STom.Erickson@Sun.COM 	dsl_dataset_t *ds = arg1;
115511022STom.Erickson@Sun.COM 	dsl_dir_t *dd = ds->ds_dir;
115611022STom.Erickson@Sun.COM 	dsl_prop_setarg_t *psa = arg2;
115711022STom.Erickson@Sun.COM 	uint64_t effective_value = psa->psa_effective_value;
11582199Sahrens 	uint64_t used;
11592199Sahrens 	int64_t delta;
11602199Sahrens 
116112296SLin.Ling@Sun.COM 	dsl_prop_set_sync(ds, psa, tx);
116211022STom.Erickson@Sun.COM 	DSL_PROP_CHECK_PREDICTION(dd, psa);
116311022STom.Erickson@Sun.COM 
11645378Sck153898 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
11655378Sck153898 
11662199Sahrens 	mutex_enter(&dd->dd_lock);
11677390SMatthew.Ahrens@Sun.COM 	used = dd->dd_phys->dd_used_bytes;
116811022STom.Erickson@Sun.COM 	delta = MAX(used, effective_value) -
11692199Sahrens 	    MAX(used, dd->dd_phys->dd_reserved);
117011022STom.Erickson@Sun.COM 	dd->dd_phys->dd_reserved = effective_value;
1171789Sahrens 
1172789Sahrens 	if (dd->dd_parent != NULL) {
1173789Sahrens 		/* Roll up this additional usage into our ancestors */
11747390SMatthew.Ahrens@Sun.COM 		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
11757390SMatthew.Ahrens@Sun.COM 		    delta, 0, 0, tx);
1176789Sahrens 	}
11777595SMatthew.Ahrens@Sun.COM 	mutex_exit(&dd->dd_lock);
11784543Smarks 
117912296SLin.Ling@Sun.COM 	spa_history_log_internal(LOG_DS_RESERVATION, dd->dd_pool->dp_spa,
118012296SLin.Ling@Sun.COM 	    tx, "%lld dataset = %llu",
118111022STom.Erickson@Sun.COM 	    (longlong_t)effective_value, dd->dd_phys->dd_head_dataset_obj);
1182789Sahrens }
1183789Sahrens 
1184789Sahrens int
dsl_dir_set_reservation(const char * ddname,zprop_source_t source,uint64_t reservation)118511022STom.Erickson@Sun.COM dsl_dir_set_reservation(const char *ddname, zprop_source_t source,
118611022STom.Erickson@Sun.COM     uint64_t reservation)
1187789Sahrens {
1188789Sahrens 	dsl_dir_t *dd;
118911022STom.Erickson@Sun.COM 	dsl_dataset_t *ds;
119011022STom.Erickson@Sun.COM 	dsl_prop_setarg_t psa;
1191789Sahrens 	int err;
1192789Sahrens 
119311022STom.Erickson@Sun.COM 	dsl_prop_setarg_init_uint64(&psa, "reservation", source, &reservation);
119411022STom.Erickson@Sun.COM 
119511022STom.Erickson@Sun.COM 	err = dsl_dataset_hold(ddname, FTAG, &ds);
119611022STom.Erickson@Sun.COM 	if (err)
119711022STom.Erickson@Sun.COM 		return (err);
119811022STom.Erickson@Sun.COM 
11991544Seschrock 	err = dsl_dir_open(ddname, FTAG, &dd, NULL);
120011022STom.Erickson@Sun.COM 	if (err) {
120111022STom.Erickson@Sun.COM 		dsl_dataset_rele(ds, FTAG);
12021544Seschrock 		return (err);
120311022STom.Erickson@Sun.COM 	}
120411022STom.Erickson@Sun.COM 
120511022STom.Erickson@Sun.COM 	ASSERT(ds->ds_dir == dd);
120611022STom.Erickson@Sun.COM 
12072199Sahrens 	err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check,
120811022STom.Erickson@Sun.COM 	    dsl_dir_set_reservation_sync, ds, &psa, 0);
120911022STom.Erickson@Sun.COM 
1210789Sahrens 	dsl_dir_close(dd, FTAG);
121111022STom.Erickson@Sun.COM 	dsl_dataset_rele(ds, FTAG);
1212789Sahrens 	return (err);
1213789Sahrens }
1214789Sahrens 
1215789Sahrens static dsl_dir_t *
closest_common_ancestor(dsl_dir_t * ds1,dsl_dir_t * ds2)1216789Sahrens closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2)
1217789Sahrens {
1218789Sahrens 	for (; ds1; ds1 = ds1->dd_parent) {
1219789Sahrens 		dsl_dir_t *dd;
1220789Sahrens 		for (dd = ds2; dd; dd = dd->dd_parent) {
1221789Sahrens 			if (ds1 == dd)
1222789Sahrens 				return (dd);
1223789Sahrens 		}
1224789Sahrens 	}
1225789Sahrens 	return (NULL);
1226789Sahrens }
1227789Sahrens 
1228789Sahrens /*
1229789Sahrens  * If delta is applied to dd, how much of that delta would be applied to
1230789Sahrens  * ancestor?  Syncing context only.
1231789Sahrens  */
1232789Sahrens static int64_t
would_change(dsl_dir_t * dd,int64_t delta,dsl_dir_t * ancestor)1233789Sahrens would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor)
1234789Sahrens {
1235789Sahrens 	if (dd == ancestor)
1236789Sahrens 		return (delta);
1237789Sahrens 
1238789Sahrens 	mutex_enter(&dd->dd_lock);
12397390SMatthew.Ahrens@Sun.COM 	delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, delta);
1240789Sahrens 	mutex_exit(&dd->dd_lock);
1241789Sahrens 	return (would_change(dd->dd_parent, delta, ancestor));
1242789Sahrens }
1243789Sahrens 
12442199Sahrens struct renamearg {
12452199Sahrens 	dsl_dir_t *newparent;
12462199Sahrens 	const char *mynewname;
12472199Sahrens };
12482199Sahrens 
12492199Sahrens static int
dsl_dir_rename_check(void * arg1,void * arg2,dmu_tx_t * tx)12502199Sahrens dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx)
1251789Sahrens {
12522199Sahrens 	dsl_dir_t *dd = arg1;
12532199Sahrens 	struct renamearg *ra = arg2;
1254789Sahrens 	dsl_pool_t *dp = dd->dd_pool;
1255789Sahrens 	objset_t *mos = dp->dp_meta_objset;
12562199Sahrens 	int err;
12572199Sahrens 	uint64_t val;
12582199Sahrens 
125911823SMatthew.Ahrens@Sun.COM 	/*
126011823SMatthew.Ahrens@Sun.COM 	 * There should only be one reference, from dmu_objset_rename().
126111823SMatthew.Ahrens@Sun.COM 	 * Fleeting holds are also possible (eg, from "zfs list" getting
126211823SMatthew.Ahrens@Sun.COM 	 * stats), but any that are present in open context will likely
126311823SMatthew.Ahrens@Sun.COM 	 * be gone by syncing context, so only fail from syncing
126411823SMatthew.Ahrens@Sun.COM 	 * context.
126511823SMatthew.Ahrens@Sun.COM 	 */
126611823SMatthew.Ahrens@Sun.COM 	if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 1)
12672199Sahrens 		return (EBUSY);
1268789Sahrens 
12692199Sahrens 	/* check for existing name */
12702199Sahrens 	err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
12712199Sahrens 	    ra->mynewname, 8, 1, &val);
12722199Sahrens 	if (err == 0)
12732199Sahrens 		return (EEXIST);
12742199Sahrens 	if (err != ENOENT)
12751544Seschrock 		return (err);
1276789Sahrens 
12772199Sahrens 	if (ra->newparent != dd->dd_parent) {
12782082Seschrock 		/* is there enough space? */
12792082Seschrock 		uint64_t myspace =
12807390SMatthew.Ahrens@Sun.COM 		    MAX(dd->dd_phys->dd_used_bytes, dd->dd_phys->dd_reserved);
1281789Sahrens 
12822199Sahrens 		/* no rename into our descendant */
12832199Sahrens 		if (closest_common_ancestor(dd, ra->newparent) == dd)
1284789Sahrens 			return (EINVAL);
12852199Sahrens 
12862199Sahrens 		if (err = dsl_dir_transfer_possible(dd->dd_parent,
12872199Sahrens 		    ra->newparent, myspace))
12882199Sahrens 			return (err);
12892199Sahrens 	}
12902199Sahrens 
12912199Sahrens 	return (0);
12922199Sahrens }
1293789Sahrens 
12942199Sahrens static void
dsl_dir_rename_sync(void * arg1,void * arg2,dmu_tx_t * tx)129512296SLin.Ling@Sun.COM dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx)
12962199Sahrens {
12972199Sahrens 	dsl_dir_t *dd = arg1;
12982199Sahrens 	struct renamearg *ra = arg2;
12992199Sahrens 	dsl_pool_t *dp = dd->dd_pool;
13002199Sahrens 	objset_t *mos = dp->dp_meta_objset;
13012199Sahrens 	int err;
1302789Sahrens 
13032199Sahrens 	ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2);
13042199Sahrens 
13052199Sahrens 	if (ra->newparent != dd->dd_parent) {
13067390SMatthew.Ahrens@Sun.COM 		dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD,
13077390SMatthew.Ahrens@Sun.COM 		    -dd->dd_phys->dd_used_bytes,
1308789Sahrens 		    -dd->dd_phys->dd_compressed_bytes,
1309789Sahrens 		    -dd->dd_phys->dd_uncompressed_bytes, tx);
13107390SMatthew.Ahrens@Sun.COM 		dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD,
13117390SMatthew.Ahrens@Sun.COM 		    dd->dd_phys->dd_used_bytes,
1312789Sahrens 		    dd->dd_phys->dd_compressed_bytes,
1313789Sahrens 		    dd->dd_phys->dd_uncompressed_bytes, tx);
13147390SMatthew.Ahrens@Sun.COM 
13157390SMatthew.Ahrens@Sun.COM 		if (dd->dd_phys->dd_reserved > dd->dd_phys->dd_used_bytes) {
13167390SMatthew.Ahrens@Sun.COM 			uint64_t unused_rsrv = dd->dd_phys->dd_reserved -
13177390SMatthew.Ahrens@Sun.COM 			    dd->dd_phys->dd_used_bytes;
13187390SMatthew.Ahrens@Sun.COM 
13197390SMatthew.Ahrens@Sun.COM 			dsl_dir_diduse_space(dd->dd_parent, DD_USED_CHILD_RSRV,
13207390SMatthew.Ahrens@Sun.COM 			    -unused_rsrv, 0, 0, tx);
13217390SMatthew.Ahrens@Sun.COM 			dsl_dir_diduse_space(ra->newparent, DD_USED_CHILD_RSRV,
13227390SMatthew.Ahrens@Sun.COM 			    unused_rsrv, 0, 0, tx);
13237390SMatthew.Ahrens@Sun.COM 		}
1324789Sahrens 	}
1325789Sahrens 
1326789Sahrens 	dmu_buf_will_dirty(dd->dd_dbuf, tx);
1327789Sahrens 
1328789Sahrens 	/* remove from old parent zapobj */
1329789Sahrens 	err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj,
1330789Sahrens 	    dd->dd_myname, tx);
1331789Sahrens 	ASSERT3U(err, ==, 0);
1332789Sahrens 
13332199Sahrens 	(void) strcpy(dd->dd_myname, ra->mynewname);
1334789Sahrens 	dsl_dir_close(dd->dd_parent, dd);
13352199Sahrens 	dd->dd_phys->dd_parent_obj = ra->newparent->dd_object;
13361544Seschrock 	VERIFY(0 == dsl_dir_open_obj(dd->dd_pool,
13372199Sahrens 	    ra->newparent->dd_object, NULL, dd, &dd->dd_parent));
1338789Sahrens 
1339789Sahrens 	/* add to new parent zapobj */
13402199Sahrens 	err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj,
1341789Sahrens 	    dd->dd_myname, 8, 1, &dd->dd_object, tx);
1342789Sahrens 	ASSERT3U(err, ==, 0);
13434543Smarks 
134412296SLin.Ling@Sun.COM 	spa_history_log_internal(LOG_DS_RENAME, dd->dd_pool->dp_spa,
134512296SLin.Ling@Sun.COM 	    tx, "dataset = %llu", dd->dd_phys->dd_head_dataset_obj);
13462199Sahrens }
1347789Sahrens 
13482199Sahrens int
dsl_dir_rename(dsl_dir_t * dd,const char * newname)13492199Sahrens dsl_dir_rename(dsl_dir_t *dd, const char *newname)
13502199Sahrens {
13512199Sahrens 	struct renamearg ra;
13522199Sahrens 	int err;
13532199Sahrens 
13542199Sahrens 	/* new parent should exist */
13552199Sahrens 	err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname);
13562199Sahrens 	if (err)
13572199Sahrens 		return (err);
13582199Sahrens 
13592199Sahrens 	/* can't rename to different pool */
13602199Sahrens 	if (dd->dd_pool != ra.newparent->dd_pool) {
13612199Sahrens 		err = ENXIO;
13622199Sahrens 		goto out;
13632199Sahrens 	}
13642199Sahrens 
13652199Sahrens 	/* new name should not already exist */
13662199Sahrens 	if (ra.mynewname == NULL) {
13672199Sahrens 		err = EEXIST;
13682199Sahrens 		goto out;
13692199Sahrens 	}
13702199Sahrens 
13712199Sahrens 	err = dsl_sync_task_do(dd->dd_pool,
13722199Sahrens 	    dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3);
13732199Sahrens 
13742199Sahrens out:
13752199Sahrens 	dsl_dir_close(ra.newparent, FTAG);
13762199Sahrens 	return (err);
1377789Sahrens }
13782082Seschrock 
13792082Seschrock int
dsl_dir_transfer_possible(dsl_dir_t * sdd,dsl_dir_t * tdd,uint64_t space)13802082Seschrock dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space)
13812082Seschrock {
13822082Seschrock 	dsl_dir_t *ancestor;
13832082Seschrock 	int64_t adelta;
13842082Seschrock 	uint64_t avail;
13852082Seschrock 
13862082Seschrock 	ancestor = closest_common_ancestor(sdd, tdd);
13872082Seschrock 	adelta = would_change(sdd, -space, ancestor);
13882082Seschrock 	avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE);
13892082Seschrock 	if (avail < space)
13902082Seschrock 		return (ENOSPC);
13912082Seschrock 
13922082Seschrock 	return (0);
13932082Seschrock }
139410373Schris.kirby@sun.com 
139510373Schris.kirby@sun.com timestruc_t
dsl_dir_snap_cmtime(dsl_dir_t * dd)139610373Schris.kirby@sun.com dsl_dir_snap_cmtime(dsl_dir_t *dd)
139710373Schris.kirby@sun.com {
139810373Schris.kirby@sun.com 	timestruc_t t;
139910373Schris.kirby@sun.com 
140010373Schris.kirby@sun.com 	mutex_enter(&dd->dd_lock);
140110373Schris.kirby@sun.com 	t = dd->dd_snap_cmtime;
140210373Schris.kirby@sun.com 	mutex_exit(&dd->dd_lock);
140310373Schris.kirby@sun.com 
140410373Schris.kirby@sun.com 	return (t);
140510373Schris.kirby@sun.com }
140610373Schris.kirby@sun.com 
140710373Schris.kirby@sun.com void
dsl_dir_snap_cmtime_update(dsl_dir_t * dd)140810373Schris.kirby@sun.com dsl_dir_snap_cmtime_update(dsl_dir_t *dd)
140910373Schris.kirby@sun.com {
141010373Schris.kirby@sun.com 	timestruc_t t;
141110373Schris.kirby@sun.com 
141210373Schris.kirby@sun.com 	gethrestime(&t);
141310373Schris.kirby@sun.com 	mutex_enter(&dd->dd_lock);
141410373Schris.kirby@sun.com 	dd->dd_snap_cmtime = t;
141510373Schris.kirby@sun.com 	mutex_exit(&dd->dd_lock);
141610373Schris.kirby@sun.com }
1417