/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
24789Sahrens */ 25789Sahrens 26789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27789Sahrens 28789Sahrens #include <sys/dmu.h> 29789Sahrens #include <sys/dmu_tx.h> 30789Sahrens #include <sys/dsl_dataset.h> 31789Sahrens #include <sys/dsl_dir.h> 32789Sahrens #include <sys/dsl_prop.h> 332199Sahrens #include <sys/dsl_synctask.h> 34789Sahrens #include <sys/spa.h> 35789Sahrens #include <sys/zap.h> 36789Sahrens #include <sys/zio.h> 37789Sahrens #include <sys/arc.h> 38789Sahrens #include "zfs_namecheck.h" 39789Sahrens 40789Sahrens static uint64_t dsl_dir_estimated_space(dsl_dir_t *dd); 41789Sahrens static uint64_t dsl_dir_space_available(dsl_dir_t *dd, 42789Sahrens dsl_dir_t *ancestor, int64_t delta, int ondiskonly); 432199Sahrens static void dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx); 44789Sahrens 45789Sahrens 46789Sahrens /* ARGSUSED */ 47789Sahrens static void 48789Sahrens dsl_dir_evict(dmu_buf_t *db, void *arg) 49789Sahrens { 50789Sahrens dsl_dir_t *dd = arg; 51789Sahrens dsl_pool_t *dp = dd->dd_pool; 52789Sahrens int t; 53789Sahrens 54789Sahrens for (t = 0; t < TXG_SIZE; t++) { 55789Sahrens ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); 56789Sahrens ASSERT(dd->dd_tempreserved[t] == 0); 57789Sahrens ASSERT(dd->dd_space_towrite[t] == 0); 58789Sahrens } 59789Sahrens 60789Sahrens ASSERT3U(dd->dd_used_bytes, ==, dd->dd_phys->dd_used_bytes); 61789Sahrens 62789Sahrens if (dd->dd_parent) 63789Sahrens dsl_dir_close(dd->dd_parent, dd); 64789Sahrens 65789Sahrens spa_close(dd->dd_pool->dp_spa, dd); 66789Sahrens 67789Sahrens /* 68789Sahrens * The props callback list should be empty since they hold the 69789Sahrens * dir open. 
70789Sahrens */ 71789Sahrens list_destroy(&dd->dd_prop_cbs); 72*2856Snd150628 mutex_destroy(&dd->dd_lock); 73789Sahrens kmem_free(dd, sizeof (dsl_dir_t)); 74789Sahrens } 75789Sahrens 761544Seschrock int 77789Sahrens dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, 781544Seschrock const char *tail, void *tag, dsl_dir_t **ddp) 79789Sahrens { 80789Sahrens dmu_buf_t *dbuf; 81789Sahrens dsl_dir_t *dd; 821544Seschrock int err; 83789Sahrens 84789Sahrens ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 85789Sahrens dsl_pool_sync_context(dp)); 86789Sahrens 871544Seschrock err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf); 881544Seschrock if (err) 891544Seschrock return (err); 90789Sahrens dd = dmu_buf_get_user(dbuf); 91789Sahrens #ifdef ZFS_DEBUG 92789Sahrens { 93789Sahrens dmu_object_info_t doi; 94789Sahrens dmu_object_info_from_db(dbuf, &doi); 95928Stabriz ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR); 96789Sahrens } 97789Sahrens #endif 98789Sahrens /* XXX assert bonus buffer size is correct */ 99789Sahrens if (dd == NULL) { 100789Sahrens dsl_dir_t *winner; 101789Sahrens int err; 102789Sahrens 103789Sahrens dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP); 104789Sahrens dd->dd_object = ddobj; 105789Sahrens dd->dd_dbuf = dbuf; 106789Sahrens dd->dd_pool = dp; 107789Sahrens dd->dd_phys = dbuf->db_data; 108789Sahrens dd->dd_used_bytes = dd->dd_phys->dd_used_bytes; 109*2856Snd150628 mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL); 110789Sahrens 111789Sahrens list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t), 112789Sahrens offsetof(dsl_prop_cb_record_t, cbr_node)); 113789Sahrens 114789Sahrens if (dd->dd_phys->dd_parent_obj) { 1151544Seschrock err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj, 1161544Seschrock NULL, dd, &dd->dd_parent); 1171544Seschrock if (err) { 118*2856Snd150628 mutex_destroy(&dd->dd_lock); 1191544Seschrock kmem_free(dd, sizeof (dsl_dir_t)); 1201544Seschrock dmu_buf_rele(dbuf, tag); 1211544Seschrock return (err); 1221544Seschrock } 
123789Sahrens if (tail) { 124789Sahrens #ifdef ZFS_DEBUG 125789Sahrens uint64_t foundobj; 126789Sahrens 127789Sahrens err = zap_lookup(dp->dp_meta_objset, 128789Sahrens dd->dd_parent->dd_phys-> 129789Sahrens dd_child_dir_zapobj, 130789Sahrens tail, sizeof (foundobj), 1, &foundobj); 1311544Seschrock ASSERT(err || foundobj == ddobj); 132789Sahrens #endif 133789Sahrens (void) strcpy(dd->dd_myname, tail); 134789Sahrens } else { 135789Sahrens err = zap_value_search(dp->dp_meta_objset, 136789Sahrens dd->dd_parent->dd_phys-> 137789Sahrens dd_child_dir_zapobj, 138789Sahrens ddobj, dd->dd_myname); 1391544Seschrock } 1401544Seschrock if (err) { 1411544Seschrock dsl_dir_close(dd->dd_parent, dd); 142*2856Snd150628 mutex_destroy(&dd->dd_lock); 1431544Seschrock kmem_free(dd, sizeof (dsl_dir_t)); 1441544Seschrock dmu_buf_rele(dbuf, tag); 1451544Seschrock return (err); 146789Sahrens } 147789Sahrens } else { 148789Sahrens (void) strcpy(dd->dd_myname, spa_name(dp->dp_spa)); 149789Sahrens } 150789Sahrens 151789Sahrens winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys, 152789Sahrens dsl_dir_evict); 153789Sahrens if (winner) { 154789Sahrens if (dd->dd_parent) 155789Sahrens dsl_dir_close(dd->dd_parent, dd); 156*2856Snd150628 mutex_destroy(&dd->dd_lock); 157789Sahrens kmem_free(dd, sizeof (dsl_dir_t)); 158789Sahrens dd = winner; 159789Sahrens } else { 160789Sahrens spa_open_ref(dp->dp_spa, dd); 161789Sahrens } 162789Sahrens } 163789Sahrens 164789Sahrens /* 165789Sahrens * The dsl_dir_t has both open-to-close and instantiate-to-evict 166789Sahrens * holds on the spa. We need the open-to-close holds because 167789Sahrens * otherwise the spa_refcnt wouldn't change when we open a 168789Sahrens * dir which the spa also has open, so we could incorrectly 169789Sahrens * think it was OK to unload/export/destroy the pool. We need 170789Sahrens * the instantiate-to-evict hold because the dsl_dir_t has a 171789Sahrens * pointer to the dd_pool, which has a pointer to the spa_t. 
172789Sahrens */ 173789Sahrens spa_open_ref(dp->dp_spa, tag); 174789Sahrens ASSERT3P(dd->dd_pool, ==, dp); 175789Sahrens ASSERT3U(dd->dd_object, ==, ddobj); 176789Sahrens ASSERT3P(dd->dd_dbuf, ==, dbuf); 1771544Seschrock *ddp = dd; 1781544Seschrock return (0); 179789Sahrens } 180789Sahrens 181789Sahrens void 182789Sahrens dsl_dir_close(dsl_dir_t *dd, void *tag) 183789Sahrens { 184789Sahrens dprintf_dd(dd, "%s\n", ""); 185789Sahrens spa_close(dd->dd_pool->dp_spa, tag); 1861544Seschrock dmu_buf_rele(dd->dd_dbuf, tag); 187789Sahrens } 188789Sahrens 1892467Sek110237 /* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */ 190789Sahrens void 191789Sahrens dsl_dir_name(dsl_dir_t *dd, char *buf) 192789Sahrens { 193789Sahrens if (dd->dd_parent) { 194789Sahrens dsl_dir_name(dd->dd_parent, buf); 195789Sahrens (void) strcat(buf, "/"); 196789Sahrens } else { 197789Sahrens buf[0] = '\0'; 198789Sahrens } 199789Sahrens if (!MUTEX_HELD(&dd->dd_lock)) { 200789Sahrens /* 201789Sahrens * recursive mutex so that we can use 202789Sahrens * dprintf_dd() with dd_lock held 203789Sahrens */ 204789Sahrens mutex_enter(&dd->dd_lock); 205789Sahrens (void) strcat(buf, dd->dd_myname); 206789Sahrens mutex_exit(&dd->dd_lock); 207789Sahrens } else { 208789Sahrens (void) strcat(buf, dd->dd_myname); 209789Sahrens } 210789Sahrens } 211789Sahrens 212789Sahrens int 213789Sahrens dsl_dir_is_private(dsl_dir_t *dd) 214789Sahrens { 215789Sahrens int rv = FALSE; 216789Sahrens 217789Sahrens if (dd->dd_parent && dsl_dir_is_private(dd->dd_parent)) 218789Sahrens rv = TRUE; 219789Sahrens if (dataset_name_hidden(dd->dd_myname)) 220789Sahrens rv = TRUE; 221789Sahrens return (rv); 222789Sahrens } 223789Sahrens 224789Sahrens 225789Sahrens static int 226789Sahrens getcomponent(const char *path, char *component, const char **nextp) 227789Sahrens { 228789Sahrens char *p; 229789Sahrens if (path == NULL) 2302731Snd150628 return (ENOENT); 231789Sahrens /* This would be a good place to reserve some 
namespace... */ 232789Sahrens p = strpbrk(path, "/@"); 233789Sahrens if (p && (p[1] == '/' || p[1] == '@')) { 234789Sahrens /* two separators in a row */ 235789Sahrens return (EINVAL); 236789Sahrens } 237789Sahrens if (p == NULL || p == path) { 238789Sahrens /* 239789Sahrens * if the first thing is an @ or /, it had better be an 240789Sahrens * @ and it had better not have any more ats or slashes, 241789Sahrens * and it had better have something after the @. 242789Sahrens */ 243789Sahrens if (p != NULL && 244789Sahrens (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0')) 245789Sahrens return (EINVAL); 246789Sahrens if (strlen(path) >= MAXNAMELEN) 247789Sahrens return (ENAMETOOLONG); 248789Sahrens (void) strcpy(component, path); 249789Sahrens p = NULL; 250789Sahrens } else if (p[0] == '/') { 251789Sahrens if (p-path >= MAXNAMELEN) 252789Sahrens return (ENAMETOOLONG); 253789Sahrens (void) strncpy(component, path, p - path); 254789Sahrens component[p-path] = '\0'; 255789Sahrens p++; 256789Sahrens } else if (p[0] == '@') { 257789Sahrens /* 258789Sahrens * if the next separator is an @, there better not be 259789Sahrens * any more slashes. 
260789Sahrens */ 261789Sahrens if (strchr(path, '/')) 262789Sahrens return (EINVAL); 263789Sahrens if (p-path >= MAXNAMELEN) 264789Sahrens return (ENAMETOOLONG); 265789Sahrens (void) strncpy(component, path, p - path); 266789Sahrens component[p-path] = '\0'; 267789Sahrens } else { 268789Sahrens ASSERT(!"invalid p"); 269789Sahrens } 270789Sahrens *nextp = p; 271789Sahrens return (0); 272789Sahrens } 273789Sahrens 274789Sahrens /* 275789Sahrens * same as dsl_open_dir, ignore the first component of name and use the 276789Sahrens * spa instead 277789Sahrens */ 2781544Seschrock int 2791544Seschrock dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, 2801544Seschrock dsl_dir_t **ddp, const char **tailp) 281789Sahrens { 282789Sahrens char buf[MAXNAMELEN]; 283789Sahrens const char *next, *nextnext = NULL; 284789Sahrens int err; 285789Sahrens dsl_dir_t *dd; 286789Sahrens dsl_pool_t *dp; 287789Sahrens uint64_t ddobj; 288789Sahrens int openedspa = FALSE; 289789Sahrens 290789Sahrens dprintf("%s\n", name); 291789Sahrens 292789Sahrens err = getcomponent(name, buf, &next); 293789Sahrens if (err) 2941544Seschrock return (err); 295789Sahrens if (spa == NULL) { 296789Sahrens err = spa_open(buf, &spa, FTAG); 297789Sahrens if (err) { 298789Sahrens dprintf("spa_open(%s) failed\n", buf); 2991544Seschrock return (err); 300789Sahrens } 301789Sahrens openedspa = TRUE; 302789Sahrens 303789Sahrens /* XXX this assertion belongs in spa_open */ 304789Sahrens ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa))); 305789Sahrens } 306789Sahrens 307789Sahrens dp = spa_get_dsl(spa); 308789Sahrens 309789Sahrens rw_enter(&dp->dp_config_rwlock, RW_READER); 3101544Seschrock err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd); 3111544Seschrock if (err) { 3121544Seschrock rw_exit(&dp->dp_config_rwlock); 3131544Seschrock if (openedspa) 3141544Seschrock spa_close(spa, FTAG); 3151544Seschrock return (err); 3161544Seschrock } 3171544Seschrock 318789Sahrens while (next != NULL) { 319789Sahrens 
dsl_dir_t *child_ds; 320789Sahrens err = getcomponent(next, buf, &nextnext); 3211544Seschrock if (err) 3221544Seschrock break; 323789Sahrens ASSERT(next[0] != '\0'); 324789Sahrens if (next[0] == '@') 325789Sahrens break; 326789Sahrens dprintf("looking up %s in obj%lld\n", 327789Sahrens buf, dd->dd_phys->dd_child_dir_zapobj); 328789Sahrens 329789Sahrens err = zap_lookup(dp->dp_meta_objset, 330789Sahrens dd->dd_phys->dd_child_dir_zapobj, 331789Sahrens buf, sizeof (ddobj), 1, &ddobj); 3321544Seschrock if (err) { 3331544Seschrock if (err == ENOENT) 3341544Seschrock err = 0; 335789Sahrens break; 336789Sahrens } 337789Sahrens 3381544Seschrock err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds); 3391544Seschrock if (err) 3401544Seschrock break; 341789Sahrens dsl_dir_close(dd, tag); 342789Sahrens dd = child_ds; 343789Sahrens next = nextnext; 344789Sahrens } 345789Sahrens rw_exit(&dp->dp_config_rwlock); 346789Sahrens 3471544Seschrock if (err) { 3481544Seschrock dsl_dir_close(dd, tag); 3491544Seschrock if (openedspa) 3501544Seschrock spa_close(spa, FTAG); 3511544Seschrock return (err); 3521544Seschrock } 3531544Seschrock 354789Sahrens /* 355789Sahrens * It's an error if there's more than one component left, or 356789Sahrens * tailp==NULL and there's any component left. 357789Sahrens */ 358789Sahrens if (next != NULL && 359789Sahrens (tailp == NULL || (nextnext && nextnext[0] != '\0'))) { 360789Sahrens /* bad path name */ 361789Sahrens dsl_dir_close(dd, tag); 362789Sahrens dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp); 3631544Seschrock err = ENOENT; 364789Sahrens } 365789Sahrens if (tailp) 366789Sahrens *tailp = next; 367789Sahrens if (openedspa) 368789Sahrens spa_close(spa, FTAG); 3691544Seschrock *ddp = dd; 3701544Seschrock return (err); 371789Sahrens } 372789Sahrens 373789Sahrens /* 374789Sahrens * Return the dsl_dir_t, and possibly the last component which couldn't 375789Sahrens * be found in *tail. 
Return NULL if the path is bogus, or if 376789Sahrens * tail==NULL and we couldn't parse the whole name. (*tail)[0] == '@' 377789Sahrens * means that the last component is a snapshot. 378789Sahrens */ 3791544Seschrock int 3801544Seschrock dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp) 381789Sahrens { 3821544Seschrock return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp)); 383789Sahrens } 384789Sahrens 3852199Sahrens uint64_t 386789Sahrens dsl_dir_create_sync(dsl_dir_t *pds, const char *name, dmu_tx_t *tx) 387789Sahrens { 388789Sahrens objset_t *mos = pds->dd_pool->dp_meta_objset; 389789Sahrens uint64_t ddobj; 390789Sahrens dsl_dir_phys_t *dsphys; 391789Sahrens dmu_buf_t *dbuf; 392789Sahrens 393928Stabriz ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, 394928Stabriz DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); 3952199Sahrens VERIFY(0 == zap_add(mos, pds->dd_phys->dd_child_dir_zapobj, 3962199Sahrens name, sizeof (uint64_t), 1, &ddobj, tx)); 3971544Seschrock VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf)); 398789Sahrens dmu_buf_will_dirty(dbuf, tx); 399789Sahrens dsphys = dbuf->db_data; 400789Sahrens 401789Sahrens dsphys->dd_creation_time = gethrestime_sec(); 402789Sahrens dsphys->dd_parent_obj = pds->dd_object; 403789Sahrens dsphys->dd_props_zapobj = zap_create(mos, 404789Sahrens DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); 405789Sahrens dsphys->dd_child_dir_zapobj = zap_create(mos, 406885Sahrens DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); 4071544Seschrock dmu_buf_rele(dbuf, FTAG); 408789Sahrens 4092199Sahrens return (ddobj); 4102199Sahrens } 4112199Sahrens 4122199Sahrens /* ARGSUSED */ 4132199Sahrens int 4142199Sahrens dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) 4152199Sahrens { 4162199Sahrens dsl_dir_t *dd = arg1; 4172199Sahrens dsl_pool_t *dp = dd->dd_pool; 4182199Sahrens objset_t *mos = dp->dp_meta_objset; 4192199Sahrens int err; 4202199Sahrens uint64_t count; 4212199Sahrens 4222199Sahrens /* 4232199Sahrens * 
There should be exactly two holds, both from 4242199Sahrens * dsl_dataset_destroy: one on the dd directory, and one on its 4252199Sahrens * head ds. Otherwise, someone is trying to lookup something 4262199Sahrens * inside this dir while we want to destroy it. The 4272199Sahrens * config_rwlock ensures that nobody else opens it after we 4282199Sahrens * check. 4292199Sahrens */ 4302199Sahrens if (dmu_buf_refcount(dd->dd_dbuf) > 2) 4312199Sahrens return (EBUSY); 4322199Sahrens 4332199Sahrens err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count); 4342199Sahrens if (err) 4352199Sahrens return (err); 4362199Sahrens if (count != 0) 4372199Sahrens return (EEXIST); 438789Sahrens 439789Sahrens return (0); 440789Sahrens } 441789Sahrens 4422199Sahrens void 4432199Sahrens dsl_dir_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx) 444789Sahrens { 4452199Sahrens dsl_dir_t *dd = arg1; 4462199Sahrens objset_t *mos = dd->dd_pool->dp_meta_objset; 4472199Sahrens uint64_t val, obj; 448789Sahrens 4492199Sahrens ASSERT(RW_WRITE_HELD(&dd->dd_pool->dp_config_rwlock)); 450789Sahrens ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); 451789Sahrens 4522199Sahrens /* Remove our reservation. 
*/ 453789Sahrens val = 0; 4542199Sahrens dsl_dir_set_reservation_sync(dd, &val, tx); 455789Sahrens ASSERT3U(dd->dd_used_bytes, ==, 0); 456789Sahrens ASSERT3U(dd->dd_phys->dd_reserved, ==, 0); 457789Sahrens 4582199Sahrens VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_child_dir_zapobj, tx)); 4592199Sahrens VERIFY(0 == zap_destroy(mos, dd->dd_phys->dd_props_zapobj, tx)); 4602199Sahrens VERIFY(0 == zap_remove(mos, 4612199Sahrens dd->dd_parent->dd_phys->dd_child_dir_zapobj, dd->dd_myname, tx)); 462789Sahrens 4632199Sahrens obj = dd->dd_object; 4642199Sahrens dsl_dir_close(dd, tag); 4652199Sahrens VERIFY(0 == dmu_object_free(mos, obj, tx)); 466789Sahrens } 467789Sahrens 468789Sahrens void 469789Sahrens dsl_dir_create_root(objset_t *mos, uint64_t *ddobjp, dmu_tx_t *tx) 470789Sahrens { 471789Sahrens dsl_dir_phys_t *dsp; 472789Sahrens dmu_buf_t *dbuf; 473789Sahrens int error; 474789Sahrens 475928Stabriz *ddobjp = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, 476928Stabriz DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); 477789Sahrens 478789Sahrens error = zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, 479789Sahrens sizeof (uint64_t), 1, ddobjp, tx); 480789Sahrens ASSERT3U(error, ==, 0); 481789Sahrens 4821544Seschrock VERIFY(0 == dmu_bonus_hold(mos, *ddobjp, FTAG, &dbuf)); 483789Sahrens dmu_buf_will_dirty(dbuf, tx); 484789Sahrens dsp = dbuf->db_data; 485789Sahrens 486789Sahrens dsp->dd_creation_time = gethrestime_sec(); 487789Sahrens dsp->dd_props_zapobj = zap_create(mos, 488789Sahrens DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); 489789Sahrens dsp->dd_child_dir_zapobj = zap_create(mos, 490885Sahrens DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); 491789Sahrens 4921544Seschrock dmu_buf_rele(dbuf, FTAG); 493789Sahrens } 494789Sahrens 495789Sahrens void 496789Sahrens dsl_dir_stats(dsl_dir_t *dd, dmu_objset_stats_t *dds) 497789Sahrens { 498789Sahrens bzero(dds, sizeof (dmu_objset_stats_t)); 499789Sahrens 500789Sahrens dds->dds_available = dsl_dir_space_available(dd, NULL, 0, 
TRUE); 501789Sahrens 502789Sahrens mutex_enter(&dd->dd_lock); 503789Sahrens dds->dds_space_used = dd->dd_used_bytes; 504789Sahrens dds->dds_compressed_bytes = dd->dd_phys->dd_compressed_bytes; 505789Sahrens dds->dds_uncompressed_bytes = dd->dd_phys->dd_uncompressed_bytes; 506789Sahrens dds->dds_quota = dd->dd_phys->dd_quota; 507789Sahrens dds->dds_reserved = dd->dd_phys->dd_reserved; 508789Sahrens mutex_exit(&dd->dd_lock); 509789Sahrens 510789Sahrens dds->dds_creation_time = dd->dd_phys->dd_creation_time; 511789Sahrens 512789Sahrens if (dd->dd_phys->dd_clone_parent_obj) { 513789Sahrens dsl_dataset_t *ds; 514789Sahrens 515789Sahrens rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); 5161544Seschrock VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, 5171544Seschrock dd->dd_phys->dd_clone_parent_obj, 5181544Seschrock NULL, DS_MODE_NONE, FTAG, &ds)); 519789Sahrens dsl_dataset_name(ds, dds->dds_clone_of); 520789Sahrens dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 521789Sahrens rw_exit(&dd->dd_pool->dp_config_rwlock); 522789Sahrens } 523789Sahrens } 524789Sahrens 525789Sahrens void 526789Sahrens dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx) 527789Sahrens { 528789Sahrens dsl_pool_t *dp = dd->dd_pool; 529789Sahrens 530789Sahrens ASSERT(dd->dd_phys); 531789Sahrens 532789Sahrens if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) { 533789Sahrens /* up the hold count until we can be written out */ 534789Sahrens dmu_buf_add_ref(dd->dd_dbuf, dd); 535789Sahrens } 536789Sahrens } 537789Sahrens 538789Sahrens static int64_t 539789Sahrens parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta) 540789Sahrens { 541789Sahrens uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved); 542789Sahrens uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved); 543789Sahrens return (new_accounted - old_accounted); 544789Sahrens } 545789Sahrens 546789Sahrens void 547789Sahrens dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx) 548789Sahrens { 549789Sahrens 
ASSERT(dmu_tx_is_syncing(tx)); 550789Sahrens 551789Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 552789Sahrens 553789Sahrens mutex_enter(&dd->dd_lock); 554789Sahrens ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0); 555789Sahrens dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg, 556789Sahrens dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024); 557789Sahrens dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0; 558789Sahrens dd->dd_phys->dd_used_bytes = dd->dd_used_bytes; 559789Sahrens mutex_exit(&dd->dd_lock); 560789Sahrens 561789Sahrens /* release the hold from dsl_dir_dirty */ 5621544Seschrock dmu_buf_rele(dd->dd_dbuf, dd); 563789Sahrens } 564789Sahrens 565789Sahrens static uint64_t 566789Sahrens dsl_dir_estimated_space(dsl_dir_t *dd) 567789Sahrens { 568789Sahrens int64_t space; 569789Sahrens int i; 570789Sahrens 571789Sahrens ASSERT(MUTEX_HELD(&dd->dd_lock)); 572789Sahrens 5731544Seschrock space = dd->dd_phys->dd_used_bytes; 574789Sahrens ASSERT(space >= 0); 575789Sahrens for (i = 0; i < TXG_SIZE; i++) { 576789Sahrens space += dd->dd_space_towrite[i&TXG_MASK]; 577789Sahrens ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0); 578789Sahrens } 579789Sahrens return (space); 580789Sahrens } 581789Sahrens 582789Sahrens /* 583789Sahrens * How much space would dd have available if ancestor had delta applied 584789Sahrens * to it? If ondiskonly is set, we're only interested in what's 585789Sahrens * on-disk, not estimated pending changes. 586789Sahrens */ 587789Sahrens static uint64_t 588789Sahrens dsl_dir_space_available(dsl_dir_t *dd, 589789Sahrens dsl_dir_t *ancestor, int64_t delta, int ondiskonly) 590789Sahrens { 591789Sahrens uint64_t parentspace, myspace, quota, used; 592789Sahrens 593789Sahrens /* 594789Sahrens * If there are no restrictions otherwise, assume we have 595789Sahrens * unlimited space available. 
596789Sahrens */ 597789Sahrens quota = UINT64_MAX; 598789Sahrens parentspace = UINT64_MAX; 599789Sahrens 600789Sahrens if (dd->dd_parent != NULL) { 601789Sahrens parentspace = dsl_dir_space_available(dd->dd_parent, 602789Sahrens ancestor, delta, ondiskonly); 603789Sahrens } 604789Sahrens 605789Sahrens mutex_enter(&dd->dd_lock); 606789Sahrens if (dd->dd_phys->dd_quota != 0) 607789Sahrens quota = dd->dd_phys->dd_quota; 608789Sahrens if (ondiskonly) { 609789Sahrens used = dd->dd_used_bytes; 610789Sahrens } else { 611789Sahrens used = dsl_dir_estimated_space(dd); 612789Sahrens } 613789Sahrens if (dd == ancestor) 614789Sahrens used += delta; 615789Sahrens 616789Sahrens if (dd->dd_parent == NULL) { 6172082Seschrock uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, FALSE); 618789Sahrens quota = MIN(quota, poolsize); 619789Sahrens } 620789Sahrens 621789Sahrens if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) { 622789Sahrens /* 623789Sahrens * We have some space reserved, in addition to what our 624789Sahrens * parent gave us. 625789Sahrens */ 626789Sahrens parentspace += dd->dd_phys->dd_reserved - used; 627789Sahrens } 628789Sahrens 629789Sahrens if (used > quota) { 630789Sahrens /* over quota */ 631789Sahrens myspace = 0; 6322082Seschrock 6332082Seschrock /* 6342082Seschrock * While it's OK to be a little over quota, if 6352082Seschrock * we think we are using more space than there 6362082Seschrock * is in the pool (which is already 1.6% more than 6372082Seschrock * dsl_pool_adjustedsize()), something is very 6382082Seschrock * wrong. 
6392082Seschrock */ 6402082Seschrock ASSERT3U(used, <=, spa_get_space(dd->dd_pool->dp_spa)); 641789Sahrens } else { 642789Sahrens /* 6432082Seschrock * the lesser of the space provided by our parent and 6442082Seschrock * the space left in our quota 645789Sahrens */ 646789Sahrens myspace = MIN(parentspace, quota - used); 647789Sahrens } 648789Sahrens 649789Sahrens mutex_exit(&dd->dd_lock); 650789Sahrens 651789Sahrens return (myspace); 652789Sahrens } 653789Sahrens 654789Sahrens struct tempreserve { 655789Sahrens list_node_t tr_node; 656789Sahrens dsl_dir_t *tr_ds; 657789Sahrens uint64_t tr_size; 658789Sahrens }; 659789Sahrens 660789Sahrens /* 661789Sahrens * Reserve space in this dsl_dir, to be used in this tx's txg. 662789Sahrens * After the space has been dirtied (and thus 663789Sahrens * dsl_dir_willuse_space() has been called), the reservation should 664789Sahrens * be canceled, using dsl_dir_tempreserve_clear(). 665789Sahrens */ 666789Sahrens static int 667789Sahrens dsl_dir_tempreserve_impl(dsl_dir_t *dd, 668789Sahrens uint64_t asize, boolean_t netfree, list_t *tr_list, dmu_tx_t *tx) 669789Sahrens { 670789Sahrens uint64_t txg = tx->tx_txg; 671789Sahrens uint64_t est_used, quota, parent_rsrv; 672789Sahrens int edquot = EDQUOT; 673789Sahrens int txgidx = txg & TXG_MASK; 674789Sahrens int i; 675789Sahrens struct tempreserve *tr; 676789Sahrens 677789Sahrens ASSERT3U(txg, !=, 0); 6781544Seschrock ASSERT3S(asize, >=, 0); 679789Sahrens 680789Sahrens mutex_enter(&dd->dd_lock); 681789Sahrens /* 682789Sahrens * Check against the dsl_dir's quota. We don't add in the delta 683789Sahrens * when checking for over-quota because they get one free hit. 
684789Sahrens */ 685789Sahrens est_used = dsl_dir_estimated_space(dd); 686789Sahrens for (i = 0; i < TXG_SIZE; i++) 687789Sahrens est_used += dd->dd_tempreserved[i]; 688789Sahrens 689789Sahrens quota = UINT64_MAX; 690789Sahrens 691789Sahrens if (dd->dd_phys->dd_quota) 692789Sahrens quota = dd->dd_phys->dd_quota; 693789Sahrens 694789Sahrens /* 695789Sahrens * If this transaction will result in a net free of space, we want 696789Sahrens * to let it through, but we have to be careful: the space that it 697789Sahrens * frees won't become available until *after* this txg syncs. 698789Sahrens * Therefore, to ensure that it's possible to remove files from 699789Sahrens * a full pool without inducing transient overcommits, we throttle 700789Sahrens * netfree transactions against a quota that is slightly larger, 701789Sahrens * but still within the pool's allocation slop. In cases where 702789Sahrens * we're very close to full, this will allow a steady trickle of 703789Sahrens * removes to get through. 704789Sahrens */ 705789Sahrens if (dd->dd_parent == NULL) { 706789Sahrens uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree); 707789Sahrens if (poolsize < quota) { 708789Sahrens quota = poolsize; 709789Sahrens edquot = ENOSPC; 710789Sahrens } 711789Sahrens } else if (netfree) { 712789Sahrens quota = UINT64_MAX; 713789Sahrens } 714789Sahrens 715789Sahrens /* 716789Sahrens * If they are requesting more space, and our current estimate 717789Sahrens * is over quota. They get to try again unless the actual 7181544Seschrock * on-disk is over quota and there are no pending changes (which 7191544Seschrock * may free up space for us). 
720789Sahrens */ 721789Sahrens if (asize > 0 && est_used > quota) { 7221544Seschrock if (dd->dd_space_towrite[txg & TXG_MASK] != 0 || 7231544Seschrock dd->dd_space_towrite[(txg-1) & TXG_MASK] != 0 || 7241544Seschrock dd->dd_space_towrite[(txg-2) & TXG_MASK] != 0 || 7251544Seschrock dd->dd_used_bytes < quota) 726789Sahrens edquot = ERESTART; 727789Sahrens dprintf_dd(dd, "failing: used=%lluK est_used = %lluK " 728789Sahrens "quota=%lluK tr=%lluK err=%d\n", 729789Sahrens dd->dd_used_bytes>>10, est_used>>10, 730789Sahrens quota>>10, asize>>10, edquot); 731789Sahrens mutex_exit(&dd->dd_lock); 732789Sahrens return (edquot); 733789Sahrens } 734789Sahrens 735789Sahrens /* We need to up our estimated delta before dropping dd_lock */ 736789Sahrens dd->dd_tempreserved[txgidx] += asize; 737789Sahrens 738789Sahrens parent_rsrv = parent_delta(dd, est_used, asize); 739789Sahrens mutex_exit(&dd->dd_lock); 740789Sahrens 741789Sahrens tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP); 742789Sahrens tr->tr_ds = dd; 743789Sahrens tr->tr_size = asize; 744789Sahrens list_insert_tail(tr_list, tr); 745789Sahrens 746789Sahrens /* see if it's OK with our parent */ 747789Sahrens if (dd->dd_parent && parent_rsrv) { 748789Sahrens return (dsl_dir_tempreserve_impl(dd->dd_parent, 749789Sahrens parent_rsrv, netfree, tr_list, tx)); 750789Sahrens } else { 751789Sahrens return (0); 752789Sahrens } 753789Sahrens } 754789Sahrens 755789Sahrens /* 756789Sahrens * Reserve space in this dsl_dir, to be used in this tx's txg. 757789Sahrens * After the space has been dirtied (and thus 758789Sahrens * dsl_dir_willuse_space() has been called), the reservation should 759789Sahrens * be canceled, using dsl_dir_tempreserve_clear(). 
760789Sahrens */ 761789Sahrens int 762789Sahrens dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, 763789Sahrens uint64_t asize, uint64_t fsize, void **tr_cookiep, dmu_tx_t *tx) 764789Sahrens { 765789Sahrens int err = 0; 766789Sahrens list_t *tr_list; 767789Sahrens 768789Sahrens tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP); 769789Sahrens list_create(tr_list, sizeof (struct tempreserve), 770789Sahrens offsetof(struct tempreserve, tr_node)); 7711544Seschrock ASSERT3S(asize, >=, 0); 7721544Seschrock ASSERT3S(fsize, >=, 0); 773789Sahrens 774789Sahrens err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize, 775789Sahrens tr_list, tx); 776789Sahrens 777789Sahrens if (err == 0) { 778789Sahrens struct tempreserve *tr; 779789Sahrens 780789Sahrens err = arc_tempreserve_space(lsize); 781789Sahrens if (err == 0) { 782789Sahrens tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP); 783789Sahrens tr->tr_ds = NULL; 784789Sahrens tr->tr_size = lsize; 785789Sahrens list_insert_tail(tr_list, tr); 786789Sahrens } 787789Sahrens } 788789Sahrens 789789Sahrens if (err) 790789Sahrens dsl_dir_tempreserve_clear(tr_list, tx); 791789Sahrens else 792789Sahrens *tr_cookiep = tr_list; 793789Sahrens return (err); 794789Sahrens } 795789Sahrens 796789Sahrens /* 797789Sahrens * Clear a temporary reservation that we previously made with 798789Sahrens * dsl_dir_tempreserve_space(). 
799789Sahrens */ 800789Sahrens void 801789Sahrens dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx) 802789Sahrens { 803789Sahrens int txgidx = tx->tx_txg & TXG_MASK; 804789Sahrens list_t *tr_list = tr_cookie; 805789Sahrens struct tempreserve *tr; 806789Sahrens 807789Sahrens ASSERT3U(tx->tx_txg, !=, 0); 808789Sahrens 809789Sahrens while (tr = list_head(tr_list)) { 810789Sahrens if (tr->tr_ds == NULL) { 811789Sahrens arc_tempreserve_clear(tr->tr_size); 812789Sahrens } else { 813789Sahrens mutex_enter(&tr->tr_ds->dd_lock); 814789Sahrens ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=, 815789Sahrens tr->tr_size); 816789Sahrens tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size; 817789Sahrens mutex_exit(&tr->tr_ds->dd_lock); 818789Sahrens } 819789Sahrens list_remove(tr_list, tr); 820789Sahrens kmem_free(tr, sizeof (struct tempreserve)); 821789Sahrens } 822789Sahrens 823789Sahrens kmem_free(tr_list, sizeof (list_t)); 824789Sahrens } 825789Sahrens 826789Sahrens /* 827789Sahrens * Call in open context when we think we're going to write/free space, 828789Sahrens * eg. when dirtying data. Be conservative (ie. OK to write less than 829789Sahrens * this or free more than this, but don't write more or free less). 830789Sahrens */ 831789Sahrens void 832789Sahrens dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) 833789Sahrens { 834789Sahrens int64_t parent_space; 835789Sahrens uint64_t est_used; 836789Sahrens 837789Sahrens mutex_enter(&dd->dd_lock); 838789Sahrens if (space > 0) 839789Sahrens dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space; 840789Sahrens 841789Sahrens est_used = dsl_dir_estimated_space(dd); 842789Sahrens parent_space = parent_delta(dd, est_used, space); 843789Sahrens mutex_exit(&dd->dd_lock); 844789Sahrens 845789Sahrens /* Make sure that we clean up dd_space_to* */ 846789Sahrens dsl_dir_dirty(dd, tx); 847789Sahrens 848789Sahrens /* XXX this is potentially expensive and unnecessary... 
*/ 849789Sahrens if (parent_space && dd->dd_parent) 850789Sahrens dsl_dir_willuse_space(dd->dd_parent, parent_space, tx); 851789Sahrens } 852789Sahrens 853789Sahrens /* call from syncing context when we actually write/free space for this dd */ 854789Sahrens void 855789Sahrens dsl_dir_diduse_space(dsl_dir_t *dd, 856789Sahrens int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx) 857789Sahrens { 858789Sahrens int64_t accounted_delta; 859789Sahrens 860789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 861789Sahrens 862789Sahrens dsl_dir_dirty(dd, tx); 863789Sahrens 864789Sahrens mutex_enter(&dd->dd_lock); 865789Sahrens accounted_delta = parent_delta(dd, dd->dd_used_bytes, used); 866789Sahrens ASSERT(used >= 0 || dd->dd_used_bytes >= -used); 867789Sahrens ASSERT(compressed >= 0 || 868789Sahrens dd->dd_phys->dd_compressed_bytes >= -compressed); 869789Sahrens ASSERT(uncompressed >= 0 || 870789Sahrens dd->dd_phys->dd_uncompressed_bytes >= -uncompressed); 871789Sahrens dd->dd_used_bytes += used; 872789Sahrens dd->dd_phys->dd_uncompressed_bytes += uncompressed; 873789Sahrens dd->dd_phys->dd_compressed_bytes += compressed; 874789Sahrens mutex_exit(&dd->dd_lock); 875789Sahrens 876789Sahrens if (dd->dd_parent != NULL) { 877789Sahrens dsl_dir_diduse_space(dd->dd_parent, 878789Sahrens accounted_delta, compressed, uncompressed, tx); 879789Sahrens } 880789Sahrens } 881789Sahrens 8822199Sahrens /* ARGSUSED */ 883789Sahrens static int 8842199Sahrens dsl_dir_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx) 885789Sahrens { 8862199Sahrens dsl_dir_t *dd = arg1; 8872199Sahrens uint64_t *quotap = arg2; 888789Sahrens uint64_t new_quota = *quotap; 889789Sahrens int err = 0; 8902199Sahrens uint64_t towrite; 8912199Sahrens 8922199Sahrens if (new_quota == 0) 8932199Sahrens return (0); 8942199Sahrens 8952199Sahrens mutex_enter(&dd->dd_lock); 8962199Sahrens /* 8972199Sahrens * If we are doing the preliminary check in open context, and 8982199Sahrens * there are pending changes, then 
don't fail it, since the 8992199Sahrens * pending changes could under-estimat the amount of space to be 9002199Sahrens * freed up. 9012199Sahrens */ 9022199Sahrens towrite = dd->dd_space_towrite[0] + dd->dd_space_towrite[1] + 9032199Sahrens dd->dd_space_towrite[2] + dd->dd_space_towrite[3]; 9042199Sahrens if ((dmu_tx_is_syncing(tx) || towrite == 0) && 9052199Sahrens (new_quota < dd->dd_phys->dd_reserved || 9062199Sahrens new_quota < dsl_dir_estimated_space(dd))) { 9072199Sahrens err = ENOSPC; 9082199Sahrens } 9092199Sahrens mutex_exit(&dd->dd_lock); 9102199Sahrens return (err); 9112199Sahrens } 9122199Sahrens 9132199Sahrens static void 9142199Sahrens dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx) 9152199Sahrens { 9162199Sahrens dsl_dir_t *dd = arg1; 9172199Sahrens uint64_t *quotap = arg2; 9182199Sahrens uint64_t new_quota = *quotap; 919789Sahrens 920789Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 921789Sahrens 922789Sahrens mutex_enter(&dd->dd_lock); 9232199Sahrens dd->dd_phys->dd_quota = new_quota; 924789Sahrens mutex_exit(&dd->dd_lock); 925789Sahrens } 926789Sahrens 927789Sahrens int 928789Sahrens dsl_dir_set_quota(const char *ddname, uint64_t quota) 929789Sahrens { 930789Sahrens dsl_dir_t *dd; 931789Sahrens int err; 932789Sahrens 9331544Seschrock err = dsl_dir_open(ddname, FTAG, &dd, NULL); 9341544Seschrock if (err) 9351544Seschrock return (err); 936789Sahrens /* 937789Sahrens * If someone removes a file, then tries to set the quota, we 938789Sahrens * want to make sure the file freeing takes effect. 
939789Sahrens */ 940789Sahrens txg_wait_open(dd->dd_pool, 0); 941789Sahrens 9422199Sahrens err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_quota_check, 9432199Sahrens dsl_dir_set_quota_sync, dd, "a, 0); 944789Sahrens dsl_dir_close(dd, FTAG); 945789Sahrens return (err); 946789Sahrens } 947789Sahrens 9482199Sahrens /* ARGSUSED */ 949789Sahrens static int 9502199Sahrens dsl_dir_set_reservation_check(void *arg1, void *arg2, dmu_tx_t *tx) 951789Sahrens { 9522199Sahrens dsl_dir_t *dd = arg1; 9532199Sahrens uint64_t *reservationp = arg2; 954789Sahrens uint64_t new_reservation = *reservationp; 955789Sahrens uint64_t used, avail; 956789Sahrens int64_t delta; 957789Sahrens 958789Sahrens if (new_reservation > INT64_MAX) 959789Sahrens return (EOVERFLOW); 960789Sahrens 9612199Sahrens /* 9622199Sahrens * If we are doing the preliminary check in open context, the 9632199Sahrens * space estimates may be inaccurate. 9642199Sahrens */ 9652199Sahrens if (!dmu_tx_is_syncing(tx)) 9662199Sahrens return (0); 9672199Sahrens 968789Sahrens mutex_enter(&dd->dd_lock); 969789Sahrens used = dd->dd_used_bytes; 970789Sahrens delta = MAX(used, new_reservation) - 971789Sahrens MAX(used, dd->dd_phys->dd_reserved); 972789Sahrens mutex_exit(&dd->dd_lock); 973789Sahrens 974789Sahrens if (dd->dd_parent) { 975789Sahrens avail = dsl_dir_space_available(dd->dd_parent, 976789Sahrens NULL, 0, FALSE); 977789Sahrens } else { 978789Sahrens avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used; 979789Sahrens } 980789Sahrens 981789Sahrens if (delta > 0 && delta > avail) 982789Sahrens return (ENOSPC); 983789Sahrens if (delta > 0 && dd->dd_phys->dd_quota > 0 && 984789Sahrens new_reservation > dd->dd_phys->dd_quota) 985789Sahrens return (ENOSPC); 9862199Sahrens return (0); 9872199Sahrens } 9882199Sahrens 9892199Sahrens static void 9902199Sahrens dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx) 9912199Sahrens { 9922199Sahrens dsl_dir_t *dd = arg1; 9932199Sahrens uint64_t *reservationp = arg2; 
9942199Sahrens uint64_t new_reservation = *reservationp; 9952199Sahrens uint64_t used; 9962199Sahrens int64_t delta; 9972199Sahrens 9982199Sahrens mutex_enter(&dd->dd_lock); 9992199Sahrens used = dd->dd_used_bytes; 10002199Sahrens delta = MAX(used, new_reservation) - 10012199Sahrens MAX(used, dd->dd_phys->dd_reserved); 10022199Sahrens mutex_exit(&dd->dd_lock); 1003789Sahrens 1004789Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 1005789Sahrens dd->dd_phys->dd_reserved = new_reservation; 1006789Sahrens 1007789Sahrens if (dd->dd_parent != NULL) { 1008789Sahrens /* Roll up this additional usage into our ancestors */ 1009789Sahrens dsl_dir_diduse_space(dd->dd_parent, delta, 0, 0, tx); 1010789Sahrens } 1011789Sahrens } 1012789Sahrens 1013789Sahrens int 1014789Sahrens dsl_dir_set_reservation(const char *ddname, uint64_t reservation) 1015789Sahrens { 1016789Sahrens dsl_dir_t *dd; 1017789Sahrens int err; 1018789Sahrens 10191544Seschrock err = dsl_dir_open(ddname, FTAG, &dd, NULL); 10201544Seschrock if (err) 10211544Seschrock return (err); 10222199Sahrens err = dsl_sync_task_do(dd->dd_pool, dsl_dir_set_reservation_check, 10232199Sahrens dsl_dir_set_reservation_sync, dd, &reservation, 0); 1024789Sahrens dsl_dir_close(dd, FTAG); 1025789Sahrens return (err); 1026789Sahrens } 1027789Sahrens 1028789Sahrens static dsl_dir_t * 1029789Sahrens closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2) 1030789Sahrens { 1031789Sahrens for (; ds1; ds1 = ds1->dd_parent) { 1032789Sahrens dsl_dir_t *dd; 1033789Sahrens for (dd = ds2; dd; dd = dd->dd_parent) { 1034789Sahrens if (ds1 == dd) 1035789Sahrens return (dd); 1036789Sahrens } 1037789Sahrens } 1038789Sahrens return (NULL); 1039789Sahrens } 1040789Sahrens 1041789Sahrens /* 1042789Sahrens * If delta is applied to dd, how much of that delta would be applied to 1043789Sahrens * ancestor? Syncing context only. 
1044789Sahrens */ 1045789Sahrens static int64_t 1046789Sahrens would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor) 1047789Sahrens { 1048789Sahrens if (dd == ancestor) 1049789Sahrens return (delta); 1050789Sahrens 1051789Sahrens mutex_enter(&dd->dd_lock); 1052789Sahrens delta = parent_delta(dd, dd->dd_used_bytes, delta); 1053789Sahrens mutex_exit(&dd->dd_lock); 1054789Sahrens return (would_change(dd->dd_parent, delta, ancestor)); 1055789Sahrens } 1056789Sahrens 10572199Sahrens struct renamearg { 10582199Sahrens dsl_dir_t *newparent; 10592199Sahrens const char *mynewname; 10602199Sahrens }; 10612199Sahrens 10622199Sahrens /* ARGSUSED */ 10632199Sahrens static int 10642199Sahrens dsl_dir_rename_check(void *arg1, void *arg2, dmu_tx_t *tx) 1065789Sahrens { 10662199Sahrens dsl_dir_t *dd = arg1; 10672199Sahrens struct renamearg *ra = arg2; 1068789Sahrens dsl_pool_t *dp = dd->dd_pool; 1069789Sahrens objset_t *mos = dp->dp_meta_objset; 10702199Sahrens int err; 10712199Sahrens uint64_t val; 10722199Sahrens 10732199Sahrens /* There should be 2 references: the open and the dirty */ 10742199Sahrens if (dmu_buf_refcount(dd->dd_dbuf) > 2) 10752199Sahrens return (EBUSY); 1076789Sahrens 10772199Sahrens /* check for existing name */ 10782199Sahrens err = zap_lookup(mos, ra->newparent->dd_phys->dd_child_dir_zapobj, 10792199Sahrens ra->mynewname, 8, 1, &val); 10802199Sahrens if (err == 0) 10812199Sahrens return (EEXIST); 10822199Sahrens if (err != ENOENT) 10831544Seschrock return (err); 1084789Sahrens 10852199Sahrens if (ra->newparent != dd->dd_parent) { 10862082Seschrock /* is there enough space? 
*/ 10872082Seschrock uint64_t myspace = 10882082Seschrock MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved); 1089789Sahrens 10902199Sahrens /* no rename into our descendant */ 10912199Sahrens if (closest_common_ancestor(dd, ra->newparent) == dd) 1092789Sahrens return (EINVAL); 10932199Sahrens 10942199Sahrens if (err = dsl_dir_transfer_possible(dd->dd_parent, 10952199Sahrens ra->newparent, myspace)) 10962199Sahrens return (err); 10972199Sahrens } 10982199Sahrens 10992199Sahrens return (0); 11002199Sahrens } 1101789Sahrens 11022199Sahrens static void 11032199Sahrens dsl_dir_rename_sync(void *arg1, void *arg2, dmu_tx_t *tx) 11042199Sahrens { 11052199Sahrens dsl_dir_t *dd = arg1; 11062199Sahrens struct renamearg *ra = arg2; 11072199Sahrens dsl_pool_t *dp = dd->dd_pool; 11082199Sahrens objset_t *mos = dp->dp_meta_objset; 11092199Sahrens int err; 1110789Sahrens 11112199Sahrens ASSERT(dmu_buf_refcount(dd->dd_dbuf) <= 2); 11122199Sahrens 11132199Sahrens if (ra->newparent != dd->dd_parent) { 11142199Sahrens uint64_t myspace = 11152199Sahrens MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved); 1116789Sahrens 1117789Sahrens dsl_dir_diduse_space(dd->dd_parent, -myspace, 1118789Sahrens -dd->dd_phys->dd_compressed_bytes, 1119789Sahrens -dd->dd_phys->dd_uncompressed_bytes, tx); 11202199Sahrens dsl_dir_diduse_space(ra->newparent, myspace, 1121789Sahrens dd->dd_phys->dd_compressed_bytes, 1122789Sahrens dd->dd_phys->dd_uncompressed_bytes, tx); 1123789Sahrens } 1124789Sahrens 1125789Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 1126789Sahrens 1127789Sahrens /* remove from old parent zapobj */ 1128789Sahrens err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj, 1129789Sahrens dd->dd_myname, tx); 1130789Sahrens ASSERT3U(err, ==, 0); 1131789Sahrens 11322199Sahrens (void) strcpy(dd->dd_myname, ra->mynewname); 1133789Sahrens dsl_dir_close(dd->dd_parent, dd); 11342199Sahrens dd->dd_phys->dd_parent_obj = ra->newparent->dd_object; 11351544Seschrock VERIFY(0 == 
dsl_dir_open_obj(dd->dd_pool, 11362199Sahrens ra->newparent->dd_object, NULL, dd, &dd->dd_parent)); 1137789Sahrens 1138789Sahrens /* add to new parent zapobj */ 11392199Sahrens err = zap_add(mos, ra->newparent->dd_phys->dd_child_dir_zapobj, 1140789Sahrens dd->dd_myname, 8, 1, &dd->dd_object, tx); 1141789Sahrens ASSERT3U(err, ==, 0); 11422199Sahrens } 1143789Sahrens 11442199Sahrens int 11452199Sahrens dsl_dir_rename(dsl_dir_t *dd, const char *newname) 11462199Sahrens { 11472199Sahrens struct renamearg ra; 11482199Sahrens int err; 11492199Sahrens 11502199Sahrens /* new parent should exist */ 11512199Sahrens err = dsl_dir_open(newname, FTAG, &ra.newparent, &ra.mynewname); 11522199Sahrens if (err) 11532199Sahrens return (err); 11542199Sahrens 11552199Sahrens /* can't rename to different pool */ 11562199Sahrens if (dd->dd_pool != ra.newparent->dd_pool) { 11572199Sahrens err = ENXIO; 11582199Sahrens goto out; 11592199Sahrens } 11602199Sahrens 11612199Sahrens /* new name should not already exist */ 11622199Sahrens if (ra.mynewname == NULL) { 11632199Sahrens err = EEXIST; 11642199Sahrens goto out; 11652199Sahrens } 11662199Sahrens 11672199Sahrens 11682199Sahrens err = dsl_sync_task_do(dd->dd_pool, 11692199Sahrens dsl_dir_rename_check, dsl_dir_rename_sync, dd, &ra, 3); 11702199Sahrens 11712199Sahrens out: 11722199Sahrens dsl_dir_close(ra.newparent, FTAG); 11732199Sahrens return (err); 1174789Sahrens } 11752082Seschrock 11762082Seschrock int 11772082Seschrock dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd, uint64_t space) 11782082Seschrock { 11792082Seschrock dsl_dir_t *ancestor; 11802082Seschrock int64_t adelta; 11812082Seschrock uint64_t avail; 11822082Seschrock 11832082Seschrock ancestor = closest_common_ancestor(sdd, tdd); 11842082Seschrock adelta = would_change(sdd, -space, ancestor); 11852082Seschrock avail = dsl_dir_space_available(tdd, ancestor, adelta, FALSE); 11862082Seschrock if (avail < space) 11872082Seschrock return (ENOSPC); 11882082Seschrock 
11892082Seschrock return (0); 11902082Seschrock } 1191