1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 5*1544Seschrock * Common Development and Distribution License (the "License"). 6*1544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 221356Seschrock * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 26789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27789Sahrens 28789Sahrens #include <sys/dmu.h> 29789Sahrens #include <sys/dmu_tx.h> 30789Sahrens #include <sys/dsl_dataset.h> 31789Sahrens #include <sys/dsl_dir.h> 32789Sahrens #include <sys/dsl_prop.h> 33789Sahrens #include <sys/spa.h> 34789Sahrens #include <sys/zap.h> 35789Sahrens #include <sys/zio.h> 36789Sahrens #include <sys/arc.h> 37789Sahrens #include "zfs_namecheck.h" 38789Sahrens 39789Sahrens static uint64_t dsl_dir_space_accounted(dsl_dir_t *dd); 40789Sahrens static uint64_t dsl_dir_estimated_space(dsl_dir_t *dd); 41789Sahrens static int dsl_dir_set_reservation_sync(dsl_dir_t *dd, 42789Sahrens void *arg, dmu_tx_t *tx); 43789Sahrens static uint64_t dsl_dir_space_available(dsl_dir_t *dd, 44789Sahrens dsl_dir_t *ancestor, int64_t delta, int ondiskonly); 45789Sahrens 46789Sahrens 47789Sahrens /* ARGSUSED */ 48789Sahrens static void 49789Sahrens dsl_dir_evict(dmu_buf_t *db, void *arg) 50789Sahrens { 51789Sahrens dsl_dir_t *dd = arg; 52789Sahrens dsl_pool_t *dp = dd->dd_pool; 53789Sahrens int t; 54789Sahrens 55789Sahrens for (t = 0; t < TXG_SIZE; t++) { 56789Sahrens ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); 57789Sahrens ASSERT(dd->dd_tempreserved[t] == 0); 58789Sahrens ASSERT(dd->dd_space_towrite[t] == 0); 59789Sahrens } 60789Sahrens 61789Sahrens ASSERT3U(dd->dd_used_bytes, ==, dd->dd_phys->dd_used_bytes); 62789Sahrens 63789Sahrens ASSERT(dd->dd_sync_txg == 0); 64789Sahrens 65789Sahrens if (dd->dd_parent) 66789Sahrens dsl_dir_close(dd->dd_parent, dd); 67789Sahrens 68789Sahrens spa_close(dd->dd_pool->dp_spa, dd); 69789Sahrens 70789Sahrens /* 71789Sahrens * The props callback list should be empty since they hold the 72789Sahrens * dir open. 73789Sahrens */ 74789Sahrens list_destroy(&dd->dd_prop_cbs); 75789Sahrens kmem_free(dd, sizeof (dsl_dir_t)); 76789Sahrens } 77789Sahrens 78*1544Seschrock int 79789Sahrens dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, 80*1544Seschrock const char *tail, void *tag, dsl_dir_t **ddp) 81789Sahrens { 82789Sahrens dmu_buf_t *dbuf; 83789Sahrens dsl_dir_t *dd; 84*1544Seschrock int err; 85789Sahrens 86789Sahrens ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 87789Sahrens dsl_pool_sync_context(dp)); 88789Sahrens 89*1544Seschrock err = dmu_bonus_hold(dp->dp_meta_objset, ddobj, tag, &dbuf); 90*1544Seschrock if (err) 91*1544Seschrock return (err); 92789Sahrens dd = dmu_buf_get_user(dbuf); 93789Sahrens #ifdef ZFS_DEBUG 94789Sahrens { 95789Sahrens dmu_object_info_t doi; 96789Sahrens dmu_object_info_from_db(dbuf, &doi); 97928Stabriz ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DIR); 98789Sahrens } 99789Sahrens #endif 100789Sahrens /* XXX assert bonus buffer size is correct */ 101789Sahrens if (dd == NULL) { 102789Sahrens dsl_dir_t *winner; 103789Sahrens int err; 104789Sahrens 105789Sahrens dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP); 106789Sahrens dd->dd_object = ddobj; 107789Sahrens dd->dd_dbuf = dbuf; 108789Sahrens dd->dd_pool = dp; 109789Sahrens dd->dd_phys = dbuf->db_data; 110789Sahrens dd->dd_used_bytes = dd->dd_phys->dd_used_bytes; 111789Sahrens 112789Sahrens list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t), 113789Sahrens offsetof(dsl_prop_cb_record_t, cbr_node)); 114789Sahrens 115789Sahrens if (dd->dd_phys->dd_parent_obj) { 116*1544Seschrock err = dsl_dir_open_obj(dp, dd->dd_phys->dd_parent_obj, 117*1544Seschrock NULL, dd, &dd->dd_parent); 118*1544Seschrock if (err) { 119*1544Seschrock kmem_free(dd, sizeof (dsl_dir_t)); 120*1544Seschrock dmu_buf_rele(dbuf, tag); 121*1544Seschrock return (err); 122*1544Seschrock } 123789Sahrens if (tail) { 124789Sahrens #ifdef ZFS_DEBUG 125789Sahrens uint64_t foundobj; 126789Sahrens 127789Sahrens err = zap_lookup(dp->dp_meta_objset, 128789Sahrens dd->dd_parent->dd_phys-> 129789Sahrens dd_child_dir_zapobj, 130789Sahrens tail, sizeof (foundobj), 1, &foundobj); 131*1544Seschrock ASSERT(err || foundobj == ddobj); 132789Sahrens #endif 133789Sahrens (void) strcpy(dd->dd_myname, tail); 134789Sahrens } else { 135789Sahrens err = zap_value_search(dp->dp_meta_objset, 136789Sahrens dd->dd_parent->dd_phys-> 137789Sahrens dd_child_dir_zapobj, 138789Sahrens ddobj, dd->dd_myname); 139*1544Seschrock } 140*1544Seschrock if (err) { 141*1544Seschrock dsl_dir_close(dd->dd_parent, dd); 142*1544Seschrock kmem_free(dd, sizeof (dsl_dir_t)); 143*1544Seschrock dmu_buf_rele(dbuf, tag); 144*1544Seschrock return (err); 145789Sahrens } 146789Sahrens } else { 147789Sahrens (void) strcpy(dd->dd_myname, spa_name(dp->dp_spa)); 148789Sahrens } 149789Sahrens 150789Sahrens winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys, 151789Sahrens dsl_dir_evict); 152789Sahrens if (winner) { 153789Sahrens if (dd->dd_parent) 154789Sahrens dsl_dir_close(dd->dd_parent, dd); 155789Sahrens kmem_free(dd, sizeof (dsl_dir_t)); 156789Sahrens dd = winner; 157789Sahrens } else { 158789Sahrens spa_open_ref(dp->dp_spa, dd); 159789Sahrens } 160789Sahrens } 161789Sahrens 162789Sahrens /* 163789Sahrens * The dsl_dir_t has both open-to-close and instantiate-to-evict 164789Sahrens * holds on the spa. We need the open-to-close holds because 165789Sahrens * otherwise the spa_refcnt wouldn't change when we open a 166789Sahrens * dir which the spa also has open, so we could incorrectly 167789Sahrens * think it was OK to unload/export/destroy the pool. We need 168789Sahrens * the instantiate-to-evict hold because the dsl_dir_t has a 169789Sahrens * pointer to the dd_pool, which has a pointer to the spa_t. 170789Sahrens */ 171789Sahrens spa_open_ref(dp->dp_spa, tag); 172789Sahrens ASSERT3P(dd->dd_pool, ==, dp); 173789Sahrens ASSERT3U(dd->dd_object, ==, ddobj); 174789Sahrens ASSERT3P(dd->dd_dbuf, ==, dbuf); 175*1544Seschrock *ddp = dd; 176*1544Seschrock return (0); 177789Sahrens } 178789Sahrens 179789Sahrens void 180789Sahrens dsl_dir_close(dsl_dir_t *dd, void *tag) 181789Sahrens { 182789Sahrens dprintf_dd(dd, "%s\n", ""); 183789Sahrens spa_close(dd->dd_pool->dp_spa, tag); 184*1544Seschrock dmu_buf_rele(dd->dd_dbuf, tag); 185789Sahrens } 186789Sahrens 187789Sahrens /* buf must be long enough (MAXNAMELEN should do) */ 188789Sahrens void 189789Sahrens dsl_dir_name(dsl_dir_t *dd, char *buf) 190789Sahrens { 191789Sahrens if (dd->dd_parent) { 192789Sahrens dsl_dir_name(dd->dd_parent, buf); 193789Sahrens (void) strcat(buf, "/"); 194789Sahrens } else { 195789Sahrens buf[0] = '\0'; 196789Sahrens } 197789Sahrens if (!MUTEX_HELD(&dd->dd_lock)) { 198789Sahrens /* 199789Sahrens * recursive mutex so that we can use 200789Sahrens * dprintf_dd() with dd_lock held 201789Sahrens */ 202789Sahrens mutex_enter(&dd->dd_lock); 203789Sahrens (void) strcat(buf, dd->dd_myname); 204789Sahrens mutex_exit(&dd->dd_lock); 205789Sahrens } else { 206789Sahrens (void) strcat(buf, dd->dd_myname); 207789Sahrens } 208789Sahrens } 209789Sahrens 210789Sahrens int 211789Sahrens dsl_dir_is_private(dsl_dir_t *dd) 212789Sahrens { 213789Sahrens int rv = FALSE; 214789Sahrens 215789Sahrens if (dd->dd_parent && dsl_dir_is_private(dd->dd_parent)) 216789Sahrens rv = TRUE; 217789Sahrens if (dataset_name_hidden(dd->dd_myname)) 218789Sahrens rv = TRUE; 219789Sahrens return (rv); 220789Sahrens } 221789Sahrens 222789Sahrens 223789Sahrens static int 224789Sahrens getcomponent(const char *path, char *component, const char **nextp) 225789Sahrens { 226789Sahrens char *p; 227789Sahrens if (path == NULL) 228789Sahrens return (NULL); 229789Sahrens /* This would be a good place to reserve some namespace... */ 230789Sahrens p = strpbrk(path, "/@"); 231789Sahrens if (p && (p[1] == '/' || p[1] == '@')) { 232789Sahrens /* two separators in a row */ 233789Sahrens return (EINVAL); 234789Sahrens } 235789Sahrens if (p == NULL || p == path) { 236789Sahrens /* 237789Sahrens * if the first thing is an @ or /, it had better be an 238789Sahrens * @ and it had better not have any more ats or slashes, 239789Sahrens * and it had better have something after the @. 240789Sahrens */ 241789Sahrens if (p != NULL && 242789Sahrens (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0')) 243789Sahrens return (EINVAL); 244789Sahrens if (strlen(path) >= MAXNAMELEN) 245789Sahrens return (ENAMETOOLONG); 246789Sahrens (void) strcpy(component, path); 247789Sahrens p = NULL; 248789Sahrens } else if (p[0] == '/') { 249789Sahrens if (p-path >= MAXNAMELEN) 250789Sahrens return (ENAMETOOLONG); 251789Sahrens (void) strncpy(component, path, p - path); 252789Sahrens component[p-path] = '\0'; 253789Sahrens p++; 254789Sahrens } else if (p[0] == '@') { 255789Sahrens /* 256789Sahrens * if the next separator is an @, there better not be 257789Sahrens * any more slashes. 258789Sahrens */ 259789Sahrens if (strchr(path, '/')) 260789Sahrens return (EINVAL); 261789Sahrens if (p-path >= MAXNAMELEN) 262789Sahrens return (ENAMETOOLONG); 263789Sahrens (void) strncpy(component, path, p - path); 264789Sahrens component[p-path] = '\0'; 265789Sahrens } else { 266789Sahrens ASSERT(!"invalid p"); 267789Sahrens } 268789Sahrens *nextp = p; 269789Sahrens return (0); 270789Sahrens } 271789Sahrens 272789Sahrens /* 273789Sahrens * same as dsl_open_dir, ignore the first component of name and use the 274789Sahrens * spa instead 275789Sahrens */ 276*1544Seschrock int 277*1544Seschrock dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, 278*1544Seschrock dsl_dir_t **ddp, const char **tailp) 279789Sahrens { 280789Sahrens char buf[MAXNAMELEN]; 281789Sahrens const char *next, *nextnext = NULL; 282789Sahrens int err; 283789Sahrens dsl_dir_t *dd; 284789Sahrens dsl_pool_t *dp; 285789Sahrens uint64_t ddobj; 286789Sahrens int openedspa = FALSE; 287789Sahrens 288789Sahrens dprintf("%s\n", name); 289789Sahrens 290789Sahrens if (name == NULL) 291*1544Seschrock return (ENOENT); 292789Sahrens err = getcomponent(name, buf, &next); 293789Sahrens if (err) 294*1544Seschrock return (err); 295789Sahrens if (spa == NULL) { 296789Sahrens err = spa_open(buf, &spa, FTAG); 297789Sahrens if (err) { 298789Sahrens dprintf("spa_open(%s) failed\n", buf); 299*1544Seschrock return (err); 300789Sahrens } 301789Sahrens openedspa = TRUE; 302789Sahrens 303789Sahrens /* XXX this assertion belongs in spa_open */ 304789Sahrens ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa))); 305789Sahrens } 306789Sahrens 307789Sahrens dp = spa_get_dsl(spa); 308789Sahrens 309789Sahrens rw_enter(&dp->dp_config_rwlock, RW_READER); 310*1544Seschrock err = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag, &dd); 311*1544Seschrock if (err) { 312*1544Seschrock rw_exit(&dp->dp_config_rwlock); 313*1544Seschrock if (openedspa) 314*1544Seschrock spa_close(spa, FTAG); 315*1544Seschrock return (err); 316*1544Seschrock } 317*1544Seschrock 318789Sahrens while (next != NULL) { 319789Sahrens dsl_dir_t *child_ds; 320789Sahrens err = getcomponent(next, buf, &nextnext); 321*1544Seschrock if (err) 322*1544Seschrock break; 323789Sahrens ASSERT(next[0] != '\0'); 324789Sahrens if (next[0] == '@') 325789Sahrens break; 326789Sahrens if (dd->dd_phys->dd_child_dir_zapobj == 0) 327789Sahrens break; 328789Sahrens dprintf("looking up %s in obj%lld\n", 329789Sahrens buf, dd->dd_phys->dd_child_dir_zapobj); 330789Sahrens 331789Sahrens err = zap_lookup(dp->dp_meta_objset, 332789Sahrens dd->dd_phys->dd_child_dir_zapobj, 333789Sahrens buf, sizeof (ddobj), 1, &ddobj); 334*1544Seschrock if (err) { 335*1544Seschrock if (err == ENOENT) 336*1544Seschrock err = 0; 337789Sahrens break; 338789Sahrens } 339789Sahrens 340*1544Seschrock err = dsl_dir_open_obj(dp, ddobj, buf, tag, &child_ds); 341*1544Seschrock if (err) 342*1544Seschrock break; 343789Sahrens dsl_dir_close(dd, tag); 344789Sahrens dd = child_ds; 345789Sahrens next = nextnext; 346789Sahrens } 347789Sahrens rw_exit(&dp->dp_config_rwlock); 348789Sahrens 349*1544Seschrock if (err) { 350*1544Seschrock dsl_dir_close(dd, tag); 351*1544Seschrock if (openedspa) 352*1544Seschrock spa_close(spa, FTAG); 353*1544Seschrock return (err); 354*1544Seschrock } 355*1544Seschrock 356789Sahrens /* 357789Sahrens * It's an error if there's more than one component left, or 358789Sahrens * tailp==NULL and there's any component left. 359789Sahrens */ 360789Sahrens if (next != NULL && 361789Sahrens (tailp == NULL || (nextnext && nextnext[0] != '\0'))) { 362789Sahrens /* bad path name */ 363789Sahrens dsl_dir_close(dd, tag); 364789Sahrens dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp); 365*1544Seschrock err = ENOENT; 366789Sahrens } 367789Sahrens if (tailp) 368789Sahrens *tailp = next; 369789Sahrens if (openedspa) 370789Sahrens spa_close(spa, FTAG); 371*1544Seschrock *ddp = dd; 372*1544Seschrock return (err); 373789Sahrens } 374789Sahrens 375789Sahrens /* 376789Sahrens * Return the dsl_dir_t, and possibly the last component which couldn't 377789Sahrens * be found in *tail. Return NULL if the path is bogus, or if 378789Sahrens * tail==NULL and we couldn't parse the whole name. (*tail)[0] == '@' 379789Sahrens * means that the last component is a snapshot. 380789Sahrens */ 381*1544Seschrock int 382*1544Seschrock dsl_dir_open(const char *name, void *tag, dsl_dir_t **ddp, const char **tailp) 383789Sahrens { 384*1544Seschrock return (dsl_dir_open_spa(NULL, name, tag, ddp, tailp)); 385789Sahrens } 386789Sahrens 387789Sahrens int 388789Sahrens dsl_dir_create_sync(dsl_dir_t *pds, const char *name, dmu_tx_t *tx) 389789Sahrens { 390789Sahrens objset_t *mos = pds->dd_pool->dp_meta_objset; 391789Sahrens uint64_t ddobj; 392789Sahrens dsl_dir_phys_t *dsphys; 393789Sahrens dmu_buf_t *dbuf; 394789Sahrens int err; 395789Sahrens 396789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 397789Sahrens 398789Sahrens if (pds->dd_phys->dd_child_dir_zapobj == 0) { 399789Sahrens dmu_buf_will_dirty(pds->dd_dbuf, tx); 400789Sahrens pds->dd_phys->dd_child_dir_zapobj = zap_create(mos, 401885Sahrens DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); 402789Sahrens } 403789Sahrens 404789Sahrens rw_enter(&pds->dd_pool->dp_config_rwlock, RW_WRITER); 405789Sahrens err = zap_lookup(mos, pds->dd_phys->dd_child_dir_zapobj, 406789Sahrens name, sizeof (uint64_t), 1, &ddobj); 407789Sahrens if (err != ENOENT) { 408789Sahrens rw_exit(&pds->dd_pool->dp_config_rwlock); 409789Sahrens return (err ? err : EEXIST); 410789Sahrens } 411789Sahrens 412928Stabriz ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, 413928Stabriz DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); 414789Sahrens err = zap_add(mos, pds->dd_phys->dd_child_dir_zapobj, 415789Sahrens name, sizeof (uint64_t), 1, &ddobj, tx); 416789Sahrens ASSERT3U(err, ==, 0); 417789Sahrens dprintf("dataset_create: zap_add %s->%lld to %lld returned %d\n", 418789Sahrens name, ddobj, pds->dd_phys->dd_child_dir_zapobj, err); 419789Sahrens 420*1544Seschrock VERIFY(0 == dmu_bonus_hold(mos, ddobj, FTAG, &dbuf)); 421789Sahrens dmu_buf_will_dirty(dbuf, tx); 422789Sahrens dsphys = dbuf->db_data; 423789Sahrens 424789Sahrens dsphys->dd_creation_time = gethrestime_sec(); 425789Sahrens dsphys->dd_parent_obj = pds->dd_object; 426789Sahrens dsphys->dd_props_zapobj = zap_create(mos, 427789Sahrens DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); 428789Sahrens dsphys->dd_child_dir_zapobj = zap_create(mos, 429885Sahrens DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); 430*1544Seschrock dmu_buf_rele(dbuf, FTAG); 431789Sahrens 432789Sahrens rw_exit(&pds->dd_pool->dp_config_rwlock); 433789Sahrens 434789Sahrens return (0); 435789Sahrens } 436789Sahrens 437789Sahrens int 438789Sahrens dsl_dir_destroy_sync(dsl_dir_t *pds, void *arg, dmu_tx_t *tx) 439789Sahrens { 440789Sahrens const char *name = arg; 441789Sahrens dsl_dir_t *dd = NULL; 442789Sahrens dsl_pool_t *dp = pds->dd_pool; 443789Sahrens objset_t *mos = dp->dp_meta_objset; 444789Sahrens uint64_t val, obj, child_zapobj, props_zapobj; 445789Sahrens int t, err; 446789Sahrens 447789Sahrens rw_enter(&dp->dp_config_rwlock, RW_WRITER); 448789Sahrens 449789Sahrens err = zap_lookup(mos, pds->dd_phys->dd_child_dir_zapobj, name, 450789Sahrens 8, 1, &obj); 451789Sahrens if (err) 452789Sahrens goto out; 453789Sahrens 454*1544Seschrock err = dsl_dir_open_obj(dp, obj, name, FTAG, &dd); 455*1544Seschrock if (err) 456*1544Seschrock goto out; 457789Sahrens ASSERT3U(dd->dd_phys->dd_parent_obj, ==, pds->dd_object); 458789Sahrens 459789Sahrens if (dmu_buf_refcount(dd->dd_dbuf) > 1) { 460789Sahrens err = EBUSY; 461789Sahrens goto out; 462789Sahrens } 463789Sahrens 464789Sahrens for (t = 0; t < TXG_SIZE; t++) { 465789Sahrens /* 466789Sahrens * if they were dirty, they'd also be open. 467789Sahrens * dp_config_rwlock ensures that it stays that way. 468789Sahrens */ 469789Sahrens ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); 470789Sahrens } 471789Sahrens 472789Sahrens child_zapobj = dd->dd_phys->dd_child_dir_zapobj; 473789Sahrens props_zapobj = dd->dd_phys->dd_props_zapobj; 474789Sahrens 475789Sahrens if (child_zapobj != 0) { 476789Sahrens uint64_t count; 477789Sahrens err = EEXIST; 478789Sahrens (void) zap_count(mos, child_zapobj, &count); 479789Sahrens if (count != 0) 480789Sahrens goto out; 481789Sahrens } 482789Sahrens 483789Sahrens if (dd->dd_phys->dd_head_dataset_obj != 0) { 484789Sahrens err = dsl_dataset_destroy_sync(dd, NULL, tx); 485789Sahrens if (err) 486789Sahrens goto out; 487789Sahrens } 488789Sahrens ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); 489789Sahrens 490789Sahrens /* The point of no (unsuccessful) return */ 491789Sahrens 492789Sahrens /* Make sure parent's used gets updated */ 493789Sahrens val = 0; 494789Sahrens err = dsl_dir_set_reservation_sync(dd, &val, tx); 495789Sahrens ASSERT(err == 0); 496789Sahrens ASSERT3U(dd->dd_used_bytes, ==, 0); 497789Sahrens ASSERT3U(dd->dd_phys->dd_reserved, ==, 0); 498789Sahrens dsl_dir_close(dd, FTAG); 499789Sahrens dd = NULL; 500789Sahrens 501789Sahrens err = dmu_object_free(mos, obj, tx); 502789Sahrens ASSERT(err == 0); 503789Sahrens 504789Sahrens if (child_zapobj) 505789Sahrens err = zap_destroy(mos, child_zapobj, tx); 506789Sahrens ASSERT(err == 0); 507789Sahrens 508789Sahrens if (props_zapobj) 509789Sahrens err = zap_destroy(mos, props_zapobj, tx); 510789Sahrens ASSERT(err == 0); 511789Sahrens 512789Sahrens err = zap_remove(mos, pds->dd_phys->dd_child_dir_zapobj, name, tx); 513789Sahrens ASSERT(err == 0); 514789Sahrens 515789Sahrens out: 516789Sahrens rw_exit(&dp->dp_config_rwlock); 517789Sahrens if (dd) 518789Sahrens dsl_dir_close(dd, FTAG); 519789Sahrens 520789Sahrens return (err); 521789Sahrens } 522789Sahrens 523789Sahrens void 524789Sahrens dsl_dir_create_root(objset_t *mos, uint64_t *ddobjp, dmu_tx_t *tx) 525789Sahrens { 526789Sahrens dsl_dir_phys_t *dsp; 527789Sahrens dmu_buf_t *dbuf; 528789Sahrens int error; 529789Sahrens 530928Stabriz *ddobjp = dmu_object_alloc(mos, DMU_OT_DSL_DIR, 0, 531928Stabriz DMU_OT_DSL_DIR, sizeof (dsl_dir_phys_t), tx); 532789Sahrens 533789Sahrens error = zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, 534789Sahrens sizeof (uint64_t), 1, ddobjp, tx); 535789Sahrens ASSERT3U(error, ==, 0); 536789Sahrens 537*1544Seschrock VERIFY(0 == dmu_bonus_hold(mos, *ddobjp, FTAG, &dbuf)); 538789Sahrens dmu_buf_will_dirty(dbuf, tx); 539789Sahrens dsp = dbuf->db_data; 540789Sahrens 541789Sahrens dsp->dd_creation_time = gethrestime_sec(); 542789Sahrens dsp->dd_props_zapobj = zap_create(mos, 543789Sahrens DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); 544789Sahrens dsp->dd_child_dir_zapobj = zap_create(mos, 545885Sahrens DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); 546789Sahrens 547*1544Seschrock dmu_buf_rele(dbuf, FTAG); 548789Sahrens } 549789Sahrens 550789Sahrens void 551789Sahrens dsl_dir_stats(dsl_dir_t *dd, dmu_objset_stats_t *dds) 552789Sahrens { 553789Sahrens bzero(dds, sizeof (dmu_objset_stats_t)); 554789Sahrens 555789Sahrens dds->dds_available = dsl_dir_space_available(dd, NULL, 0, TRUE); 556789Sahrens 557789Sahrens mutex_enter(&dd->dd_lock); 558789Sahrens dds->dds_space_used = dd->dd_used_bytes; 559789Sahrens dds->dds_compressed_bytes = dd->dd_phys->dd_compressed_bytes; 560789Sahrens dds->dds_uncompressed_bytes = dd->dd_phys->dd_uncompressed_bytes; 561789Sahrens dds->dds_quota = dd->dd_phys->dd_quota; 562789Sahrens dds->dds_reserved = dd->dd_phys->dd_reserved; 563789Sahrens mutex_exit(&dd->dd_lock); 564789Sahrens 565789Sahrens dds->dds_creation_time = dd->dd_phys->dd_creation_time; 566789Sahrens 567789Sahrens if (dd->dd_phys->dd_clone_parent_obj) { 568789Sahrens dsl_dataset_t *ds; 569789Sahrens 570789Sahrens rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); 571*1544Seschrock VERIFY(0 == dsl_dataset_open_obj(dd->dd_pool, 572*1544Seschrock dd->dd_phys->dd_clone_parent_obj, 573*1544Seschrock NULL, DS_MODE_NONE, FTAG, &ds)); 574789Sahrens dsl_dataset_name(ds, dds->dds_clone_of); 575789Sahrens dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 576789Sahrens rw_exit(&dd->dd_pool->dp_config_rwlock); 577789Sahrens } 578789Sahrens } 579789Sahrens 580789Sahrens int 581789Sahrens dsl_dir_sync_task(dsl_dir_t *dd, 582789Sahrens int (*func)(dsl_dir_t *, void*, dmu_tx_t *), void *arg, uint64_t space) 583789Sahrens { 584789Sahrens dmu_tx_t *tx; 585789Sahrens dsl_pool_t *dp = dd->dd_pool; 586789Sahrens int err = 0; 587789Sahrens uint64_t txg; 588789Sahrens 589789Sahrens dprintf_dd(dd, "func=%p space=%llu\n", func, space); 590789Sahrens 591789Sahrens again: 592789Sahrens tx = dmu_tx_create_ds(dd); 593789Sahrens dmu_tx_hold_space(tx, space); 594789Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 595789Sahrens if (err == ENOSPC || err == EDQUOT) { 596789Sahrens dsl_dir_t *rds; 597789Sahrens /* 598789Sahrens * They can get their space from either this dd, or the 599789Sahrens * root dd. 600789Sahrens */ 601789Sahrens for (rds = dd; rds->dd_parent; rds = rds->dd_parent) 602789Sahrens continue; 603789Sahrens dmu_tx_abort(tx); 604789Sahrens tx = dmu_tx_create_ds(rds); 605789Sahrens dmu_tx_hold_space(tx, space); 606789Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 607789Sahrens } 608789Sahrens if (err) { 609789Sahrens dmu_tx_abort(tx); 610789Sahrens return (err); 611789Sahrens } 612789Sahrens 613789Sahrens txg = dmu_tx_get_txg(tx); 614789Sahrens mutex_enter(&dd->dd_lock); 615789Sahrens if (dd->dd_sync_txg != 0) { 616789Sahrens mutex_exit(&dd->dd_lock); 617789Sahrens dmu_tx_commit(tx); 618789Sahrens txg_wait_synced(dp, 0); 619789Sahrens goto again; 620789Sahrens } 621789Sahrens 622789Sahrens /* We're good to go */ 623789Sahrens 624789Sahrens dd->dd_sync_txg = txg; 625789Sahrens dd->dd_sync_func = func; 626789Sahrens dd->dd_sync_arg = arg; 627789Sahrens 628789Sahrens mutex_exit(&dd->dd_lock); 629789Sahrens 630789Sahrens dsl_dir_dirty(dd, tx); 631789Sahrens dmu_tx_commit(tx); 632789Sahrens 633789Sahrens txg_wait_synced(dp, txg); 634789Sahrens 635789Sahrens mutex_enter(&dd->dd_lock); 636789Sahrens ASSERT(dd->dd_sync_txg == txg); 637789Sahrens ASSERT(dd->dd_sync_func == NULL); 638789Sahrens err = dd->dd_sync_err; 639789Sahrens dd->dd_sync_txg = 0; 640789Sahrens mutex_exit(&dd->dd_lock); 641789Sahrens 642789Sahrens return (err); 643789Sahrens } 644789Sahrens 645789Sahrens void 646789Sahrens dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx) 647789Sahrens { 648789Sahrens dsl_pool_t *dp = dd->dd_pool; 649789Sahrens 650789Sahrens ASSERT(dd->dd_phys); 651789Sahrens 652789Sahrens if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) { 653789Sahrens /* up the hold count until we can be written out */ 654789Sahrens dmu_buf_add_ref(dd->dd_dbuf, dd); 655789Sahrens } 656789Sahrens } 657789Sahrens 658789Sahrens static int64_t 659789Sahrens parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta) 660789Sahrens { 661789Sahrens uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved); 662789Sahrens uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved); 663789Sahrens return (new_accounted - old_accounted); 664789Sahrens } 665789Sahrens 666789Sahrens void 667789Sahrens dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx) 668789Sahrens { 669789Sahrens if (dd->dd_sync_txg == tx->tx_txg && dd->dd_sync_func) { 670789Sahrens dd->dd_sync_err = dd->dd_sync_func(dd, dd->dd_sync_arg, tx); 671789Sahrens dd->dd_sync_func = NULL; 672789Sahrens } 673789Sahrens 674789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 675789Sahrens 676789Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 677789Sahrens 678789Sahrens mutex_enter(&dd->dd_lock); 679789Sahrens ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0); 680789Sahrens dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg, 681789Sahrens dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024); 682789Sahrens dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0; 683789Sahrens dd->dd_phys->dd_used_bytes = dd->dd_used_bytes; 684789Sahrens mutex_exit(&dd->dd_lock); 685789Sahrens 686789Sahrens /* release the hold from dsl_dir_dirty */ 687*1544Seschrock dmu_buf_rele(dd->dd_dbuf, dd); 688789Sahrens } 689789Sahrens 690789Sahrens static uint64_t 691789Sahrens dsl_dir_estimated_space(dsl_dir_t *dd) 692789Sahrens { 693789Sahrens int64_t space; 694789Sahrens int i; 695789Sahrens 696789Sahrens ASSERT(MUTEX_HELD(&dd->dd_lock)); 697789Sahrens 698*1544Seschrock space = dd->dd_phys->dd_used_bytes; 699789Sahrens ASSERT(space >= 0); 700789Sahrens for (i = 0; i < TXG_SIZE; i++) { 701789Sahrens space += dd->dd_space_towrite[i&TXG_MASK]; 702789Sahrens ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0); 703789Sahrens } 704789Sahrens return (space); 705789Sahrens } 706789Sahrens 707789Sahrens /* 708789Sahrens * How much space would dd have available if ancestor had delta applied 709789Sahrens * to it? If ondiskonly is set, we're only interested in what's 710789Sahrens * on-disk, not estimated pending changes. 711789Sahrens */ 712789Sahrens static uint64_t 713789Sahrens dsl_dir_space_available(dsl_dir_t *dd, 714789Sahrens dsl_dir_t *ancestor, int64_t delta, int ondiskonly) 715789Sahrens { 716789Sahrens uint64_t parentspace, myspace, quota, used; 717789Sahrens 718789Sahrens /* 719789Sahrens * If there are no restrictions otherwise, assume we have 720789Sahrens * unlimited space available. 721789Sahrens */ 722789Sahrens quota = UINT64_MAX; 723789Sahrens parentspace = UINT64_MAX; 724789Sahrens 725789Sahrens if (dd->dd_parent != NULL) { 726789Sahrens parentspace = dsl_dir_space_available(dd->dd_parent, 727789Sahrens ancestor, delta, ondiskonly); 728789Sahrens } 729789Sahrens 730789Sahrens mutex_enter(&dd->dd_lock); 731789Sahrens if (dd->dd_phys->dd_quota != 0) 732789Sahrens quota = dd->dd_phys->dd_quota; 733789Sahrens if (ondiskonly) { 734789Sahrens used = dd->dd_used_bytes; 735789Sahrens } else { 736789Sahrens used = dsl_dir_estimated_space(dd); 737789Sahrens } 738789Sahrens if (dd == ancestor) 739789Sahrens used += delta; 740789Sahrens 741789Sahrens if (dd->dd_parent == NULL) { 742789Sahrens uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE); 743789Sahrens quota = MIN(quota, poolsize); 744789Sahrens } 745789Sahrens 746789Sahrens if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) { 747789Sahrens /* 748789Sahrens * We have some space reserved, in addition to what our 749789Sahrens * parent gave us. 750789Sahrens */ 751789Sahrens parentspace += dd->dd_phys->dd_reserved - used; 752789Sahrens } 753789Sahrens 754789Sahrens if (used > quota) { 755789Sahrens /* over quota */ 756789Sahrens myspace = 0; 757789Sahrens #ifdef ZFS_DEBUG 758789Sahrens { 759789Sahrens /* 760789Sahrens * While it's OK to be a little over quota, if 761789Sahrens * we think we are using more space than there 762789Sahrens * is in the pool (which is already 6% more than 763789Sahrens * dsl_pool_adjustedsize()), something is very 764789Sahrens * wrong. 765789Sahrens */ 766789Sahrens uint64_t space = spa_get_space(dd->dd_pool->dp_spa); 767789Sahrens ASSERT3U(used, <=, space); 768789Sahrens } 769789Sahrens #endif 770789Sahrens } else { 771789Sahrens /* 772789Sahrens * the lesser of parent's space and the space 773789Sahrens * left in our quota 774789Sahrens */ 775789Sahrens myspace = MIN(parentspace, quota - used); 776789Sahrens } 777789Sahrens 778789Sahrens mutex_exit(&dd->dd_lock); 779789Sahrens 780789Sahrens return (myspace); 781789Sahrens } 782789Sahrens 783789Sahrens struct tempreserve { 784789Sahrens list_node_t tr_node; 785789Sahrens dsl_dir_t *tr_ds; 786789Sahrens uint64_t tr_size; 787789Sahrens }; 788789Sahrens 789789Sahrens /* 790789Sahrens * Reserve space in this dsl_dir, to be used in this tx's txg. 791789Sahrens * After the space has been dirtied (and thus 792789Sahrens * dsl_dir_willuse_space() has been called), the reservation should 793789Sahrens * be canceled, using dsl_dir_tempreserve_clear(). 794789Sahrens */ 795789Sahrens static int 796789Sahrens dsl_dir_tempreserve_impl(dsl_dir_t *dd, 797789Sahrens uint64_t asize, boolean_t netfree, list_t *tr_list, dmu_tx_t *tx) 798789Sahrens { 799789Sahrens uint64_t txg = tx->tx_txg; 800789Sahrens uint64_t est_used, quota, parent_rsrv; 801789Sahrens int edquot = EDQUOT; 802789Sahrens int txgidx = txg & TXG_MASK; 803789Sahrens int i; 804789Sahrens struct tempreserve *tr; 805789Sahrens 806789Sahrens ASSERT3U(txg, !=, 0); 807*1544Seschrock ASSERT3S(asize, >=, 0); 808789Sahrens 809789Sahrens mutex_enter(&dd->dd_lock); 810789Sahrens /* 811789Sahrens * Check against the dsl_dir's quota. We don't add in the delta 812789Sahrens * when checking for over-quota because they get one free hit. 813789Sahrens */ 814789Sahrens est_used = dsl_dir_estimated_space(dd); 815789Sahrens for (i = 0; i < TXG_SIZE; i++) 816789Sahrens est_used += dd->dd_tempreserved[i]; 817789Sahrens 818789Sahrens quota = UINT64_MAX; 819789Sahrens 820789Sahrens if (dd->dd_phys->dd_quota) 821789Sahrens quota = dd->dd_phys->dd_quota; 822789Sahrens 823789Sahrens /* 824789Sahrens * If this transaction will result in a net free of space, we want 825789Sahrens * to let it through, but we have to be careful: the space that it 826789Sahrens * frees won't become available until *after* this txg syncs. 827789Sahrens * Therefore, to ensure that it's possible to remove files from 828789Sahrens * a full pool without inducing transient overcommits, we throttle 829789Sahrens * netfree transactions against a quota that is slightly larger, 830789Sahrens * but still within the pool's allocation slop. In cases where 831789Sahrens * we're very close to full, this will allow a steady trickle of 832789Sahrens * removes to get through. 833789Sahrens */ 834789Sahrens if (dd->dd_parent == NULL) { 835789Sahrens uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree); 836789Sahrens if (poolsize < quota) { 837789Sahrens quota = poolsize; 838789Sahrens edquot = ENOSPC; 839789Sahrens } 840789Sahrens } else if (netfree) { 841789Sahrens quota = UINT64_MAX; 842789Sahrens } 843789Sahrens 844789Sahrens /* 845789Sahrens * If they are requesting more space, and our current estimate 846789Sahrens * is over quota. They get to try again unless the actual 847*1544Seschrock * on-disk is over quota and there are no pending changes (which 848*1544Seschrock * may free up space for us). 849789Sahrens */ 850789Sahrens if (asize > 0 && est_used > quota) { 851*1544Seschrock if (dd->dd_space_towrite[txg & TXG_MASK] != 0 || 852*1544Seschrock dd->dd_space_towrite[(txg-1) & TXG_MASK] != 0 || 853*1544Seschrock dd->dd_space_towrite[(txg-2) & TXG_MASK] != 0 || 854*1544Seschrock dd->dd_used_bytes < quota) 855789Sahrens edquot = ERESTART; 856789Sahrens dprintf_dd(dd, "failing: used=%lluK est_used = %lluK " 857789Sahrens "quota=%lluK tr=%lluK err=%d\n", 858789Sahrens dd->dd_used_bytes>>10, est_used>>10, 859789Sahrens quota>>10, asize>>10, edquot); 860789Sahrens mutex_exit(&dd->dd_lock); 861789Sahrens return (edquot); 862789Sahrens } 863789Sahrens 864789Sahrens /* We need to up our estimated delta before dropping dd_lock */ 865789Sahrens dd->dd_tempreserved[txgidx] += asize; 866789Sahrens 867789Sahrens parent_rsrv = parent_delta(dd, est_used, asize); 868789Sahrens mutex_exit(&dd->dd_lock); 869789Sahrens 870789Sahrens tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP); 871789Sahrens tr->tr_ds = dd; 872789Sahrens tr->tr_size = asize; 873789Sahrens list_insert_tail(tr_list, tr); 874789Sahrens 875789Sahrens /* see if it's OK with our parent */ 876789Sahrens if (dd->dd_parent && parent_rsrv) { 877789Sahrens return (dsl_dir_tempreserve_impl(dd->dd_parent, 878789Sahrens parent_rsrv, netfree, tr_list, tx)); 879789Sahrens } else { 880789Sahrens return (0); 881789Sahrens } 882789Sahrens } 883789Sahrens 884789Sahrens /* 885789Sahrens * Reserve space in this dsl_dir, to be used in this tx's txg. 886789Sahrens * After the space has been dirtied (and thus 887789Sahrens * dsl_dir_willuse_space() has been called), the reservation should 888789Sahrens * be canceled, using dsl_dir_tempreserve_clear(). 889789Sahrens */ 890789Sahrens int 891789Sahrens dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, 892789Sahrens uint64_t asize, uint64_t fsize, void **tr_cookiep, dmu_tx_t *tx) 893789Sahrens { 894789Sahrens int err = 0; 895789Sahrens list_t *tr_list; 896789Sahrens 897789Sahrens tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP); 898789Sahrens list_create(tr_list, sizeof (struct tempreserve), 899789Sahrens offsetof(struct tempreserve, tr_node)); 900*1544Seschrock ASSERT3S(asize, >=, 0); 901*1544Seschrock ASSERT3S(fsize, >=, 0); 902789Sahrens 903789Sahrens err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize, 904789Sahrens tr_list, tx); 905789Sahrens 906789Sahrens if (err == 0) { 907789Sahrens struct tempreserve *tr; 908789Sahrens 909789Sahrens err = arc_tempreserve_space(lsize); 910789Sahrens if (err == 0) { 911789Sahrens tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP); 912789Sahrens tr->tr_ds = NULL; 913789Sahrens tr->tr_size = lsize; 914789Sahrens list_insert_tail(tr_list, tr); 915789Sahrens } 916789Sahrens } 917789Sahrens 918789Sahrens if (err) 919789Sahrens dsl_dir_tempreserve_clear(tr_list, tx); 920789Sahrens else 921789Sahrens *tr_cookiep = tr_list; 922789Sahrens return (err); 923789Sahrens } 924789Sahrens 925789Sahrens /* 926789Sahrens * Clear a temporary reservation that we previously made with 927789Sahrens * dsl_dir_tempreserve_space(). 928789Sahrens */ 929789Sahrens void 930789Sahrens dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx) 931789Sahrens { 932789Sahrens int txgidx = tx->tx_txg & TXG_MASK; 933789Sahrens list_t *tr_list = tr_cookie; 934789Sahrens struct tempreserve *tr; 935789Sahrens 936789Sahrens ASSERT3U(tx->tx_txg, !=, 0); 937789Sahrens 938789Sahrens while (tr = list_head(tr_list)) { 939789Sahrens if (tr->tr_ds == NULL) { 940789Sahrens arc_tempreserve_clear(tr->tr_size); 941789Sahrens } else { 942789Sahrens mutex_enter(&tr->tr_ds->dd_lock); 943789Sahrens ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=, 944789Sahrens tr->tr_size); 945789Sahrens tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size; 946789Sahrens mutex_exit(&tr->tr_ds->dd_lock); 947789Sahrens } 948789Sahrens list_remove(tr_list, tr); 949789Sahrens kmem_free(tr, sizeof (struct tempreserve)); 950789Sahrens } 951789Sahrens 952789Sahrens kmem_free(tr_list, sizeof (list_t)); 953789Sahrens } 954789Sahrens 955789Sahrens /* 956789Sahrens * Call in open context when we think we're going to write/free space, 957789Sahrens * eg. when dirtying data. Be conservative (ie. OK to write less than 958789Sahrens * this or free more than this, but don't write more or free less). 959789Sahrens */ 960789Sahrens void 961789Sahrens dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) 962789Sahrens { 963789Sahrens int64_t parent_space; 964789Sahrens uint64_t est_used; 965789Sahrens 966789Sahrens mutex_enter(&dd->dd_lock); 967789Sahrens if (space > 0) 968789Sahrens dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space; 969789Sahrens 970789Sahrens est_used = dsl_dir_estimated_space(dd); 971789Sahrens parent_space = parent_delta(dd, est_used, space); 972789Sahrens mutex_exit(&dd->dd_lock); 973789Sahrens 974789Sahrens /* Make sure that we clean up dd_space_to* */ 975789Sahrens dsl_dir_dirty(dd, tx); 976789Sahrens 977789Sahrens /* XXX this is potentially expensive and unnecessary... */ 978789Sahrens if (parent_space && dd->dd_parent) 979789Sahrens dsl_dir_willuse_space(dd->dd_parent, parent_space, tx); 980789Sahrens } 981789Sahrens 982789Sahrens /* call from syncing context when we actually write/free space for this dd */ 983789Sahrens void 984789Sahrens dsl_dir_diduse_space(dsl_dir_t *dd, 985789Sahrens int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx) 986789Sahrens { 987789Sahrens int64_t accounted_delta; 988789Sahrens 989789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 990789Sahrens 991789Sahrens dsl_dir_dirty(dd, tx); 992789Sahrens 993789Sahrens mutex_enter(&dd->dd_lock); 994789Sahrens accounted_delta = parent_delta(dd, dd->dd_used_bytes, used); 995789Sahrens ASSERT(used >= 0 || dd->dd_used_bytes >= -used); 996789Sahrens ASSERT(compressed >= 0 || 997789Sahrens dd->dd_phys->dd_compressed_bytes >= -compressed); 998789Sahrens ASSERT(uncompressed >= 0 || 999789Sahrens dd->dd_phys->dd_uncompressed_bytes >= -uncompressed); 1000789Sahrens dd->dd_used_bytes += used; 1001789Sahrens dd->dd_phys->dd_uncompressed_bytes += uncompressed; 1002789Sahrens dd->dd_phys->dd_compressed_bytes += compressed; 1003789Sahrens mutex_exit(&dd->dd_lock); 1004789Sahrens 1005789Sahrens if (dd->dd_parent != NULL) { 1006789Sahrens dsl_dir_diduse_space(dd->dd_parent, 1007789Sahrens accounted_delta, compressed, uncompressed, tx); 1008789Sahrens } 1009789Sahrens } 1010789Sahrens 1011789Sahrens static int 1012789Sahrens dsl_dir_set_quota_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 1013789Sahrens { 1014789Sahrens uint64_t *quotap = arg; 1015789Sahrens uint64_t new_quota = *quotap; 1016789Sahrens int err = 0; 1017789Sahrens 1018789Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 1019789Sahrens 1020789Sahrens mutex_enter(&dd->dd_lock); 1021789Sahrens if (new_quota != 0 && (new_quota < dd->dd_phys->dd_reserved || 1022789Sahrens new_quota < dsl_dir_estimated_space(dd))) { 1023789Sahrens err = ENOSPC; 1024789Sahrens } else { 1025789Sahrens dd->dd_phys->dd_quota = new_quota; 1026789Sahrens } 1027789Sahrens mutex_exit(&dd->dd_lock); 1028789Sahrens return (err); 1029789Sahrens } 1030789Sahrens 1031789Sahrens int 1032789Sahrens dsl_dir_set_quota(const char *ddname, uint64_t quota) 1033789Sahrens { 1034789Sahrens dsl_dir_t *dd; 1035789Sahrens int err; 1036789Sahrens 1037*1544Seschrock err = dsl_dir_open(ddname, FTAG, &dd, NULL); 1038*1544Seschrock if (err) 1039*1544Seschrock return (err); 1040789Sahrens /* 1041789Sahrens * If someone removes a file, then tries to set the quota, we 1042789Sahrens * want to make sure the file freeing takes effect. 1043789Sahrens */ 1044789Sahrens txg_wait_open(dd->dd_pool, 0); 1045789Sahrens 1046789Sahrens err = dsl_dir_sync_task(dd, dsl_dir_set_quota_sync, "a, 0); 1047789Sahrens dsl_dir_close(dd, FTAG); 1048789Sahrens return (err); 1049789Sahrens } 1050789Sahrens 1051789Sahrens static int 1052789Sahrens dsl_dir_set_reservation_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 1053789Sahrens { 1054789Sahrens uint64_t *reservationp = arg; 1055789Sahrens uint64_t new_reservation = *reservationp; 1056789Sahrens uint64_t used, avail; 1057789Sahrens int64_t delta; 1058789Sahrens 1059789Sahrens if (new_reservation > INT64_MAX) 1060789Sahrens return (EOVERFLOW); 1061789Sahrens 1062789Sahrens mutex_enter(&dd->dd_lock); 1063789Sahrens used = dd->dd_used_bytes; 1064789Sahrens delta = MAX(used, new_reservation) - 1065789Sahrens MAX(used, dd->dd_phys->dd_reserved); 1066789Sahrens mutex_exit(&dd->dd_lock); 1067789Sahrens 1068789Sahrens if (dd->dd_parent) { 1069789Sahrens avail = dsl_dir_space_available(dd->dd_parent, 1070789Sahrens NULL, 0, FALSE); 1071789Sahrens } else { 1072789Sahrens avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used; 1073789Sahrens } 1074789Sahrens 1075789Sahrens if (delta > 0 && delta > avail) 1076789Sahrens return (ENOSPC); 1077789Sahrens if (delta > 0 && dd->dd_phys->dd_quota > 0 && 1078789Sahrens new_reservation > dd->dd_phys->dd_quota) 1079789Sahrens return (ENOSPC); 1080789Sahrens 1081789Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 1082789Sahrens dd->dd_phys->dd_reserved = new_reservation; 1083789Sahrens 1084789Sahrens if (dd->dd_parent != NULL) { 1085789Sahrens /* Roll up this additional usage into our ancestors */ 1086789Sahrens dsl_dir_diduse_space(dd->dd_parent, delta, 0, 0, tx); 1087789Sahrens } 1088789Sahrens return (0); 1089789Sahrens } 1090789Sahrens 1091789Sahrens int 1092789Sahrens dsl_dir_set_reservation(const char *ddname, uint64_t reservation) 1093789Sahrens { 1094789Sahrens dsl_dir_t *dd; 1095789Sahrens int err; 1096789Sahrens 1097*1544Seschrock err = dsl_dir_open(ddname, FTAG, &dd, NULL); 1098*1544Seschrock if (err) 1099*1544Seschrock return (err); 1100789Sahrens err = dsl_dir_sync_task(dd, 1101789Sahrens dsl_dir_set_reservation_sync, &reservation, 0); 1102789Sahrens dsl_dir_close(dd, FTAG); 1103789Sahrens return (err); 1104789Sahrens } 1105789Sahrens 1106789Sahrens static dsl_dir_t * 1107789Sahrens closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2) 1108789Sahrens { 1109789Sahrens for (; ds1; ds1 = ds1->dd_parent) { 1110789Sahrens dsl_dir_t *dd; 1111789Sahrens for (dd = ds2; dd; dd = dd->dd_parent) { 1112789Sahrens if (ds1 == dd) 1113789Sahrens return (dd); 1114789Sahrens } 1115789Sahrens } 1116789Sahrens return (NULL); 1117789Sahrens } 1118789Sahrens 1119789Sahrens /* 1120789Sahrens * If delta is applied to dd, how much of that delta would be applied to 1121789Sahrens * ancestor? Syncing context only. 1122789Sahrens */ 1123789Sahrens static int64_t 1124789Sahrens would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor) 1125789Sahrens { 1126789Sahrens if (dd == ancestor) 1127789Sahrens return (delta); 1128789Sahrens 1129789Sahrens mutex_enter(&dd->dd_lock); 1130789Sahrens delta = parent_delta(dd, dd->dd_used_bytes, delta); 1131789Sahrens mutex_exit(&dd->dd_lock); 1132789Sahrens return (would_change(dd->dd_parent, delta, ancestor)); 1133789Sahrens } 1134789Sahrens 1135789Sahrens int 1136789Sahrens dsl_dir_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 1137789Sahrens { 1138789Sahrens const char *newname = arg; 1139789Sahrens dsl_pool_t *dp = dd->dd_pool; 1140789Sahrens objset_t *mos = dp->dp_meta_objset; 1141789Sahrens dsl_dir_t *newpds; 1142789Sahrens const char *tail; 1143789Sahrens int err, len; 1144789Sahrens 1145789Sahrens /* can't rename to different pool */ 1146789Sahrens len = strlen(dp->dp_root_dir->dd_myname); 1147789Sahrens if (strncmp(dp->dp_root_dir->dd_myname, newname, len != 0) || 1148789Sahrens newname[len] != '/') { 1149789Sahrens return (ENXIO); 1150789Sahrens } 1151789Sahrens 1152789Sahrens /* new parent should exist */ 1153*1544Seschrock err = dsl_dir_open_spa(dp->dp_spa, newname, FTAG, &newpds, &tail); 1154*1544Seschrock if (err) 1155*1544Seschrock return (err); 1156789Sahrens 1157789Sahrens /* new name should not already exist */ 1158789Sahrens if (tail == NULL) { 1159789Sahrens dsl_dir_close(newpds, FTAG); 1160789Sahrens return (EEXIST); 1161789Sahrens } 1162789Sahrens 1163789Sahrens rw_enter(&dp->dp_config_rwlock, RW_WRITER); 1164789Sahrens 1165789Sahrens /* There should be 2 references: the open and the dirty */ 1166789Sahrens if (dmu_buf_refcount(dd->dd_dbuf) > 2) { 1167789Sahrens rw_exit(&dp->dp_config_rwlock); 1168789Sahrens dsl_dir_close(newpds, FTAG); 1169789Sahrens return (EBUSY); 1170789Sahrens } 1171789Sahrens 1172789Sahrens if (newpds != dd->dd_parent) { 1173789Sahrens dsl_dir_t *ancestor; 1174789Sahrens int64_t adelta; 1175789Sahrens uint64_t myspace, avail; 1176789Sahrens 1177789Sahrens ancestor = closest_common_ancestor(dd, newpds); 1178789Sahrens 1179789Sahrens /* no rename into our descendent */ 1180789Sahrens if (ancestor == dd) { 1181789Sahrens dsl_dir_close(newpds, FTAG); 1182789Sahrens rw_exit(&dp->dp_config_rwlock); 1183789Sahrens return (EINVAL); 1184789Sahrens } 1185789Sahrens 1186789Sahrens myspace = MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved); 1187789Sahrens adelta = would_change(dd->dd_parent, -myspace, ancestor); 1188789Sahrens avail = dsl_dir_space_available(newpds, 1189789Sahrens ancestor, adelta, FALSE); 1190789Sahrens if (avail < myspace) { 1191789Sahrens dsl_dir_close(newpds, FTAG); 1192789Sahrens rw_exit(&dp->dp_config_rwlock); 1193789Sahrens return (ENOSPC); 1194789Sahrens } 1195789Sahrens 1196789Sahrens /* The point of no (unsuccessful) return */ 1197789Sahrens 1198789Sahrens dsl_dir_diduse_space(dd->dd_parent, -myspace, 1199789Sahrens -dd->dd_phys->dd_compressed_bytes, 1200789Sahrens -dd->dd_phys->dd_uncompressed_bytes, tx); 1201789Sahrens dsl_dir_diduse_space(newpds, myspace, 1202789Sahrens dd->dd_phys->dd_compressed_bytes, 1203789Sahrens dd->dd_phys->dd_uncompressed_bytes, tx); 1204789Sahrens } 1205789Sahrens 1206789Sahrens /* The point of no (unsuccessful) return */ 1207789Sahrens 1208789Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 1209789Sahrens 1210789Sahrens /* remove from old parent zapobj */ 1211789Sahrens err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj, 1212789Sahrens dd->dd_myname, tx); 1213789Sahrens ASSERT3U(err, ==, 0); 1214789Sahrens 1215789Sahrens (void) strcpy(dd->dd_myname, tail); 1216789Sahrens dsl_dir_close(dd->dd_parent, dd); 1217789Sahrens dd->dd_phys->dd_parent_obj = newpds->dd_object; 1218*1544Seschrock VERIFY(0 == dsl_dir_open_obj(dd->dd_pool, 1219*1544Seschrock newpds->dd_object, NULL, dd, &dd->dd_parent)); 1220789Sahrens 1221789Sahrens /* add to new parent zapobj */ 1222789Sahrens err = zap_add(mos, newpds->dd_phys->dd_child_dir_zapobj, 1223789Sahrens dd->dd_myname, 8, 1, &dd->dd_object, tx); 1224789Sahrens ASSERT3U(err, ==, 0); 1225789Sahrens 1226789Sahrens dsl_dir_close(newpds, FTAG); 1227789Sahrens rw_exit(&dp->dp_config_rwlock); 1228789Sahrens return (0); 1229789Sahrens } 1230