1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 5789Sahrens * Common Development and Distribution License, Version 1.0 only 6789Sahrens * (the "License"). You may not use this file except in compliance 7789Sahrens * with the License. 8789Sahrens * 9789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10789Sahrens * or http://www.opensolaris.org/os/licensing. 11789Sahrens * See the License for the specific language governing permissions 12789Sahrens * and limitations under the License. 13789Sahrens * 14789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 15789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16789Sahrens * If applicable, add the following below this CDDL HEADER, with the 17789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 18789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 19789Sahrens * 20789Sahrens * CDDL HEADER END 21789Sahrens */ 22789Sahrens /* 23789Sahrens * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24789Sahrens * Use is subject to license terms. 25789Sahrens */ 26789Sahrens 27789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 28789Sahrens 29789Sahrens #include <sys/dmu.h> 30789Sahrens #include <sys/dmu_tx.h> 31789Sahrens #include <sys/dsl_dataset.h> 32789Sahrens #include <sys/dsl_dir.h> 33789Sahrens #include <sys/dsl_prop.h> 34789Sahrens #include <sys/spa.h> 35789Sahrens #include <sys/zap.h> 36789Sahrens #include <sys/zio.h> 37789Sahrens #include <sys/arc.h> 38789Sahrens #include "zfs_namecheck.h" 39789Sahrens 40789Sahrens static uint64_t dsl_dir_space_accounted(dsl_dir_t *dd); 41789Sahrens static uint64_t dsl_dir_estimated_space(dsl_dir_t *dd); 42789Sahrens static int dsl_dir_set_reservation_sync(dsl_dir_t *dd, 43789Sahrens void *arg, dmu_tx_t *tx); 44789Sahrens static uint64_t dsl_dir_space_available(dsl_dir_t *dd, 45789Sahrens dsl_dir_t *ancestor, int64_t delta, int ondiskonly); 46789Sahrens 47789Sahrens 48789Sahrens /* ARGSUSED */ 49789Sahrens static void 50789Sahrens dsl_dir_evict(dmu_buf_t *db, void *arg) 51789Sahrens { 52789Sahrens dsl_dir_t *dd = arg; 53789Sahrens dsl_pool_t *dp = dd->dd_pool; 54789Sahrens int t; 55789Sahrens 56789Sahrens for (t = 0; t < TXG_SIZE; t++) { 57789Sahrens ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); 58789Sahrens ASSERT(dd->dd_tempreserved[t] == 0); 59789Sahrens ASSERT(dd->dd_space_towrite[t] == 0); 60789Sahrens } 61789Sahrens 62789Sahrens ASSERT3U(dd->dd_used_bytes, ==, dd->dd_phys->dd_used_bytes); 63789Sahrens 64789Sahrens ASSERT(dd->dd_sync_txg == 0); 65789Sahrens 66789Sahrens if (dd->dd_parent) 67789Sahrens dsl_dir_close(dd->dd_parent, dd); 68789Sahrens 69789Sahrens spa_close(dd->dd_pool->dp_spa, dd); 70789Sahrens 71789Sahrens /* 72789Sahrens * The props callback list should be empty since they hold the 73789Sahrens * dir open. 74789Sahrens */ 75789Sahrens list_destroy(&dd->dd_prop_cbs); 76789Sahrens kmem_free(dd, sizeof (dsl_dir_t)); 77789Sahrens } 78789Sahrens 79789Sahrens dsl_dir_t * 80789Sahrens dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, 81789Sahrens const char *tail, void *tag) 82789Sahrens { 83789Sahrens dmu_buf_t *dbuf; 84789Sahrens dsl_dir_t *dd; 85789Sahrens 86789Sahrens ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 87789Sahrens dsl_pool_sync_context(dp)); 88789Sahrens 89789Sahrens dbuf = dmu_bonus_hold_tag(dp->dp_meta_objset, ddobj, tag); 90789Sahrens dmu_buf_read(dbuf); 91789Sahrens dd = dmu_buf_get_user(dbuf); 92789Sahrens #ifdef ZFS_DEBUG 93789Sahrens { 94789Sahrens dmu_object_info_t doi; 95789Sahrens dmu_object_info_from_db(dbuf, &doi); 96789Sahrens ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DATASET); 97789Sahrens } 98789Sahrens #endif 99789Sahrens /* XXX assert bonus buffer size is correct */ 100789Sahrens if (dd == NULL) { 101789Sahrens dsl_dir_t *winner; 102789Sahrens int err; 103789Sahrens 104789Sahrens dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP); 105789Sahrens dd->dd_object = ddobj; 106789Sahrens dd->dd_dbuf = dbuf; 107789Sahrens dd->dd_pool = dp; 108789Sahrens dd->dd_phys = dbuf->db_data; 109789Sahrens dd->dd_used_bytes = dd->dd_phys->dd_used_bytes; 110789Sahrens 111789Sahrens list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t), 112789Sahrens offsetof(dsl_prop_cb_record_t, cbr_node)); 113789Sahrens 114789Sahrens if (dd->dd_phys->dd_parent_obj) { 115789Sahrens dd->dd_parent = dsl_dir_open_obj(dp, 116789Sahrens dd->dd_phys->dd_parent_obj, NULL, dd); 117789Sahrens if (tail) { 118789Sahrens #ifdef ZFS_DEBUG 119789Sahrens uint64_t foundobj; 120789Sahrens 121789Sahrens err = zap_lookup(dp->dp_meta_objset, 122789Sahrens dd->dd_parent->dd_phys-> 123789Sahrens dd_child_dir_zapobj, 124789Sahrens tail, sizeof (foundobj), 1, &foundobj); 125789Sahrens ASSERT3U(err, ==, 0); 126789Sahrens ASSERT3U(foundobj, ==, ddobj); 127789Sahrens #endif 128789Sahrens (void) strcpy(dd->dd_myname, tail); 129789Sahrens } else { 130789Sahrens err = zap_value_search(dp->dp_meta_objset, 131789Sahrens dd->dd_parent->dd_phys-> 132789Sahrens dd_child_dir_zapobj, 133789Sahrens ddobj, dd->dd_myname); 134789Sahrens /* 135789Sahrens * The caller should be protecting this ddobj 136789Sahrens * from being deleted concurrently 137789Sahrens */ 138789Sahrens ASSERT(err == 0); 139789Sahrens } 140789Sahrens } else { 141789Sahrens (void) strcpy(dd->dd_myname, spa_name(dp->dp_spa)); 142789Sahrens } 143789Sahrens 144789Sahrens winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys, 145789Sahrens dsl_dir_evict); 146789Sahrens if (winner) { 147789Sahrens if (dd->dd_parent) 148789Sahrens dsl_dir_close(dd->dd_parent, dd); 149789Sahrens kmem_free(dd, sizeof (dsl_dir_t)); 150789Sahrens dd = winner; 151789Sahrens } else { 152789Sahrens spa_open_ref(dp->dp_spa, dd); 153789Sahrens } 154789Sahrens } 155789Sahrens 156789Sahrens /* 157789Sahrens * The dsl_dir_t has both open-to-close and instantiate-to-evict 158789Sahrens * holds on the spa. We need the open-to-close holds because 159789Sahrens * otherwise the spa_refcnt wouldn't change when we open a 160789Sahrens * dir which the spa also has open, so we could incorrectly 161789Sahrens * think it was OK to unload/export/destroy the pool. We need 162789Sahrens * the instantiate-to-evict hold because the dsl_dir_t has a 163789Sahrens * pointer to the dd_pool, which has a pointer to the spa_t. 164789Sahrens */ 165789Sahrens spa_open_ref(dp->dp_spa, tag); 166789Sahrens ASSERT3P(dd->dd_pool, ==, dp); 167789Sahrens ASSERT3U(dd->dd_object, ==, ddobj); 168789Sahrens ASSERT3P(dd->dd_dbuf, ==, dbuf); 169789Sahrens return (dd); 170789Sahrens } 171789Sahrens 172789Sahrens void 173789Sahrens dsl_dir_close(dsl_dir_t *dd, void *tag) 174789Sahrens { 175789Sahrens dprintf_dd(dd, "%s\n", ""); 176789Sahrens spa_close(dd->dd_pool->dp_spa, tag); 177789Sahrens dmu_buf_rele_tag(dd->dd_dbuf, tag); 178789Sahrens } 179789Sahrens 180789Sahrens /* buf must be long enough (MAXNAMELEN should do) */ 181789Sahrens void 182789Sahrens dsl_dir_name(dsl_dir_t *dd, char *buf) 183789Sahrens { 184789Sahrens if (dd->dd_parent) { 185789Sahrens dsl_dir_name(dd->dd_parent, buf); 186789Sahrens (void) strcat(buf, "/"); 187789Sahrens } else { 188789Sahrens buf[0] = '\0'; 189789Sahrens } 190789Sahrens if (!MUTEX_HELD(&dd->dd_lock)) { 191789Sahrens /* 192789Sahrens * recursive mutex so that we can use 193789Sahrens * dprintf_dd() with dd_lock held 194789Sahrens */ 195789Sahrens mutex_enter(&dd->dd_lock); 196789Sahrens (void) strcat(buf, dd->dd_myname); 197789Sahrens mutex_exit(&dd->dd_lock); 198789Sahrens } else { 199789Sahrens (void) strcat(buf, dd->dd_myname); 200789Sahrens } 201789Sahrens } 202789Sahrens 203789Sahrens int 204789Sahrens dsl_dir_is_private(dsl_dir_t *dd) 205789Sahrens { 206789Sahrens int rv = FALSE; 207789Sahrens 208789Sahrens if (dd->dd_parent && dsl_dir_is_private(dd->dd_parent)) 209789Sahrens rv = TRUE; 210789Sahrens if (dataset_name_hidden(dd->dd_myname)) 211789Sahrens rv = TRUE; 212789Sahrens return (rv); 213789Sahrens } 214789Sahrens 215789Sahrens 216789Sahrens static int 217789Sahrens getcomponent(const char *path, char *component, const char **nextp) 218789Sahrens { 219789Sahrens char *p; 220789Sahrens if (path == NULL) 221789Sahrens return (NULL); 222789Sahrens /* This would be a good place to reserve some namespace... */ 223789Sahrens p = strpbrk(path, "/@"); 224789Sahrens if (p && (p[1] == '/' || p[1] == '@')) { 225789Sahrens /* two separators in a row */ 226789Sahrens return (EINVAL); 227789Sahrens } 228789Sahrens if (p == NULL || p == path) { 229789Sahrens /* 230789Sahrens * if the first thing is an @ or /, it had better be an 231789Sahrens * @ and it had better not have any more ats or slashes, 232789Sahrens * and it had better have something after the @. 233789Sahrens */ 234789Sahrens if (p != NULL && 235789Sahrens (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0')) 236789Sahrens return (EINVAL); 237789Sahrens if (strlen(path) >= MAXNAMELEN) 238789Sahrens return (ENAMETOOLONG); 239789Sahrens (void) strcpy(component, path); 240789Sahrens p = NULL; 241789Sahrens } else if (p[0] == '/') { 242789Sahrens if (p-path >= MAXNAMELEN) 243789Sahrens return (ENAMETOOLONG); 244789Sahrens (void) strncpy(component, path, p - path); 245789Sahrens component[p-path] = '\0'; 246789Sahrens p++; 247789Sahrens } else if (p[0] == '@') { 248789Sahrens /* 249789Sahrens * if the next separator is an @, there better not be 250789Sahrens * any more slashes. 251789Sahrens */ 252789Sahrens if (strchr(path, '/')) 253789Sahrens return (EINVAL); 254789Sahrens if (p-path >= MAXNAMELEN) 255789Sahrens return (ENAMETOOLONG); 256789Sahrens (void) strncpy(component, path, p - path); 257789Sahrens component[p-path] = '\0'; 258789Sahrens } else { 259789Sahrens ASSERT(!"invalid p"); 260789Sahrens } 261789Sahrens *nextp = p; 262789Sahrens return (0); 263789Sahrens } 264789Sahrens 265789Sahrens /* 266789Sahrens * same as dsl_open_dir, ignore the first component of name and use the 267789Sahrens * spa instead 268789Sahrens */ 269789Sahrens dsl_dir_t * 270789Sahrens dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, const char **tailp) 271789Sahrens { 272789Sahrens char buf[MAXNAMELEN]; 273789Sahrens const char *next, *nextnext = NULL; 274789Sahrens int err; 275789Sahrens dsl_dir_t *dd; 276789Sahrens dsl_pool_t *dp; 277789Sahrens uint64_t ddobj; 278789Sahrens int openedspa = FALSE; 279789Sahrens 280789Sahrens dprintf("%s\n", name); 281789Sahrens 282789Sahrens if (name == NULL) 283789Sahrens return (NULL); 284789Sahrens err = getcomponent(name, buf, &next); 285789Sahrens if (err) 286789Sahrens return (NULL); 287789Sahrens if (spa == NULL) { 288789Sahrens err = spa_open(buf, &spa, FTAG); 289789Sahrens if (err) { 290789Sahrens dprintf("spa_open(%s) failed\n", buf); 291789Sahrens return (NULL); 292789Sahrens } 293789Sahrens openedspa = TRUE; 294789Sahrens 295789Sahrens /* XXX this assertion belongs in spa_open */ 296789Sahrens ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa))); 297789Sahrens } 298789Sahrens 299789Sahrens dp = spa_get_dsl(spa); 300789Sahrens 301789Sahrens rw_enter(&dp->dp_config_rwlock, RW_READER); 302789Sahrens dd = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag); 303789Sahrens while (next != NULL) { 304789Sahrens dsl_dir_t *child_ds; 305789Sahrens err = getcomponent(next, buf, &nextnext); 306789Sahrens if (err) { 307789Sahrens dsl_dir_close(dd, tag); 308*885Sahrens rw_exit(&dp->dp_config_rwlock); 309789Sahrens if (openedspa) 310789Sahrens spa_close(spa, FTAG); 311789Sahrens return (NULL); 312789Sahrens } 313789Sahrens ASSERT(next[0] != '\0'); 314789Sahrens if (next[0] == '@') 315789Sahrens break; 316789Sahrens if (dd->dd_phys->dd_child_dir_zapobj == 0) 317789Sahrens break; 318789Sahrens dprintf("looking up %s in obj%lld\n", 319789Sahrens buf, dd->dd_phys->dd_child_dir_zapobj); 320789Sahrens 321789Sahrens err = zap_lookup(dp->dp_meta_objset, 322789Sahrens dd->dd_phys->dd_child_dir_zapobj, 323789Sahrens buf, sizeof (ddobj), 1, &ddobj); 324789Sahrens if (err == ENOENT) { 325789Sahrens break; 326789Sahrens } 327789Sahrens ASSERT(err == 0); 328789Sahrens 329789Sahrens child_ds = dsl_dir_open_obj(dp, ddobj, buf, tag); 330789Sahrens dsl_dir_close(dd, tag); 331789Sahrens dd = child_ds; 332789Sahrens next = nextnext; 333789Sahrens } 334789Sahrens rw_exit(&dp->dp_config_rwlock); 335789Sahrens 336789Sahrens /* 337789Sahrens * It's an error if there's more than one component left, or 338789Sahrens * tailp==NULL and there's any component left. 339789Sahrens */ 340789Sahrens if (next != NULL && 341789Sahrens (tailp == NULL || (nextnext && nextnext[0] != '\0'))) { 342789Sahrens /* bad path name */ 343789Sahrens dsl_dir_close(dd, tag); 344789Sahrens dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp); 345789Sahrens next = NULL; 346789Sahrens dd = NULL; 347789Sahrens } 348789Sahrens if (tailp) 349789Sahrens *tailp = next; 350789Sahrens if (openedspa) 351789Sahrens spa_close(spa, FTAG); 352789Sahrens return (dd); 353789Sahrens } 354789Sahrens 355789Sahrens /* 356789Sahrens * Return the dsl_dir_t, and possibly the last component which couldn't 357789Sahrens * be found in *tail. Return NULL if the path is bogus, or if 358789Sahrens * tail==NULL and we couldn't parse the whole name. (*tail)[0] == '@' 359789Sahrens * means that the last component is a snapshot. 360789Sahrens */ 361789Sahrens dsl_dir_t * 362789Sahrens dsl_dir_open(const char *name, void *tag, const char **tailp) 363789Sahrens { 364789Sahrens return (dsl_dir_open_spa(NULL, name, tag, tailp)); 365789Sahrens } 366789Sahrens 367789Sahrens int 368789Sahrens dsl_dir_create_sync(dsl_dir_t *pds, const char *name, dmu_tx_t *tx) 369789Sahrens { 370789Sahrens objset_t *mos = pds->dd_pool->dp_meta_objset; 371789Sahrens uint64_t ddobj; 372789Sahrens dsl_dir_phys_t *dsphys; 373789Sahrens dmu_buf_t *dbuf; 374789Sahrens int err; 375789Sahrens 376789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 377789Sahrens 378789Sahrens if (pds->dd_phys->dd_child_dir_zapobj == 0) { 379789Sahrens dmu_buf_will_dirty(pds->dd_dbuf, tx); 380789Sahrens pds->dd_phys->dd_child_dir_zapobj = zap_create(mos, 381*885Sahrens DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); 382789Sahrens } 383789Sahrens 384789Sahrens rw_enter(&pds->dd_pool->dp_config_rwlock, RW_WRITER); 385789Sahrens err = zap_lookup(mos, pds->dd_phys->dd_child_dir_zapobj, 386789Sahrens name, sizeof (uint64_t), 1, &ddobj); 387789Sahrens if (err != ENOENT) { 388789Sahrens rw_exit(&pds->dd_pool->dp_config_rwlock); 389789Sahrens return (err ? err : EEXIST); 390789Sahrens } 391789Sahrens 392789Sahrens ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 393789Sahrens DMU_OT_DSL_DATASET, sizeof (dsl_dir_phys_t), tx); 394789Sahrens err = zap_add(mos, pds->dd_phys->dd_child_dir_zapobj, 395789Sahrens name, sizeof (uint64_t), 1, &ddobj, tx); 396789Sahrens ASSERT3U(err, ==, 0); 397789Sahrens dprintf("dataset_create: zap_add %s->%lld to %lld returned %d\n", 398789Sahrens name, ddobj, pds->dd_phys->dd_child_dir_zapobj, err); 399789Sahrens 400789Sahrens dbuf = dmu_bonus_hold(mos, ddobj); 401789Sahrens dmu_buf_will_dirty(dbuf, tx); 402789Sahrens dsphys = dbuf->db_data; 403789Sahrens 404789Sahrens dsphys->dd_creation_time = gethrestime_sec(); 405789Sahrens dsphys->dd_parent_obj = pds->dd_object; 406789Sahrens dsphys->dd_props_zapobj = zap_create(mos, 407789Sahrens DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); 408789Sahrens dsphys->dd_child_dir_zapobj = zap_create(mos, 409*885Sahrens DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); 410789Sahrens dmu_buf_rele(dbuf); 411789Sahrens 412789Sahrens rw_exit(&pds->dd_pool->dp_config_rwlock); 413789Sahrens 414789Sahrens return (0); 415789Sahrens } 416789Sahrens 417789Sahrens int 418789Sahrens dsl_dir_destroy_sync(dsl_dir_t *pds, void *arg, dmu_tx_t *tx) 419789Sahrens { 420789Sahrens const char *name = arg; 421789Sahrens dsl_dir_t *dd = NULL; 422789Sahrens dsl_pool_t *dp = pds->dd_pool; 423789Sahrens objset_t *mos = dp->dp_meta_objset; 424789Sahrens uint64_t val, obj, child_zapobj, props_zapobj; 425789Sahrens int t, err; 426789Sahrens 427789Sahrens rw_enter(&dp->dp_config_rwlock, RW_WRITER); 428789Sahrens 429789Sahrens err = zap_lookup(mos, pds->dd_phys->dd_child_dir_zapobj, name, 430789Sahrens 8, 1, &obj); 431789Sahrens if (err) 432789Sahrens goto out; 433789Sahrens 434789Sahrens dd = dsl_dir_open_obj(dp, obj, name, FTAG); 435789Sahrens ASSERT3U(dd->dd_phys->dd_parent_obj, ==, pds->dd_object); 436789Sahrens 437789Sahrens if (dmu_buf_refcount(dd->dd_dbuf) > 1) { 438789Sahrens err = EBUSY; 439789Sahrens goto out; 440789Sahrens } 441789Sahrens 442789Sahrens for (t = 0; t < TXG_SIZE; t++) { 443789Sahrens /* 444789Sahrens * if they were dirty, they'd also be open. 445789Sahrens * dp_config_rwlock ensures that it stays that way. 446789Sahrens */ 447789Sahrens ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); 448789Sahrens } 449789Sahrens 450789Sahrens child_zapobj = dd->dd_phys->dd_child_dir_zapobj; 451789Sahrens props_zapobj = dd->dd_phys->dd_props_zapobj; 452789Sahrens 453789Sahrens if (child_zapobj != 0) { 454789Sahrens uint64_t count; 455789Sahrens err = EEXIST; 456789Sahrens (void) zap_count(mos, child_zapobj, &count); 457789Sahrens if (count != 0) 458789Sahrens goto out; 459789Sahrens } 460789Sahrens 461789Sahrens if (dd->dd_phys->dd_head_dataset_obj != 0) { 462789Sahrens err = dsl_dataset_destroy_sync(dd, NULL, tx); 463789Sahrens if (err) 464789Sahrens goto out; 465789Sahrens } 466789Sahrens ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); 467789Sahrens 468789Sahrens /* The point of no (unsuccessful) return */ 469789Sahrens 470789Sahrens /* Make sure parent's used gets updated */ 471789Sahrens val = 0; 472789Sahrens err = dsl_dir_set_reservation_sync(dd, &val, tx); 473789Sahrens ASSERT(err == 0); 474789Sahrens ASSERT3U(dd->dd_used_bytes, ==, 0); 475789Sahrens ASSERT3U(dd->dd_phys->dd_reserved, ==, 0); 476789Sahrens dsl_dir_close(dd, FTAG); 477789Sahrens dd = NULL; 478789Sahrens 479789Sahrens err = dmu_object_free(mos, obj, tx); 480789Sahrens ASSERT(err == 0); 481789Sahrens 482789Sahrens if (child_zapobj) 483789Sahrens err = zap_destroy(mos, child_zapobj, tx); 484789Sahrens ASSERT(err == 0); 485789Sahrens 486789Sahrens if (props_zapobj) 487789Sahrens err = zap_destroy(mos, props_zapobj, tx); 488789Sahrens ASSERT(err == 0); 489789Sahrens 490789Sahrens err = zap_remove(mos, pds->dd_phys->dd_child_dir_zapobj, name, tx); 491789Sahrens ASSERT(err == 0); 492789Sahrens 493789Sahrens out: 494789Sahrens rw_exit(&dp->dp_config_rwlock); 495789Sahrens if (dd) 496789Sahrens dsl_dir_close(dd, FTAG); 497789Sahrens 498789Sahrens return (err); 499789Sahrens } 500789Sahrens 501789Sahrens void 502789Sahrens dsl_dir_create_root(objset_t *mos, uint64_t *ddobjp, dmu_tx_t *tx) 503789Sahrens { 504789Sahrens dsl_dir_phys_t *dsp; 505789Sahrens dmu_buf_t *dbuf; 506789Sahrens int error; 507789Sahrens 508789Sahrens *ddobjp = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 509789Sahrens DMU_OT_DSL_DATASET, sizeof (dsl_dir_phys_t), tx); 510789Sahrens 511789Sahrens error = zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, 512789Sahrens sizeof (uint64_t), 1, ddobjp, tx); 513789Sahrens ASSERT3U(error, ==, 0); 514789Sahrens 515789Sahrens dbuf = dmu_bonus_hold(mos, *ddobjp); 516789Sahrens dmu_buf_will_dirty(dbuf, tx); 517789Sahrens dsp = dbuf->db_data; 518789Sahrens 519789Sahrens dsp->dd_creation_time = gethrestime_sec(); 520789Sahrens dsp->dd_props_zapobj = zap_create(mos, 521789Sahrens DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); 522789Sahrens dsp->dd_child_dir_zapobj = zap_create(mos, 523*885Sahrens DMU_OT_DSL_DIR_CHILD_MAP, DMU_OT_NONE, 0, tx); 524789Sahrens 525789Sahrens dmu_buf_rele(dbuf); 526789Sahrens } 527789Sahrens 528789Sahrens void 529789Sahrens dsl_dir_stats(dsl_dir_t *dd, dmu_objset_stats_t *dds) 530789Sahrens { 531789Sahrens bzero(dds, sizeof (dmu_objset_stats_t)); 532789Sahrens 533789Sahrens dds->dds_dir_obj = dd->dd_object; 534789Sahrens dds->dds_available = dsl_dir_space_available(dd, NULL, 0, TRUE); 535789Sahrens 536789Sahrens mutex_enter(&dd->dd_lock); 537789Sahrens dds->dds_space_used = dd->dd_used_bytes; 538789Sahrens dds->dds_compressed_bytes = dd->dd_phys->dd_compressed_bytes; 539789Sahrens dds->dds_uncompressed_bytes = dd->dd_phys->dd_uncompressed_bytes; 540789Sahrens dds->dds_quota = dd->dd_phys->dd_quota; 541789Sahrens dds->dds_reserved = dd->dd_phys->dd_reserved; 542789Sahrens mutex_exit(&dd->dd_lock); 543789Sahrens 544789Sahrens dds->dds_creation_time = dd->dd_phys->dd_creation_time; 545789Sahrens 546789Sahrens dds->dds_is_placeholder = (dd->dd_phys->dd_head_dataset_obj == 0); 547789Sahrens 548789Sahrens if (dd->dd_phys->dd_clone_parent_obj) { 549789Sahrens dsl_dataset_t *ds; 550789Sahrens 551789Sahrens rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); 552789Sahrens ds = dsl_dataset_open_obj(dd->dd_pool, 553789Sahrens dd->dd_phys->dd_clone_parent_obj, NULL, DS_MODE_NONE, FTAG); 554789Sahrens dsl_dataset_name(ds, dds->dds_clone_of); 555789Sahrens dds->dds_clone_of_obj = dd->dd_phys->dd_clone_parent_obj; 556789Sahrens dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 557789Sahrens rw_exit(&dd->dd_pool->dp_config_rwlock); 558789Sahrens } 559789Sahrens 560789Sahrens VERIFY(dsl_prop_get_ds_integer(dd, "checksum", 561789Sahrens &dds->dds_checksum, dds->dds_checksum_setpoint) == 0); 562789Sahrens 563789Sahrens VERIFY(dsl_prop_get_ds_integer(dd, "compression", 564789Sahrens &dds->dds_compression, dds->dds_compression_setpoint) == 0); 565789Sahrens 566789Sahrens VERIFY(dsl_prop_get_ds_integer(dd, "zoned", 567789Sahrens &dds->dds_zoned, dds->dds_zoned_setpoint) == 0); 568789Sahrens 569789Sahrens spa_altroot(dd->dd_pool->dp_spa, dds->dds_altroot, 570789Sahrens sizeof (dds->dds_altroot)); 571789Sahrens } 572789Sahrens 573789Sahrens int 574789Sahrens dsl_dir_sync_task(dsl_dir_t *dd, 575789Sahrens int (*func)(dsl_dir_t *, void*, dmu_tx_t *), void *arg, uint64_t space) 576789Sahrens { 577789Sahrens dmu_tx_t *tx; 578789Sahrens dsl_pool_t *dp = dd->dd_pool; 579789Sahrens int err = 0; 580789Sahrens uint64_t txg; 581789Sahrens 582789Sahrens dprintf_dd(dd, "func=%p space=%llu\n", func, space); 583789Sahrens 584789Sahrens again: 585789Sahrens tx = dmu_tx_create_ds(dd); 586789Sahrens dmu_tx_hold_space(tx, space); 587789Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 588789Sahrens if (err == ENOSPC || err == EDQUOT) { 589789Sahrens dsl_dir_t *rds; 590789Sahrens /* 591789Sahrens * They can get their space from either this dd, or the 592789Sahrens * root dd. 593789Sahrens */ 594789Sahrens for (rds = dd; rds->dd_parent; rds = rds->dd_parent) 595789Sahrens continue; 596789Sahrens dmu_tx_abort(tx); 597789Sahrens tx = dmu_tx_create_ds(rds); 598789Sahrens dmu_tx_hold_space(tx, space); 599789Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 600789Sahrens } 601789Sahrens if (err) { 602789Sahrens dmu_tx_abort(tx); 603789Sahrens return (err); 604789Sahrens } 605789Sahrens 606789Sahrens txg = dmu_tx_get_txg(tx); 607789Sahrens mutex_enter(&dd->dd_lock); 608789Sahrens if (dd->dd_sync_txg != 0) { 609789Sahrens mutex_exit(&dd->dd_lock); 610789Sahrens dmu_tx_commit(tx); 611789Sahrens txg_wait_synced(dp, 0); 612789Sahrens goto again; 613789Sahrens } 614789Sahrens 615789Sahrens /* We're good to go */ 616789Sahrens 617789Sahrens dd->dd_sync_txg = txg; 618789Sahrens dd->dd_sync_func = func; 619789Sahrens dd->dd_sync_arg = arg; 620789Sahrens 621789Sahrens mutex_exit(&dd->dd_lock); 622789Sahrens 623789Sahrens dsl_dir_dirty(dd, tx); 624789Sahrens dmu_tx_commit(tx); 625789Sahrens 626789Sahrens txg_wait_synced(dp, txg); 627789Sahrens 628789Sahrens mutex_enter(&dd->dd_lock); 629789Sahrens ASSERT(dd->dd_sync_txg == txg); 630789Sahrens ASSERT(dd->dd_sync_func == NULL); 631789Sahrens err = dd->dd_sync_err; 632789Sahrens dd->dd_sync_txg = 0; 633789Sahrens mutex_exit(&dd->dd_lock); 634789Sahrens 635789Sahrens return (err); 636789Sahrens } 637789Sahrens 638789Sahrens void 639789Sahrens dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx) 640789Sahrens { 641789Sahrens dsl_pool_t *dp = dd->dd_pool; 642789Sahrens 643789Sahrens ASSERT(dd->dd_phys); 644789Sahrens 645789Sahrens if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) { 646789Sahrens /* up the hold count until we can be written out */ 647789Sahrens dmu_buf_add_ref(dd->dd_dbuf, dd); 648789Sahrens } 649789Sahrens } 650789Sahrens 651789Sahrens static int64_t 652789Sahrens parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta) 653789Sahrens { 654789Sahrens uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved); 655789Sahrens uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved); 656789Sahrens return (new_accounted - old_accounted); 657789Sahrens } 658789Sahrens 659789Sahrens void 660789Sahrens dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx) 661789Sahrens { 662789Sahrens if (dd->dd_sync_txg == tx->tx_txg && dd->dd_sync_func) { 663789Sahrens dd->dd_sync_err = dd->dd_sync_func(dd, dd->dd_sync_arg, tx); 664789Sahrens dd->dd_sync_func = NULL; 665789Sahrens } 666789Sahrens 667789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 668789Sahrens 669789Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 670789Sahrens 671789Sahrens mutex_enter(&dd->dd_lock); 672789Sahrens ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0); 673789Sahrens dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg, 674789Sahrens dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024); 675789Sahrens dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0; 676789Sahrens dd->dd_phys->dd_used_bytes = dd->dd_used_bytes; 677789Sahrens mutex_exit(&dd->dd_lock); 678789Sahrens 679789Sahrens /* release the hold from dsl_dir_dirty */ 680789Sahrens dmu_buf_remove_ref(dd->dd_dbuf, dd); 681789Sahrens } 682789Sahrens 683789Sahrens static uint64_t 684789Sahrens dsl_dir_estimated_space(dsl_dir_t *dd) 685789Sahrens { 686789Sahrens int64_t space; 687789Sahrens int i; 688789Sahrens 689789Sahrens ASSERT(MUTEX_HELD(&dd->dd_lock)); 690789Sahrens 691789Sahrens space = dd->dd_used_bytes; 692789Sahrens ASSERT(space >= 0); 693789Sahrens for (i = 0; i < TXG_SIZE; i++) { 694789Sahrens space += dd->dd_space_towrite[i&TXG_MASK]; 695789Sahrens ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0); 696789Sahrens } 697789Sahrens return (space); 698789Sahrens } 699789Sahrens 700789Sahrens /* 701789Sahrens * How much space would dd have available if ancestor had delta applied 702789Sahrens * to it? If ondiskonly is set, we're only interested in what's 703789Sahrens * on-disk, not estimated pending changes. 704789Sahrens */ 705789Sahrens static uint64_t 706789Sahrens dsl_dir_space_available(dsl_dir_t *dd, 707789Sahrens dsl_dir_t *ancestor, int64_t delta, int ondiskonly) 708789Sahrens { 709789Sahrens uint64_t parentspace, myspace, quota, used; 710789Sahrens 711789Sahrens /* 712789Sahrens * If there are no restrictions otherwise, assume we have 713789Sahrens * unlimited space available. 714789Sahrens */ 715789Sahrens quota = UINT64_MAX; 716789Sahrens parentspace = UINT64_MAX; 717789Sahrens 718789Sahrens if (dd->dd_parent != NULL) { 719789Sahrens parentspace = dsl_dir_space_available(dd->dd_parent, 720789Sahrens ancestor, delta, ondiskonly); 721789Sahrens } 722789Sahrens 723789Sahrens mutex_enter(&dd->dd_lock); 724789Sahrens if (dd->dd_phys->dd_quota != 0) 725789Sahrens quota = dd->dd_phys->dd_quota; 726789Sahrens if (ondiskonly) { 727789Sahrens used = dd->dd_used_bytes; 728789Sahrens } else { 729789Sahrens used = dsl_dir_estimated_space(dd); 730789Sahrens } 731789Sahrens if (dd == ancestor) 732789Sahrens used += delta; 733789Sahrens 734789Sahrens if (dd->dd_parent == NULL) { 735789Sahrens uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE); 736789Sahrens quota = MIN(quota, poolsize); 737789Sahrens } 738789Sahrens 739789Sahrens if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) { 740789Sahrens /* 741789Sahrens * We have some space reserved, in addition to what our 742789Sahrens * parent gave us. 743789Sahrens */ 744789Sahrens parentspace += dd->dd_phys->dd_reserved - used; 745789Sahrens } 746789Sahrens 747789Sahrens if (used > quota) { 748789Sahrens /* over quota */ 749789Sahrens myspace = 0; 750789Sahrens #ifdef ZFS_DEBUG 751789Sahrens { 752789Sahrens /* 753789Sahrens * While it's OK to be a little over quota, if 754789Sahrens * we think we are using more space than there 755789Sahrens * is in the pool (which is already 6% more than 756789Sahrens * dsl_pool_adjustedsize()), something is very 757789Sahrens * wrong. 758789Sahrens */ 759789Sahrens uint64_t space = spa_get_space(dd->dd_pool->dp_spa); 760789Sahrens ASSERT3U(used, <=, space); 761789Sahrens } 762789Sahrens #endif 763789Sahrens } else { 764789Sahrens /* 765789Sahrens * the lesser of parent's space and the space 766789Sahrens * left in our quota 767789Sahrens */ 768789Sahrens myspace = MIN(parentspace, quota - used); 769789Sahrens } 770789Sahrens 771789Sahrens mutex_exit(&dd->dd_lock); 772789Sahrens 773789Sahrens return (myspace); 774789Sahrens } 775789Sahrens 776789Sahrens struct tempreserve { 777789Sahrens list_node_t tr_node; 778789Sahrens dsl_dir_t *tr_ds; 779789Sahrens uint64_t tr_size; 780789Sahrens }; 781789Sahrens 782789Sahrens /* 783789Sahrens * Reserve space in this dsl_dir, to be used in this tx's txg. 784789Sahrens * After the space has been dirtied (and thus 785789Sahrens * dsl_dir_willuse_space() has been called), the reservation should 786789Sahrens * be canceled, using dsl_dir_tempreserve_clear(). 787789Sahrens */ 788789Sahrens static int 789789Sahrens dsl_dir_tempreserve_impl(dsl_dir_t *dd, 790789Sahrens uint64_t asize, boolean_t netfree, list_t *tr_list, dmu_tx_t *tx) 791789Sahrens { 792789Sahrens uint64_t txg = tx->tx_txg; 793789Sahrens uint64_t est_used, quota, parent_rsrv; 794789Sahrens int edquot = EDQUOT; 795789Sahrens int txgidx = txg & TXG_MASK; 796789Sahrens int i; 797789Sahrens struct tempreserve *tr; 798789Sahrens 799789Sahrens ASSERT3U(txg, !=, 0); 800789Sahrens 801789Sahrens mutex_enter(&dd->dd_lock); 802789Sahrens /* 803789Sahrens * Check against the dsl_dir's quota. We don't add in the delta 804789Sahrens * when checking for over-quota because they get one free hit. 805789Sahrens */ 806789Sahrens est_used = dsl_dir_estimated_space(dd); 807789Sahrens for (i = 0; i < TXG_SIZE; i++) 808789Sahrens est_used += dd->dd_tempreserved[i]; 809789Sahrens 810789Sahrens quota = UINT64_MAX; 811789Sahrens 812789Sahrens if (dd->dd_phys->dd_quota) 813789Sahrens quota = dd->dd_phys->dd_quota; 814789Sahrens 815789Sahrens /* 816789Sahrens * If this transaction will result in a net free of space, we want 817789Sahrens * to let it through, but we have to be careful: the space that it 818789Sahrens * frees won't become available until *after* this txg syncs. 819789Sahrens * Therefore, to ensure that it's possible to remove files from 820789Sahrens * a full pool without inducing transient overcommits, we throttle 821789Sahrens * netfree transactions against a quota that is slightly larger, 822789Sahrens * but still within the pool's allocation slop. In cases where 823789Sahrens * we're very close to full, this will allow a steady trickle of 824789Sahrens * removes to get through. 825789Sahrens */ 826789Sahrens if (dd->dd_parent == NULL) { 827789Sahrens uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree); 828789Sahrens if (poolsize < quota) { 829789Sahrens quota = poolsize; 830789Sahrens edquot = ENOSPC; 831789Sahrens } 832789Sahrens } else if (netfree) { 833789Sahrens quota = UINT64_MAX; 834789Sahrens } 835789Sahrens 836789Sahrens /* 837789Sahrens * If they are requesting more space, and our current estimate 838789Sahrens * is over quota. They get to try again unless the actual 839789Sahrens * on-disk is over quota. 840789Sahrens */ 841789Sahrens if (asize > 0 && est_used > quota) { 842789Sahrens if (dd->dd_used_bytes < quota) 843789Sahrens edquot = ERESTART; 844789Sahrens dprintf_dd(dd, "failing: used=%lluK est_used = %lluK " 845789Sahrens "quota=%lluK tr=%lluK err=%d\n", 846789Sahrens dd->dd_used_bytes>>10, est_used>>10, 847789Sahrens quota>>10, asize>>10, edquot); 848789Sahrens mutex_exit(&dd->dd_lock); 849789Sahrens return (edquot); 850789Sahrens } 851789Sahrens 852789Sahrens /* We need to up our estimated delta before dropping dd_lock */ 853789Sahrens dd->dd_tempreserved[txgidx] += asize; 854789Sahrens 855789Sahrens parent_rsrv = parent_delta(dd, est_used, asize); 856789Sahrens mutex_exit(&dd->dd_lock); 857789Sahrens 858789Sahrens tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP); 859789Sahrens tr->tr_ds = dd; 860789Sahrens tr->tr_size = asize; 861789Sahrens list_insert_tail(tr_list, tr); 862789Sahrens 863789Sahrens /* see if it's OK with our parent */ 864789Sahrens if (dd->dd_parent && parent_rsrv) { 865789Sahrens return (dsl_dir_tempreserve_impl(dd->dd_parent, 866789Sahrens parent_rsrv, netfree, tr_list, tx)); 867789Sahrens } else { 868789Sahrens return (0); 869789Sahrens } 870789Sahrens } 871789Sahrens 872789Sahrens /* 873789Sahrens * Reserve space in this dsl_dir, to be used in this tx's txg. 874789Sahrens * After the space has been dirtied (and thus 875789Sahrens * dsl_dir_willuse_space() has been called), the reservation should 876789Sahrens * be canceled, using dsl_dir_tempreserve_clear(). 877789Sahrens */ 878789Sahrens int 879789Sahrens dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, 880789Sahrens uint64_t asize, uint64_t fsize, void **tr_cookiep, dmu_tx_t *tx) 881789Sahrens { 882789Sahrens int err = 0; 883789Sahrens list_t *tr_list; 884789Sahrens 885789Sahrens tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP); 886789Sahrens list_create(tr_list, sizeof (struct tempreserve), 887789Sahrens offsetof(struct tempreserve, tr_node)); 888789Sahrens 889789Sahrens err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize, 890789Sahrens tr_list, tx); 891789Sahrens 892789Sahrens if (err == 0) { 893789Sahrens struct tempreserve *tr; 894789Sahrens 895789Sahrens err = arc_tempreserve_space(lsize); 896789Sahrens if (err == 0) { 897789Sahrens tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP); 898789Sahrens tr->tr_ds = NULL; 899789Sahrens tr->tr_size = lsize; 900789Sahrens list_insert_tail(tr_list, tr); 901789Sahrens } 902789Sahrens } 903789Sahrens 904789Sahrens if (err) 905789Sahrens dsl_dir_tempreserve_clear(tr_list, tx); 906789Sahrens else 907789Sahrens *tr_cookiep = tr_list; 908789Sahrens return (err); 909789Sahrens } 910789Sahrens 911789Sahrens /* 912789Sahrens * Clear a temporary reservation that we previously made with 913789Sahrens * dsl_dir_tempreserve_space(). 914789Sahrens */ 915789Sahrens void 916789Sahrens dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx) 917789Sahrens { 918789Sahrens int txgidx = tx->tx_txg & TXG_MASK; 919789Sahrens list_t *tr_list = tr_cookie; 920789Sahrens struct tempreserve *tr; 921789Sahrens 922789Sahrens ASSERT3U(tx->tx_txg, !=, 0); 923789Sahrens 924789Sahrens while (tr = list_head(tr_list)) { 925789Sahrens if (tr->tr_ds == NULL) { 926789Sahrens arc_tempreserve_clear(tr->tr_size); 927789Sahrens } else { 928789Sahrens mutex_enter(&tr->tr_ds->dd_lock); 929789Sahrens ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=, 930789Sahrens tr->tr_size); 931789Sahrens tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size; 932789Sahrens mutex_exit(&tr->tr_ds->dd_lock); 933789Sahrens } 934789Sahrens list_remove(tr_list, tr); 935789Sahrens kmem_free(tr, sizeof (struct tempreserve)); 936789Sahrens } 937789Sahrens 938789Sahrens kmem_free(tr_list, sizeof (list_t)); 939789Sahrens } 940789Sahrens 941789Sahrens /* 942789Sahrens * Call in open context when we think we're going to write/free space, 943789Sahrens * eg. when dirtying data. Be conservative (ie. OK to write less than 944789Sahrens * this or free more than this, but don't write more or free less). 945789Sahrens */ 946789Sahrens void 947789Sahrens dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) 948789Sahrens { 949789Sahrens int64_t parent_space; 950789Sahrens uint64_t est_used; 951789Sahrens 952789Sahrens mutex_enter(&dd->dd_lock); 953789Sahrens if (space > 0) 954789Sahrens dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space; 955789Sahrens 956789Sahrens est_used = dsl_dir_estimated_space(dd); 957789Sahrens parent_space = parent_delta(dd, est_used, space); 958789Sahrens mutex_exit(&dd->dd_lock); 959789Sahrens 960789Sahrens /* Make sure that we clean up dd_space_to* */ 961789Sahrens dsl_dir_dirty(dd, tx); 962789Sahrens 963789Sahrens /* XXX this is potentially expensive and unnecessary... */ 964789Sahrens if (parent_space && dd->dd_parent) 965789Sahrens dsl_dir_willuse_space(dd->dd_parent, parent_space, tx); 966789Sahrens } 967789Sahrens 968789Sahrens /* call from syncing context when we actually write/free space for this dd */ 969789Sahrens void 970789Sahrens dsl_dir_diduse_space(dsl_dir_t *dd, 971789Sahrens int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx) 972789Sahrens { 973789Sahrens int64_t accounted_delta; 974789Sahrens 975789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 976789Sahrens 977789Sahrens dsl_dir_dirty(dd, tx); 978789Sahrens 979789Sahrens mutex_enter(&dd->dd_lock); 980789Sahrens accounted_delta = parent_delta(dd, dd->dd_used_bytes, used); 981789Sahrens ASSERT(used >= 0 || dd->dd_used_bytes >= -used); 982789Sahrens ASSERT(compressed >= 0 || 983789Sahrens dd->dd_phys->dd_compressed_bytes >= -compressed); 984789Sahrens ASSERT(uncompressed >= 0 || 985789Sahrens dd->dd_phys->dd_uncompressed_bytes >= -uncompressed); 986789Sahrens dd->dd_used_bytes += used; 987789Sahrens if (used > 0) 988789Sahrens dd->dd_space_towrite[tx->tx_txg & TXG_MASK] -= used; 989789Sahrens dd->dd_phys->dd_uncompressed_bytes += uncompressed; 990789Sahrens dd->dd_phys->dd_compressed_bytes += compressed; 991789Sahrens mutex_exit(&dd->dd_lock); 992789Sahrens 993789Sahrens if (dd->dd_parent != NULL) { 994789Sahrens dsl_dir_diduse_space(dd->dd_parent, 995789Sahrens accounted_delta, compressed, uncompressed, tx); 996789Sahrens } 997789Sahrens } 998789Sahrens 999789Sahrens static int 1000789Sahrens dsl_dir_set_quota_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 1001789Sahrens { 1002789Sahrens uint64_t *quotap = arg; 1003789Sahrens uint64_t new_quota = *quotap; 1004789Sahrens int err = 0; 1005789Sahrens 1006789Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 1007789Sahrens 1008789Sahrens mutex_enter(&dd->dd_lock); 1009789Sahrens if (new_quota != 0 && (new_quota < dd->dd_phys->dd_reserved || 1010789Sahrens new_quota < dsl_dir_estimated_space(dd))) { 1011789Sahrens err = ENOSPC; 1012789Sahrens } else { 1013789Sahrens dd->dd_phys->dd_quota = new_quota; 1014789Sahrens } 1015789Sahrens mutex_exit(&dd->dd_lock); 1016789Sahrens return (err); 1017789Sahrens } 1018789Sahrens 1019789Sahrens int 1020789Sahrens dsl_dir_set_quota(const char *ddname, uint64_t quota) 1021789Sahrens { 1022789Sahrens dsl_dir_t *dd; 1023789Sahrens int err; 1024789Sahrens 1025789Sahrens dd = dsl_dir_open(ddname, FTAG, NULL); 1026789Sahrens if (dd == NULL) 1027789Sahrens return (ENOENT); 1028789Sahrens /* 1029789Sahrens * If someone removes a file, then tries to set the quota, we 1030789Sahrens * want to make sure the file freeing takes effect. 1031789Sahrens */ 1032789Sahrens txg_wait_open(dd->dd_pool, 0); 1033789Sahrens 1034789Sahrens err = dsl_dir_sync_task(dd, dsl_dir_set_quota_sync, "a, 0); 1035789Sahrens dsl_dir_close(dd, FTAG); 1036789Sahrens return (err); 1037789Sahrens } 1038789Sahrens 1039789Sahrens static int 1040789Sahrens dsl_dir_set_reservation_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 1041789Sahrens { 1042789Sahrens uint64_t *reservationp = arg; 1043789Sahrens uint64_t new_reservation = *reservationp; 1044789Sahrens uint64_t used, avail; 1045789Sahrens int64_t delta; 1046789Sahrens 1047789Sahrens if (new_reservation > INT64_MAX) 1048789Sahrens return (EOVERFLOW); 1049789Sahrens 1050789Sahrens mutex_enter(&dd->dd_lock); 1051789Sahrens used = dd->dd_used_bytes; 1052789Sahrens delta = MAX(used, new_reservation) - 1053789Sahrens MAX(used, dd->dd_phys->dd_reserved); 1054789Sahrens mutex_exit(&dd->dd_lock); 1055789Sahrens 1056789Sahrens if (dd->dd_parent) { 1057789Sahrens avail = dsl_dir_space_available(dd->dd_parent, 1058789Sahrens NULL, 0, FALSE); 1059789Sahrens } else { 1060789Sahrens avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used; 1061789Sahrens } 1062789Sahrens 1063789Sahrens if (delta > 0 && delta > avail) 1064789Sahrens return (ENOSPC); 1065789Sahrens if (delta > 0 && dd->dd_phys->dd_quota > 0 && 1066789Sahrens new_reservation > dd->dd_phys->dd_quota) 1067789Sahrens return (ENOSPC); 1068789Sahrens 1069789Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 1070789Sahrens dd->dd_phys->dd_reserved = new_reservation; 1071789Sahrens 1072789Sahrens if (dd->dd_parent != NULL) { 1073789Sahrens /* Roll up this additional usage into our ancestors */ 1074789Sahrens dsl_dir_diduse_space(dd->dd_parent, delta, 0, 0, tx); 1075789Sahrens } 1076789Sahrens return (0); 1077789Sahrens } 1078789Sahrens 1079789Sahrens int 1080789Sahrens dsl_dir_set_reservation(const char *ddname, uint64_t reservation) 1081789Sahrens { 1082789Sahrens dsl_dir_t *dd; 1083789Sahrens int err; 1084789Sahrens 1085789Sahrens dd = dsl_dir_open(ddname, FTAG, NULL); 1086789Sahrens if (dd == NULL) 1087789Sahrens return (ENOENT); 1088789Sahrens err = dsl_dir_sync_task(dd, 1089789Sahrens dsl_dir_set_reservation_sync, &reservation, 0); 1090789Sahrens dsl_dir_close(dd, FTAG); 1091789Sahrens return (err); 1092789Sahrens } 1093789Sahrens 1094789Sahrens static dsl_dir_t * 1095789Sahrens closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2) 1096789Sahrens { 1097789Sahrens for (; ds1; ds1 = ds1->dd_parent) { 1098789Sahrens dsl_dir_t *dd; 1099789Sahrens for (dd = ds2; dd; dd = dd->dd_parent) { 1100789Sahrens if (ds1 == dd) 1101789Sahrens return (dd); 1102789Sahrens } 1103789Sahrens } 1104789Sahrens return (NULL); 1105789Sahrens } 1106789Sahrens 1107789Sahrens /* 1108789Sahrens * If delta is applied to dd, how much of that delta would be applied to 1109789Sahrens * ancestor? Syncing context only. 1110789Sahrens */ 1111789Sahrens static int64_t 1112789Sahrens would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor) 1113789Sahrens { 1114789Sahrens if (dd == ancestor) 1115789Sahrens return (delta); 1116789Sahrens 1117789Sahrens mutex_enter(&dd->dd_lock); 1118789Sahrens delta = parent_delta(dd, dd->dd_used_bytes, delta); 1119789Sahrens mutex_exit(&dd->dd_lock); 1120789Sahrens return (would_change(dd->dd_parent, delta, ancestor)); 1121789Sahrens } 1122789Sahrens 1123789Sahrens int 1124789Sahrens dsl_dir_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 1125789Sahrens { 1126789Sahrens const char *newname = arg; 1127789Sahrens dsl_pool_t *dp = dd->dd_pool; 1128789Sahrens objset_t *mos = dp->dp_meta_objset; 1129789Sahrens dsl_dir_t *newpds; 1130789Sahrens const char *tail; 1131789Sahrens int err, len; 1132789Sahrens 1133789Sahrens /* can't rename to different pool */ 1134789Sahrens len = strlen(dp->dp_root_dir->dd_myname); 1135789Sahrens if (strncmp(dp->dp_root_dir->dd_myname, newname, len != 0) || 1136789Sahrens newname[len] != '/') { 1137789Sahrens return (ENXIO); 1138789Sahrens } 1139789Sahrens 1140789Sahrens newpds = dsl_dir_open_spa(dp->dp_spa, newname, FTAG, &tail); 1141789Sahrens 1142789Sahrens /* new parent should exist */ 1143789Sahrens if (newpds == NULL) 1144789Sahrens return (ENOENT); 1145789Sahrens 1146789Sahrens /* new name should not already exist */ 1147789Sahrens if (tail == NULL) { 1148789Sahrens dsl_dir_close(newpds, FTAG); 1149789Sahrens return (EEXIST); 1150789Sahrens } 1151789Sahrens 1152789Sahrens rw_enter(&dp->dp_config_rwlock, RW_WRITER); 1153789Sahrens 1154789Sahrens /* There should be 2 references: the open and the dirty */ 1155789Sahrens if (dmu_buf_refcount(dd->dd_dbuf) > 2) { 1156789Sahrens rw_exit(&dp->dp_config_rwlock); 1157789Sahrens dsl_dir_close(newpds, FTAG); 1158789Sahrens return (EBUSY); 1159789Sahrens } 1160789Sahrens 1161789Sahrens if (newpds != dd->dd_parent) { 1162789Sahrens dsl_dir_t *ancestor; 1163789Sahrens int64_t adelta; 1164789Sahrens uint64_t myspace, avail; 1165789Sahrens 1166789Sahrens ancestor = closest_common_ancestor(dd, newpds); 1167789Sahrens 1168789Sahrens /* no rename into our descendent */ 1169789Sahrens if (ancestor == dd) { 1170789Sahrens dsl_dir_close(newpds, FTAG); 1171789Sahrens rw_exit(&dp->dp_config_rwlock); 1172789Sahrens return (EINVAL); 1173789Sahrens } 1174789Sahrens 1175789Sahrens myspace = MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved); 1176789Sahrens adelta = would_change(dd->dd_parent, -myspace, ancestor); 1177789Sahrens avail = dsl_dir_space_available(newpds, 1178789Sahrens ancestor, adelta, FALSE); 1179789Sahrens if (avail < myspace) { 1180789Sahrens dsl_dir_close(newpds, FTAG); 1181789Sahrens rw_exit(&dp->dp_config_rwlock); 1182789Sahrens return (ENOSPC); 1183789Sahrens } 1184789Sahrens 1185789Sahrens /* The point of no (unsuccessful) return */ 1186789Sahrens 1187789Sahrens dsl_dir_diduse_space(dd->dd_parent, -myspace, 1188789Sahrens -dd->dd_phys->dd_compressed_bytes, 1189789Sahrens -dd->dd_phys->dd_uncompressed_bytes, tx); 1190789Sahrens dsl_dir_diduse_space(newpds, myspace, 1191789Sahrens dd->dd_phys->dd_compressed_bytes, 1192789Sahrens dd->dd_phys->dd_uncompressed_bytes, tx); 1193789Sahrens } 1194789Sahrens 1195789Sahrens /* The point of no (unsuccessful) return */ 1196789Sahrens 1197789Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 1198789Sahrens 1199789Sahrens /* remove from old parent zapobj */ 1200789Sahrens err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj, 1201789Sahrens dd->dd_myname, tx); 1202789Sahrens ASSERT3U(err, ==, 0); 1203789Sahrens 1204789Sahrens (void) strcpy(dd->dd_myname, tail); 1205789Sahrens dsl_dir_close(dd->dd_parent, dd); 1206789Sahrens dd->dd_phys->dd_parent_obj = newpds->dd_object; 1207789Sahrens dd->dd_parent = dsl_dir_open_obj(dd->dd_pool, 1208789Sahrens newpds->dd_object, NULL, dd); 1209789Sahrens 1210789Sahrens /* add to new parent zapobj */ 1211789Sahrens err = zap_add(mos, newpds->dd_phys->dd_child_dir_zapobj, 1212789Sahrens dd->dd_myname, 8, 1, &dd->dd_object, tx); 1213789Sahrens ASSERT3U(err, ==, 0); 1214789Sahrens 1215789Sahrens dsl_dir_close(newpds, FTAG); 1216789Sahrens rw_exit(&dp->dp_config_rwlock); 1217789Sahrens return (0); 1218789Sahrens } 1219