1*789Sahrens /* 2*789Sahrens * CDDL HEADER START 3*789Sahrens * 4*789Sahrens * The contents of this file are subject to the terms of the 5*789Sahrens * Common Development and Distribution License, Version 1.0 only 6*789Sahrens * (the "License"). You may not use this file except in compliance 7*789Sahrens * with the License. 8*789Sahrens * 9*789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*789Sahrens * or http://www.opensolaris.org/os/licensing. 11*789Sahrens * See the License for the specific language governing permissions 12*789Sahrens * and limitations under the License. 13*789Sahrens * 14*789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 15*789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*789Sahrens * If applicable, add the following below this CDDL HEADER, with the 17*789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 18*789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 19*789Sahrens * 20*789Sahrens * CDDL HEADER END 21*789Sahrens */ 22*789Sahrens /* 23*789Sahrens * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*789Sahrens * Use is subject to license terms. 25*789Sahrens */ 26*789Sahrens 27*789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 28*789Sahrens 29*789Sahrens #include <sys/dmu.h> 30*789Sahrens #include <sys/dmu_tx.h> 31*789Sahrens #include <sys/dsl_dataset.h> 32*789Sahrens #include <sys/dsl_dir.h> 33*789Sahrens #include <sys/dsl_prop.h> 34*789Sahrens #include <sys/spa.h> 35*789Sahrens #include <sys/zap.h> 36*789Sahrens #include <sys/zio.h> 37*789Sahrens #include <sys/arc.h> 38*789Sahrens #include "zfs_namecheck.h" 39*789Sahrens 40*789Sahrens static uint64_t dsl_dir_space_accounted(dsl_dir_t *dd); 41*789Sahrens static uint64_t dsl_dir_estimated_space(dsl_dir_t *dd); 42*789Sahrens static int dsl_dir_set_reservation_sync(dsl_dir_t *dd, 43*789Sahrens void *arg, dmu_tx_t *tx); 44*789Sahrens static uint64_t dsl_dir_space_available(dsl_dir_t *dd, 45*789Sahrens dsl_dir_t *ancestor, int64_t delta, int ondiskonly); 46*789Sahrens 47*789Sahrens 48*789Sahrens /* ARGSUSED */ 49*789Sahrens static void 50*789Sahrens dsl_dir_evict(dmu_buf_t *db, void *arg) 51*789Sahrens { 52*789Sahrens dsl_dir_t *dd = arg; 53*789Sahrens dsl_pool_t *dp = dd->dd_pool; 54*789Sahrens int t; 55*789Sahrens 56*789Sahrens for (t = 0; t < TXG_SIZE; t++) { 57*789Sahrens ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); 58*789Sahrens ASSERT(dd->dd_tempreserved[t] == 0); 59*789Sahrens ASSERT(dd->dd_space_towrite[t] == 0); 60*789Sahrens } 61*789Sahrens 62*789Sahrens ASSERT3U(dd->dd_used_bytes, ==, dd->dd_phys->dd_used_bytes); 63*789Sahrens 64*789Sahrens ASSERT(dd->dd_sync_txg == 0); 65*789Sahrens 66*789Sahrens if (dd->dd_parent) 67*789Sahrens dsl_dir_close(dd->dd_parent, dd); 68*789Sahrens 69*789Sahrens spa_close(dd->dd_pool->dp_spa, dd); 70*789Sahrens 71*789Sahrens /* 72*789Sahrens * The props callback list should be empty since they hold the 73*789Sahrens * dir open. 74*789Sahrens */ 75*789Sahrens list_destroy(&dd->dd_prop_cbs); 76*789Sahrens kmem_free(dd, sizeof (dsl_dir_t)); 77*789Sahrens } 78*789Sahrens 79*789Sahrens dsl_dir_t * 80*789Sahrens dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj, 81*789Sahrens const char *tail, void *tag) 82*789Sahrens { 83*789Sahrens dmu_buf_t *dbuf; 84*789Sahrens dsl_dir_t *dd; 85*789Sahrens 86*789Sahrens ASSERT(RW_LOCK_HELD(&dp->dp_config_rwlock) || 87*789Sahrens dsl_pool_sync_context(dp)); 88*789Sahrens 89*789Sahrens dbuf = dmu_bonus_hold_tag(dp->dp_meta_objset, ddobj, tag); 90*789Sahrens dmu_buf_read(dbuf); 91*789Sahrens dd = dmu_buf_get_user(dbuf); 92*789Sahrens #ifdef ZFS_DEBUG 93*789Sahrens { 94*789Sahrens dmu_object_info_t doi; 95*789Sahrens dmu_object_info_from_db(dbuf, &doi); 96*789Sahrens ASSERT3U(doi.doi_type, ==, DMU_OT_DSL_DATASET); 97*789Sahrens } 98*789Sahrens #endif 99*789Sahrens /* XXX assert bonus buffer size is correct */ 100*789Sahrens if (dd == NULL) { 101*789Sahrens dsl_dir_t *winner; 102*789Sahrens int err; 103*789Sahrens 104*789Sahrens dd = kmem_zalloc(sizeof (dsl_dir_t), KM_SLEEP); 105*789Sahrens dd->dd_object = ddobj; 106*789Sahrens dd->dd_dbuf = dbuf; 107*789Sahrens dd->dd_pool = dp; 108*789Sahrens dd->dd_phys = dbuf->db_data; 109*789Sahrens dd->dd_used_bytes = dd->dd_phys->dd_used_bytes; 110*789Sahrens 111*789Sahrens list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t), 112*789Sahrens offsetof(dsl_prop_cb_record_t, cbr_node)); 113*789Sahrens 114*789Sahrens if (dd->dd_phys->dd_parent_obj) { 115*789Sahrens dd->dd_parent = dsl_dir_open_obj(dp, 116*789Sahrens dd->dd_phys->dd_parent_obj, NULL, dd); 117*789Sahrens if (tail) { 118*789Sahrens #ifdef ZFS_DEBUG 119*789Sahrens uint64_t foundobj; 120*789Sahrens 121*789Sahrens err = zap_lookup(dp->dp_meta_objset, 122*789Sahrens dd->dd_parent->dd_phys-> 123*789Sahrens dd_child_dir_zapobj, 124*789Sahrens tail, sizeof (foundobj), 1, &foundobj); 125*789Sahrens ASSERT3U(err, ==, 0); 126*789Sahrens ASSERT3U(foundobj, ==, ddobj); 127*789Sahrens #endif 128*789Sahrens (void) strcpy(dd->dd_myname, tail); 129*789Sahrens } else { 130*789Sahrens err = zap_value_search(dp->dp_meta_objset, 131*789Sahrens dd->dd_parent->dd_phys-> 132*789Sahrens dd_child_dir_zapobj, 133*789Sahrens ddobj, dd->dd_myname); 134*789Sahrens /* 135*789Sahrens * The caller should be protecting this ddobj 136*789Sahrens * from being deleted concurrently 137*789Sahrens */ 138*789Sahrens ASSERT(err == 0); 139*789Sahrens } 140*789Sahrens } else { 141*789Sahrens (void) strcpy(dd->dd_myname, spa_name(dp->dp_spa)); 142*789Sahrens } 143*789Sahrens 144*789Sahrens winner = dmu_buf_set_user_ie(dbuf, dd, &dd->dd_phys, 145*789Sahrens dsl_dir_evict); 146*789Sahrens if (winner) { 147*789Sahrens if (dd->dd_parent) 148*789Sahrens dsl_dir_close(dd->dd_parent, dd); 149*789Sahrens kmem_free(dd, sizeof (dsl_dir_t)); 150*789Sahrens dd = winner; 151*789Sahrens } else { 152*789Sahrens spa_open_ref(dp->dp_spa, dd); 153*789Sahrens } 154*789Sahrens } 155*789Sahrens 156*789Sahrens /* 157*789Sahrens * The dsl_dir_t has both open-to-close and instantiate-to-evict 158*789Sahrens * holds on the spa. We need the open-to-close holds because 159*789Sahrens * otherwise the spa_refcnt wouldn't change when we open a 160*789Sahrens * dir which the spa also has open, so we could incorrectly 161*789Sahrens * think it was OK to unload/export/destroy the pool. We need 162*789Sahrens * the instantiate-to-evict hold because the dsl_dir_t has a 163*789Sahrens * pointer to the dd_pool, which has a pointer to the spa_t. 164*789Sahrens */ 165*789Sahrens spa_open_ref(dp->dp_spa, tag); 166*789Sahrens ASSERT3P(dd->dd_pool, ==, dp); 167*789Sahrens ASSERT3U(dd->dd_object, ==, ddobj); 168*789Sahrens ASSERT3P(dd->dd_dbuf, ==, dbuf); 169*789Sahrens return (dd); 170*789Sahrens } 171*789Sahrens 172*789Sahrens void 173*789Sahrens dsl_dir_close(dsl_dir_t *dd, void *tag) 174*789Sahrens { 175*789Sahrens dprintf_dd(dd, "%s\n", ""); 176*789Sahrens spa_close(dd->dd_pool->dp_spa, tag); 177*789Sahrens dmu_buf_rele_tag(dd->dd_dbuf, tag); 178*789Sahrens } 179*789Sahrens 180*789Sahrens /* buf must be long enough (MAXNAMELEN should do) */ 181*789Sahrens void 182*789Sahrens dsl_dir_name(dsl_dir_t *dd, char *buf) 183*789Sahrens { 184*789Sahrens if (dd->dd_parent) { 185*789Sahrens dsl_dir_name(dd->dd_parent, buf); 186*789Sahrens (void) strcat(buf, "/"); 187*789Sahrens } else { 188*789Sahrens buf[0] = '\0'; 189*789Sahrens } 190*789Sahrens if (!MUTEX_HELD(&dd->dd_lock)) { 191*789Sahrens /* 192*789Sahrens * recursive mutex so that we can use 193*789Sahrens * dprintf_dd() with dd_lock held 194*789Sahrens */ 195*789Sahrens mutex_enter(&dd->dd_lock); 196*789Sahrens (void) strcat(buf, dd->dd_myname); 197*789Sahrens mutex_exit(&dd->dd_lock); 198*789Sahrens } else { 199*789Sahrens (void) strcat(buf, dd->dd_myname); 200*789Sahrens } 201*789Sahrens } 202*789Sahrens 203*789Sahrens int 204*789Sahrens dsl_dir_is_private(dsl_dir_t *dd) 205*789Sahrens { 206*789Sahrens int rv = FALSE; 207*789Sahrens 208*789Sahrens if (dd->dd_parent && dsl_dir_is_private(dd->dd_parent)) 209*789Sahrens rv = TRUE; 210*789Sahrens if (dataset_name_hidden(dd->dd_myname)) 211*789Sahrens rv = TRUE; 212*789Sahrens return (rv); 213*789Sahrens } 214*789Sahrens 215*789Sahrens 216*789Sahrens static int 217*789Sahrens getcomponent(const char *path, char *component, const char **nextp) 218*789Sahrens { 219*789Sahrens char *p; 220*789Sahrens if (path == NULL) 221*789Sahrens return (NULL); 222*789Sahrens /* This would be a good place to reserve some namespace... */ 223*789Sahrens p = strpbrk(path, "/@"); 224*789Sahrens if (p && (p[1] == '/' || p[1] == '@')) { 225*789Sahrens /* two separators in a row */ 226*789Sahrens return (EINVAL); 227*789Sahrens } 228*789Sahrens if (p == NULL || p == path) { 229*789Sahrens /* 230*789Sahrens * if the first thing is an @ or /, it had better be an 231*789Sahrens * @ and it had better not have any more ats or slashes, 232*789Sahrens * and it had better have something after the @. 233*789Sahrens */ 234*789Sahrens if (p != NULL && 235*789Sahrens (p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0')) 236*789Sahrens return (EINVAL); 237*789Sahrens if (strlen(path) >= MAXNAMELEN) 238*789Sahrens return (ENAMETOOLONG); 239*789Sahrens (void) strcpy(component, path); 240*789Sahrens p = NULL; 241*789Sahrens } else if (p[0] == '/') { 242*789Sahrens if (p-path >= MAXNAMELEN) 243*789Sahrens return (ENAMETOOLONG); 244*789Sahrens (void) strncpy(component, path, p - path); 245*789Sahrens component[p-path] = '\0'; 246*789Sahrens p++; 247*789Sahrens } else if (p[0] == '@') { 248*789Sahrens /* 249*789Sahrens * if the next separator is an @, there better not be 250*789Sahrens * any more slashes. 251*789Sahrens */ 252*789Sahrens if (strchr(path, '/')) 253*789Sahrens return (EINVAL); 254*789Sahrens if (p-path >= MAXNAMELEN) 255*789Sahrens return (ENAMETOOLONG); 256*789Sahrens (void) strncpy(component, path, p - path); 257*789Sahrens component[p-path] = '\0'; 258*789Sahrens } else { 259*789Sahrens ASSERT(!"invalid p"); 260*789Sahrens } 261*789Sahrens *nextp = p; 262*789Sahrens return (0); 263*789Sahrens } 264*789Sahrens 265*789Sahrens /* 266*789Sahrens * same as dsl_open_dir, ignore the first component of name and use the 267*789Sahrens * spa instead 268*789Sahrens */ 269*789Sahrens dsl_dir_t * 270*789Sahrens dsl_dir_open_spa(spa_t *spa, const char *name, void *tag, const char **tailp) 271*789Sahrens { 272*789Sahrens char buf[MAXNAMELEN]; 273*789Sahrens const char *next, *nextnext = NULL; 274*789Sahrens int err; 275*789Sahrens dsl_dir_t *dd; 276*789Sahrens dsl_pool_t *dp; 277*789Sahrens uint64_t ddobj; 278*789Sahrens int openedspa = FALSE; 279*789Sahrens 280*789Sahrens dprintf("%s\n", name); 281*789Sahrens 282*789Sahrens if (name == NULL) 283*789Sahrens return (NULL); 284*789Sahrens err = getcomponent(name, buf, &next); 285*789Sahrens if (err) 286*789Sahrens return (NULL); 287*789Sahrens if (spa == NULL) { 288*789Sahrens err = spa_open(buf, &spa, FTAG); 289*789Sahrens if (err) { 290*789Sahrens dprintf("spa_open(%s) failed\n", buf); 291*789Sahrens return (NULL); 292*789Sahrens } 293*789Sahrens openedspa = TRUE; 294*789Sahrens 295*789Sahrens /* XXX this assertion belongs in spa_open */ 296*789Sahrens ASSERT(!dsl_pool_sync_context(spa_get_dsl(spa))); 297*789Sahrens } 298*789Sahrens 299*789Sahrens dp = spa_get_dsl(spa); 300*789Sahrens 301*789Sahrens rw_enter(&dp->dp_config_rwlock, RW_READER); 302*789Sahrens dd = dsl_dir_open_obj(dp, dp->dp_root_dir_obj, NULL, tag); 303*789Sahrens while (next != NULL) { 304*789Sahrens dsl_dir_t *child_ds; 305*789Sahrens err = getcomponent(next, buf, &nextnext); 306*789Sahrens if (err) { 307*789Sahrens dsl_dir_close(dd, tag); 308*789Sahrens if (openedspa) 309*789Sahrens spa_close(spa, FTAG); 310*789Sahrens return (NULL); 311*789Sahrens } 312*789Sahrens ASSERT(next[0] != '\0'); 313*789Sahrens if (next[0] == '@') 314*789Sahrens break; 315*789Sahrens if (dd->dd_phys->dd_child_dir_zapobj == 0) 316*789Sahrens break; 317*789Sahrens dprintf("looking up %s in obj%lld\n", 318*789Sahrens buf, dd->dd_phys->dd_child_dir_zapobj); 319*789Sahrens 320*789Sahrens err = zap_lookup(dp->dp_meta_objset, 321*789Sahrens dd->dd_phys->dd_child_dir_zapobj, 322*789Sahrens buf, sizeof (ddobj), 1, &ddobj); 323*789Sahrens if (err == ENOENT) { 324*789Sahrens break; 325*789Sahrens } 326*789Sahrens ASSERT(err == 0); 327*789Sahrens 328*789Sahrens child_ds = dsl_dir_open_obj(dp, ddobj, buf, tag); 329*789Sahrens dsl_dir_close(dd, tag); 330*789Sahrens dd = child_ds; 331*789Sahrens next = nextnext; 332*789Sahrens } 333*789Sahrens rw_exit(&dp->dp_config_rwlock); 334*789Sahrens 335*789Sahrens /* 336*789Sahrens * It's an error if there's more than one component left, or 337*789Sahrens * tailp==NULL and there's any component left. 338*789Sahrens */ 339*789Sahrens if (next != NULL && 340*789Sahrens (tailp == NULL || (nextnext && nextnext[0] != '\0'))) { 341*789Sahrens /* bad path name */ 342*789Sahrens dsl_dir_close(dd, tag); 343*789Sahrens dprintf("next=%p (%s) tail=%p\n", next, next?next:"", tailp); 344*789Sahrens next = NULL; 345*789Sahrens dd = NULL; 346*789Sahrens } 347*789Sahrens if (tailp) 348*789Sahrens *tailp = next; 349*789Sahrens if (openedspa) 350*789Sahrens spa_close(spa, FTAG); 351*789Sahrens return (dd); 352*789Sahrens } 353*789Sahrens 354*789Sahrens /* 355*789Sahrens * Return the dsl_dir_t, and possibly the last component which couldn't 356*789Sahrens * be found in *tail. Return NULL if the path is bogus, or if 357*789Sahrens * tail==NULL and we couldn't parse the whole name. (*tail)[0] == '@' 358*789Sahrens * means that the last component is a snapshot. 359*789Sahrens */ 360*789Sahrens dsl_dir_t * 361*789Sahrens dsl_dir_open(const char *name, void *tag, const char **tailp) 362*789Sahrens { 363*789Sahrens return (dsl_dir_open_spa(NULL, name, tag, tailp)); 364*789Sahrens } 365*789Sahrens 366*789Sahrens int 367*789Sahrens dsl_dir_create_sync(dsl_dir_t *pds, const char *name, dmu_tx_t *tx) 368*789Sahrens { 369*789Sahrens objset_t *mos = pds->dd_pool->dp_meta_objset; 370*789Sahrens uint64_t ddobj; 371*789Sahrens dsl_dir_phys_t *dsphys; 372*789Sahrens dmu_buf_t *dbuf; 373*789Sahrens int err; 374*789Sahrens 375*789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 376*789Sahrens 377*789Sahrens if (pds->dd_phys->dd_child_dir_zapobj == 0) { 378*789Sahrens dmu_buf_will_dirty(pds->dd_dbuf, tx); 379*789Sahrens pds->dd_phys->dd_child_dir_zapobj = zap_create(mos, 380*789Sahrens DMU_OT_DSL_DATASET_CHILD_MAP, DMU_OT_NONE, 0, tx); 381*789Sahrens } 382*789Sahrens 383*789Sahrens rw_enter(&pds->dd_pool->dp_config_rwlock, RW_WRITER); 384*789Sahrens err = zap_lookup(mos, pds->dd_phys->dd_child_dir_zapobj, 385*789Sahrens name, sizeof (uint64_t), 1, &ddobj); 386*789Sahrens if (err != ENOENT) { 387*789Sahrens rw_exit(&pds->dd_pool->dp_config_rwlock); 388*789Sahrens return (err ? err : EEXIST); 389*789Sahrens } 390*789Sahrens 391*789Sahrens ddobj = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 392*789Sahrens DMU_OT_DSL_DATASET, sizeof (dsl_dir_phys_t), tx); 393*789Sahrens err = zap_add(mos, pds->dd_phys->dd_child_dir_zapobj, 394*789Sahrens name, sizeof (uint64_t), 1, &ddobj, tx); 395*789Sahrens ASSERT3U(err, ==, 0); 396*789Sahrens dprintf("dataset_create: zap_add %s->%lld to %lld returned %d\n", 397*789Sahrens name, ddobj, pds->dd_phys->dd_child_dir_zapobj, err); 398*789Sahrens 399*789Sahrens dbuf = dmu_bonus_hold(mos, ddobj); 400*789Sahrens dmu_buf_will_dirty(dbuf, tx); 401*789Sahrens dsphys = dbuf->db_data; 402*789Sahrens 403*789Sahrens dsphys->dd_creation_time = gethrestime_sec(); 404*789Sahrens dsphys->dd_parent_obj = pds->dd_object; 405*789Sahrens dsphys->dd_props_zapobj = zap_create(mos, 406*789Sahrens DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); 407*789Sahrens dsphys->dd_child_dir_zapobj = zap_create(mos, 408*789Sahrens DMU_OT_DSL_DATASET_CHILD_MAP, DMU_OT_NONE, 0, tx); 409*789Sahrens dmu_buf_rele(dbuf); 410*789Sahrens 411*789Sahrens rw_exit(&pds->dd_pool->dp_config_rwlock); 412*789Sahrens 413*789Sahrens return (0); 414*789Sahrens } 415*789Sahrens 416*789Sahrens int 417*789Sahrens dsl_dir_destroy_sync(dsl_dir_t *pds, void *arg, dmu_tx_t *tx) 418*789Sahrens { 419*789Sahrens const char *name = arg; 420*789Sahrens dsl_dir_t *dd = NULL; 421*789Sahrens dsl_pool_t *dp = pds->dd_pool; 422*789Sahrens objset_t *mos = dp->dp_meta_objset; 423*789Sahrens uint64_t val, obj, child_zapobj, props_zapobj; 424*789Sahrens int t, err; 425*789Sahrens 426*789Sahrens rw_enter(&dp->dp_config_rwlock, RW_WRITER); 427*789Sahrens 428*789Sahrens err = zap_lookup(mos, pds->dd_phys->dd_child_dir_zapobj, name, 429*789Sahrens 8, 1, &obj); 430*789Sahrens if (err) 431*789Sahrens goto out; 432*789Sahrens 433*789Sahrens dd = dsl_dir_open_obj(dp, obj, name, FTAG); 434*789Sahrens ASSERT3U(dd->dd_phys->dd_parent_obj, ==, pds->dd_object); 435*789Sahrens 436*789Sahrens if (dmu_buf_refcount(dd->dd_dbuf) > 1) { 437*789Sahrens err = EBUSY; 438*789Sahrens goto out; 439*789Sahrens } 440*789Sahrens 441*789Sahrens for (t = 0; t < TXG_SIZE; t++) { 442*789Sahrens /* 443*789Sahrens * if they were dirty, they'd also be open. 444*789Sahrens * dp_config_rwlock ensures that it stays that way. 445*789Sahrens */ 446*789Sahrens ASSERT(!txg_list_member(&dp->dp_dirty_dirs, dd, t)); 447*789Sahrens } 448*789Sahrens 449*789Sahrens child_zapobj = dd->dd_phys->dd_child_dir_zapobj; 450*789Sahrens props_zapobj = dd->dd_phys->dd_props_zapobj; 451*789Sahrens 452*789Sahrens if (child_zapobj != 0) { 453*789Sahrens uint64_t count; 454*789Sahrens err = EEXIST; 455*789Sahrens (void) zap_count(mos, child_zapobj, &count); 456*789Sahrens if (count != 0) 457*789Sahrens goto out; 458*789Sahrens } 459*789Sahrens 460*789Sahrens if (dd->dd_phys->dd_head_dataset_obj != 0) { 461*789Sahrens err = dsl_dataset_destroy_sync(dd, NULL, tx); 462*789Sahrens if (err) 463*789Sahrens goto out; 464*789Sahrens } 465*789Sahrens ASSERT(dd->dd_phys->dd_head_dataset_obj == 0); 466*789Sahrens 467*789Sahrens /* The point of no (unsuccessful) return */ 468*789Sahrens 469*789Sahrens /* Make sure parent's used gets updated */ 470*789Sahrens val = 0; 471*789Sahrens err = dsl_dir_set_reservation_sync(dd, &val, tx); 472*789Sahrens ASSERT(err == 0); 473*789Sahrens ASSERT3U(dd->dd_used_bytes, ==, 0); 474*789Sahrens ASSERT3U(dd->dd_phys->dd_reserved, ==, 0); 475*789Sahrens dsl_dir_close(dd, FTAG); 476*789Sahrens dd = NULL; 477*789Sahrens 478*789Sahrens err = dmu_object_free(mos, obj, tx); 479*789Sahrens ASSERT(err == 0); 480*789Sahrens 481*789Sahrens if (child_zapobj) 482*789Sahrens err = zap_destroy(mos, child_zapobj, tx); 483*789Sahrens ASSERT(err == 0); 484*789Sahrens 485*789Sahrens if (props_zapobj) 486*789Sahrens err = zap_destroy(mos, props_zapobj, tx); 487*789Sahrens ASSERT(err == 0); 488*789Sahrens 489*789Sahrens err = zap_remove(mos, pds->dd_phys->dd_child_dir_zapobj, name, tx); 490*789Sahrens ASSERT(err == 0); 491*789Sahrens 492*789Sahrens out: 493*789Sahrens rw_exit(&dp->dp_config_rwlock); 494*789Sahrens if (dd) 495*789Sahrens dsl_dir_close(dd, FTAG); 496*789Sahrens 497*789Sahrens return (err); 498*789Sahrens } 499*789Sahrens 500*789Sahrens void 501*789Sahrens dsl_dir_create_root(objset_t *mos, uint64_t *ddobjp, dmu_tx_t *tx) 502*789Sahrens { 503*789Sahrens dsl_dir_phys_t *dsp; 504*789Sahrens dmu_buf_t *dbuf; 505*789Sahrens int error; 506*789Sahrens 507*789Sahrens *ddobjp = dmu_object_alloc(mos, DMU_OT_DSL_DATASET, 0, 508*789Sahrens DMU_OT_DSL_DATASET, sizeof (dsl_dir_phys_t), tx); 509*789Sahrens 510*789Sahrens error = zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ROOT_DATASET, 511*789Sahrens sizeof (uint64_t), 1, ddobjp, tx); 512*789Sahrens ASSERT3U(error, ==, 0); 513*789Sahrens 514*789Sahrens dbuf = dmu_bonus_hold(mos, *ddobjp); 515*789Sahrens dmu_buf_will_dirty(dbuf, tx); 516*789Sahrens dsp = dbuf->db_data; 517*789Sahrens 518*789Sahrens dsp->dd_creation_time = gethrestime_sec(); 519*789Sahrens dsp->dd_props_zapobj = zap_create(mos, 520*789Sahrens DMU_OT_DSL_PROPS, DMU_OT_NONE, 0, tx); 521*789Sahrens dsp->dd_child_dir_zapobj = zap_create(mos, 522*789Sahrens DMU_OT_DSL_DATASET_CHILD_MAP, DMU_OT_NONE, 0, tx); 523*789Sahrens 524*789Sahrens dmu_buf_rele(dbuf); 525*789Sahrens } 526*789Sahrens 527*789Sahrens void 528*789Sahrens dsl_dir_stats(dsl_dir_t *dd, dmu_objset_stats_t *dds) 529*789Sahrens { 530*789Sahrens bzero(dds, sizeof (dmu_objset_stats_t)); 531*789Sahrens 532*789Sahrens dds->dds_dir_obj = dd->dd_object; 533*789Sahrens dds->dds_available = dsl_dir_space_available(dd, NULL, 0, TRUE); 534*789Sahrens 535*789Sahrens mutex_enter(&dd->dd_lock); 536*789Sahrens dds->dds_space_used = dd->dd_used_bytes; 537*789Sahrens dds->dds_compressed_bytes = dd->dd_phys->dd_compressed_bytes; 538*789Sahrens dds->dds_uncompressed_bytes = dd->dd_phys->dd_uncompressed_bytes; 539*789Sahrens dds->dds_quota = dd->dd_phys->dd_quota; 540*789Sahrens dds->dds_reserved = dd->dd_phys->dd_reserved; 541*789Sahrens mutex_exit(&dd->dd_lock); 542*789Sahrens 543*789Sahrens dds->dds_creation_time = dd->dd_phys->dd_creation_time; 544*789Sahrens 545*789Sahrens dds->dds_is_placeholder = (dd->dd_phys->dd_head_dataset_obj == 0); 546*789Sahrens 547*789Sahrens if (dd->dd_phys->dd_clone_parent_obj) { 548*789Sahrens dsl_dataset_t *ds; 549*789Sahrens 550*789Sahrens rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER); 551*789Sahrens ds = dsl_dataset_open_obj(dd->dd_pool, 552*789Sahrens dd->dd_phys->dd_clone_parent_obj, NULL, DS_MODE_NONE, FTAG); 553*789Sahrens dsl_dataset_name(ds, dds->dds_clone_of); 554*789Sahrens dds->dds_clone_of_obj = dd->dd_phys->dd_clone_parent_obj; 555*789Sahrens dsl_dataset_close(ds, DS_MODE_NONE, FTAG); 556*789Sahrens rw_exit(&dd->dd_pool->dp_config_rwlock); 557*789Sahrens } 558*789Sahrens 559*789Sahrens VERIFY(dsl_prop_get_ds_integer(dd, "checksum", 560*789Sahrens &dds->dds_checksum, dds->dds_checksum_setpoint) == 0); 561*789Sahrens 562*789Sahrens VERIFY(dsl_prop_get_ds_integer(dd, "compression", 563*789Sahrens &dds->dds_compression, dds->dds_compression_setpoint) == 0); 564*789Sahrens 565*789Sahrens VERIFY(dsl_prop_get_ds_integer(dd, "zoned", 566*789Sahrens &dds->dds_zoned, dds->dds_zoned_setpoint) == 0); 567*789Sahrens 568*789Sahrens spa_altroot(dd->dd_pool->dp_spa, dds->dds_altroot, 569*789Sahrens sizeof (dds->dds_altroot)); 570*789Sahrens } 571*789Sahrens 572*789Sahrens int 573*789Sahrens dsl_dir_sync_task(dsl_dir_t *dd, 574*789Sahrens int (*func)(dsl_dir_t *, void*, dmu_tx_t *), void *arg, uint64_t space) 575*789Sahrens { 576*789Sahrens dmu_tx_t *tx; 577*789Sahrens dsl_pool_t *dp = dd->dd_pool; 578*789Sahrens int err = 0; 579*789Sahrens uint64_t txg; 580*789Sahrens 581*789Sahrens dprintf_dd(dd, "func=%p space=%llu\n", func, space); 582*789Sahrens 583*789Sahrens again: 584*789Sahrens tx = dmu_tx_create_ds(dd); 585*789Sahrens dmu_tx_hold_space(tx, space); 586*789Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 587*789Sahrens if (err == ENOSPC || err == EDQUOT) { 588*789Sahrens dsl_dir_t *rds; 589*789Sahrens /* 590*789Sahrens * They can get their space from either this dd, or the 591*789Sahrens * root dd. 592*789Sahrens */ 593*789Sahrens for (rds = dd; rds->dd_parent; rds = rds->dd_parent) 594*789Sahrens continue; 595*789Sahrens dmu_tx_abort(tx); 596*789Sahrens tx = dmu_tx_create_ds(rds); 597*789Sahrens dmu_tx_hold_space(tx, space); 598*789Sahrens err = dmu_tx_assign(tx, TXG_WAIT); 599*789Sahrens } 600*789Sahrens if (err) { 601*789Sahrens dmu_tx_abort(tx); 602*789Sahrens return (err); 603*789Sahrens } 604*789Sahrens 605*789Sahrens txg = dmu_tx_get_txg(tx); 606*789Sahrens mutex_enter(&dd->dd_lock); 607*789Sahrens if (dd->dd_sync_txg != 0) { 608*789Sahrens mutex_exit(&dd->dd_lock); 609*789Sahrens dmu_tx_commit(tx); 610*789Sahrens txg_wait_synced(dp, 0); 611*789Sahrens goto again; 612*789Sahrens } 613*789Sahrens 614*789Sahrens /* We're good to go */ 615*789Sahrens 616*789Sahrens dd->dd_sync_txg = txg; 617*789Sahrens dd->dd_sync_func = func; 618*789Sahrens dd->dd_sync_arg = arg; 619*789Sahrens 620*789Sahrens mutex_exit(&dd->dd_lock); 621*789Sahrens 622*789Sahrens dsl_dir_dirty(dd, tx); 623*789Sahrens dmu_tx_commit(tx); 624*789Sahrens 625*789Sahrens txg_wait_synced(dp, txg); 626*789Sahrens 627*789Sahrens mutex_enter(&dd->dd_lock); 628*789Sahrens ASSERT(dd->dd_sync_txg == txg); 629*789Sahrens ASSERT(dd->dd_sync_func == NULL); 630*789Sahrens err = dd->dd_sync_err; 631*789Sahrens dd->dd_sync_txg = 0; 632*789Sahrens mutex_exit(&dd->dd_lock); 633*789Sahrens 634*789Sahrens return (err); 635*789Sahrens } 636*789Sahrens 637*789Sahrens void 638*789Sahrens dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx) 639*789Sahrens { 640*789Sahrens dsl_pool_t *dp = dd->dd_pool; 641*789Sahrens 642*789Sahrens ASSERT(dd->dd_phys); 643*789Sahrens 644*789Sahrens if (txg_list_add(&dp->dp_dirty_dirs, dd, tx->tx_txg) == 0) { 645*789Sahrens /* up the hold count until we can be written out */ 646*789Sahrens dmu_buf_add_ref(dd->dd_dbuf, dd); 647*789Sahrens } 648*789Sahrens } 649*789Sahrens 650*789Sahrens static int64_t 651*789Sahrens parent_delta(dsl_dir_t *dd, uint64_t used, int64_t delta) 652*789Sahrens { 653*789Sahrens uint64_t old_accounted = MAX(used, dd->dd_phys->dd_reserved); 654*789Sahrens uint64_t new_accounted = MAX(used + delta, dd->dd_phys->dd_reserved); 655*789Sahrens return (new_accounted - old_accounted); 656*789Sahrens } 657*789Sahrens 658*789Sahrens void 659*789Sahrens dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx) 660*789Sahrens { 661*789Sahrens if (dd->dd_sync_txg == tx->tx_txg && dd->dd_sync_func) { 662*789Sahrens dd->dd_sync_err = dd->dd_sync_func(dd, dd->dd_sync_arg, tx); 663*789Sahrens dd->dd_sync_func = NULL; 664*789Sahrens } 665*789Sahrens 666*789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 667*789Sahrens 668*789Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 669*789Sahrens 670*789Sahrens mutex_enter(&dd->dd_lock); 671*789Sahrens ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0); 672*789Sahrens dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg, 673*789Sahrens dd->dd_space_towrite[tx->tx_txg&TXG_MASK] / 1024); 674*789Sahrens dd->dd_space_towrite[tx->tx_txg&TXG_MASK] = 0; 675*789Sahrens dd->dd_phys->dd_used_bytes = dd->dd_used_bytes; 676*789Sahrens mutex_exit(&dd->dd_lock); 677*789Sahrens 678*789Sahrens /* release the hold from dsl_dir_dirty */ 679*789Sahrens dmu_buf_remove_ref(dd->dd_dbuf, dd); 680*789Sahrens } 681*789Sahrens 682*789Sahrens static uint64_t 683*789Sahrens dsl_dir_estimated_space(dsl_dir_t *dd) 684*789Sahrens { 685*789Sahrens int64_t space; 686*789Sahrens int i; 687*789Sahrens 688*789Sahrens ASSERT(MUTEX_HELD(&dd->dd_lock)); 689*789Sahrens 690*789Sahrens space = dd->dd_used_bytes; 691*789Sahrens ASSERT(space >= 0); 692*789Sahrens for (i = 0; i < TXG_SIZE; i++) { 693*789Sahrens space += dd->dd_space_towrite[i&TXG_MASK]; 694*789Sahrens ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0); 695*789Sahrens } 696*789Sahrens return (space); 697*789Sahrens } 698*789Sahrens 699*789Sahrens /* 700*789Sahrens * How much space would dd have available if ancestor had delta applied 701*789Sahrens * to it? If ondiskonly is set, we're only interested in what's 702*789Sahrens * on-disk, not estimated pending changes. 703*789Sahrens */ 704*789Sahrens static uint64_t 705*789Sahrens dsl_dir_space_available(dsl_dir_t *dd, 706*789Sahrens dsl_dir_t *ancestor, int64_t delta, int ondiskonly) 707*789Sahrens { 708*789Sahrens uint64_t parentspace, myspace, quota, used; 709*789Sahrens 710*789Sahrens /* 711*789Sahrens * If there are no restrictions otherwise, assume we have 712*789Sahrens * unlimited space available. 713*789Sahrens */ 714*789Sahrens quota = UINT64_MAX; 715*789Sahrens parentspace = UINT64_MAX; 716*789Sahrens 717*789Sahrens if (dd->dd_parent != NULL) { 718*789Sahrens parentspace = dsl_dir_space_available(dd->dd_parent, 719*789Sahrens ancestor, delta, ondiskonly); 720*789Sahrens } 721*789Sahrens 722*789Sahrens mutex_enter(&dd->dd_lock); 723*789Sahrens if (dd->dd_phys->dd_quota != 0) 724*789Sahrens quota = dd->dd_phys->dd_quota; 725*789Sahrens if (ondiskonly) { 726*789Sahrens used = dd->dd_used_bytes; 727*789Sahrens } else { 728*789Sahrens used = dsl_dir_estimated_space(dd); 729*789Sahrens } 730*789Sahrens if (dd == ancestor) 731*789Sahrens used += delta; 732*789Sahrens 733*789Sahrens if (dd->dd_parent == NULL) { 734*789Sahrens uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE); 735*789Sahrens quota = MIN(quota, poolsize); 736*789Sahrens } 737*789Sahrens 738*789Sahrens if (dd->dd_phys->dd_reserved > used && parentspace != UINT64_MAX) { 739*789Sahrens /* 740*789Sahrens * We have some space reserved, in addition to what our 741*789Sahrens * parent gave us. 742*789Sahrens */ 743*789Sahrens parentspace += dd->dd_phys->dd_reserved - used; 744*789Sahrens } 745*789Sahrens 746*789Sahrens if (used > quota) { 747*789Sahrens /* over quota */ 748*789Sahrens myspace = 0; 749*789Sahrens #ifdef ZFS_DEBUG 750*789Sahrens { 751*789Sahrens /* 752*789Sahrens * While it's OK to be a little over quota, if 753*789Sahrens * we think we are using more space than there 754*789Sahrens * is in the pool (which is already 6% more than 755*789Sahrens * dsl_pool_adjustedsize()), something is very 756*789Sahrens * wrong. 757*789Sahrens */ 758*789Sahrens uint64_t space = spa_get_space(dd->dd_pool->dp_spa); 759*789Sahrens ASSERT3U(used, <=, space); 760*789Sahrens } 761*789Sahrens #endif 762*789Sahrens } else { 763*789Sahrens /* 764*789Sahrens * the lesser of parent's space and the space 765*789Sahrens * left in our quota 766*789Sahrens */ 767*789Sahrens myspace = MIN(parentspace, quota - used); 768*789Sahrens } 769*789Sahrens 770*789Sahrens mutex_exit(&dd->dd_lock); 771*789Sahrens 772*789Sahrens return (myspace); 773*789Sahrens } 774*789Sahrens 775*789Sahrens struct tempreserve { 776*789Sahrens list_node_t tr_node; 777*789Sahrens dsl_dir_t *tr_ds; 778*789Sahrens uint64_t tr_size; 779*789Sahrens }; 780*789Sahrens 781*789Sahrens /* 782*789Sahrens * Reserve space in this dsl_dir, to be used in this tx's txg. 783*789Sahrens * After the space has been dirtied (and thus 784*789Sahrens * dsl_dir_willuse_space() has been called), the reservation should 785*789Sahrens * be canceled, using dsl_dir_tempreserve_clear(). 786*789Sahrens */ 787*789Sahrens static int 788*789Sahrens dsl_dir_tempreserve_impl(dsl_dir_t *dd, 789*789Sahrens uint64_t asize, boolean_t netfree, list_t *tr_list, dmu_tx_t *tx) 790*789Sahrens { 791*789Sahrens uint64_t txg = tx->tx_txg; 792*789Sahrens uint64_t est_used, quota, parent_rsrv; 793*789Sahrens int edquot = EDQUOT; 794*789Sahrens int txgidx = txg & TXG_MASK; 795*789Sahrens int i; 796*789Sahrens struct tempreserve *tr; 797*789Sahrens 798*789Sahrens ASSERT3U(txg, !=, 0); 799*789Sahrens 800*789Sahrens mutex_enter(&dd->dd_lock); 801*789Sahrens /* 802*789Sahrens * Check against the dsl_dir's quota. We don't add in the delta 803*789Sahrens * when checking for over-quota because they get one free hit. 804*789Sahrens */ 805*789Sahrens est_used = dsl_dir_estimated_space(dd); 806*789Sahrens for (i = 0; i < TXG_SIZE; i++) 807*789Sahrens est_used += dd->dd_tempreserved[i]; 808*789Sahrens 809*789Sahrens quota = UINT64_MAX; 810*789Sahrens 811*789Sahrens if (dd->dd_phys->dd_quota) 812*789Sahrens quota = dd->dd_phys->dd_quota; 813*789Sahrens 814*789Sahrens /* 815*789Sahrens * If this transaction will result in a net free of space, we want 816*789Sahrens * to let it through, but we have to be careful: the space that it 817*789Sahrens * frees won't become available until *after* this txg syncs. 818*789Sahrens * Therefore, to ensure that it's possible to remove files from 819*789Sahrens * a full pool without inducing transient overcommits, we throttle 820*789Sahrens * netfree transactions against a quota that is slightly larger, 821*789Sahrens * but still within the pool's allocation slop. In cases where 822*789Sahrens * we're very close to full, this will allow a steady trickle of 823*789Sahrens * removes to get through. 824*789Sahrens */ 825*789Sahrens if (dd->dd_parent == NULL) { 826*789Sahrens uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree); 827*789Sahrens if (poolsize < quota) { 828*789Sahrens quota = poolsize; 829*789Sahrens edquot = ENOSPC; 830*789Sahrens } 831*789Sahrens } else if (netfree) { 832*789Sahrens quota = UINT64_MAX; 833*789Sahrens } 834*789Sahrens 835*789Sahrens /* 836*789Sahrens * If they are requesting more space, and our current estimate 837*789Sahrens * is over quota. They get to try again unless the actual 838*789Sahrens * on-disk is over quota. 839*789Sahrens */ 840*789Sahrens if (asize > 0 && est_used > quota) { 841*789Sahrens if (dd->dd_used_bytes < quota) 842*789Sahrens edquot = ERESTART; 843*789Sahrens dprintf_dd(dd, "failing: used=%lluK est_used = %lluK " 844*789Sahrens "quota=%lluK tr=%lluK err=%d\n", 845*789Sahrens dd->dd_used_bytes>>10, est_used>>10, 846*789Sahrens quota>>10, asize>>10, edquot); 847*789Sahrens mutex_exit(&dd->dd_lock); 848*789Sahrens return (edquot); 849*789Sahrens } 850*789Sahrens 851*789Sahrens /* We need to up our estimated delta before dropping dd_lock */ 852*789Sahrens dd->dd_tempreserved[txgidx] += asize; 853*789Sahrens 854*789Sahrens parent_rsrv = parent_delta(dd, est_used, asize); 855*789Sahrens mutex_exit(&dd->dd_lock); 856*789Sahrens 857*789Sahrens tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP); 858*789Sahrens tr->tr_ds = dd; 859*789Sahrens tr->tr_size = asize; 860*789Sahrens list_insert_tail(tr_list, tr); 861*789Sahrens 862*789Sahrens /* see if it's OK with our parent */ 863*789Sahrens if (dd->dd_parent && parent_rsrv) { 864*789Sahrens return (dsl_dir_tempreserve_impl(dd->dd_parent, 865*789Sahrens parent_rsrv, netfree, tr_list, tx)); 866*789Sahrens } else { 867*789Sahrens return (0); 868*789Sahrens } 869*789Sahrens } 870*789Sahrens 871*789Sahrens /* 872*789Sahrens * Reserve space in this dsl_dir, to be used in this tx's txg. 873*789Sahrens * After the space has been dirtied (and thus 874*789Sahrens * dsl_dir_willuse_space() has been called), the reservation should 875*789Sahrens * be canceled, using dsl_dir_tempreserve_clear(). 876*789Sahrens */ 877*789Sahrens int 878*789Sahrens dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, 879*789Sahrens uint64_t asize, uint64_t fsize, void **tr_cookiep, dmu_tx_t *tx) 880*789Sahrens { 881*789Sahrens int err = 0; 882*789Sahrens list_t *tr_list; 883*789Sahrens 884*789Sahrens tr_list = kmem_alloc(sizeof (list_t), KM_SLEEP); 885*789Sahrens list_create(tr_list, sizeof (struct tempreserve), 886*789Sahrens offsetof(struct tempreserve, tr_node)); 887*789Sahrens 888*789Sahrens err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize, 889*789Sahrens tr_list, tx); 890*789Sahrens 891*789Sahrens if (err == 0) { 892*789Sahrens struct tempreserve *tr; 893*789Sahrens 894*789Sahrens err = arc_tempreserve_space(lsize); 895*789Sahrens if (err == 0) { 896*789Sahrens tr = kmem_alloc(sizeof (struct tempreserve), KM_SLEEP); 897*789Sahrens tr->tr_ds = NULL; 898*789Sahrens tr->tr_size = lsize; 899*789Sahrens list_insert_tail(tr_list, tr); 900*789Sahrens } 901*789Sahrens } 902*789Sahrens 903*789Sahrens if (err) 904*789Sahrens dsl_dir_tempreserve_clear(tr_list, tx); 905*789Sahrens else 906*789Sahrens *tr_cookiep = tr_list; 907*789Sahrens return (err); 908*789Sahrens } 909*789Sahrens 910*789Sahrens /* 911*789Sahrens * Clear a temporary reservation that we previously made with 912*789Sahrens * dsl_dir_tempreserve_space(). 913*789Sahrens */ 914*789Sahrens void 915*789Sahrens dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx) 916*789Sahrens { 917*789Sahrens int txgidx = tx->tx_txg & TXG_MASK; 918*789Sahrens list_t *tr_list = tr_cookie; 919*789Sahrens struct tempreserve *tr; 920*789Sahrens 921*789Sahrens ASSERT3U(tx->tx_txg, !=, 0); 922*789Sahrens 923*789Sahrens while (tr = list_head(tr_list)) { 924*789Sahrens if (tr->tr_ds == NULL) { 925*789Sahrens arc_tempreserve_clear(tr->tr_size); 926*789Sahrens } else { 927*789Sahrens mutex_enter(&tr->tr_ds->dd_lock); 928*789Sahrens ASSERT3U(tr->tr_ds->dd_tempreserved[txgidx], >=, 929*789Sahrens tr->tr_size); 930*789Sahrens tr->tr_ds->dd_tempreserved[txgidx] -= tr->tr_size; 931*789Sahrens mutex_exit(&tr->tr_ds->dd_lock); 932*789Sahrens } 933*789Sahrens list_remove(tr_list, tr); 934*789Sahrens kmem_free(tr, sizeof (struct tempreserve)); 935*789Sahrens } 936*789Sahrens 937*789Sahrens kmem_free(tr_list, sizeof (list_t)); 938*789Sahrens } 939*789Sahrens 940*789Sahrens /* 941*789Sahrens * Call in open context when we think we're going to write/free space, 942*789Sahrens * eg. when dirtying data. Be conservative (ie. OK to write less than 943*789Sahrens * this or free more than this, but don't write more or free less). 944*789Sahrens */ 945*789Sahrens void 946*789Sahrens dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx) 947*789Sahrens { 948*789Sahrens int64_t parent_space; 949*789Sahrens uint64_t est_used; 950*789Sahrens 951*789Sahrens mutex_enter(&dd->dd_lock); 952*789Sahrens if (space > 0) 953*789Sahrens dd->dd_space_towrite[tx->tx_txg & TXG_MASK] += space; 954*789Sahrens 955*789Sahrens est_used = dsl_dir_estimated_space(dd); 956*789Sahrens parent_space = parent_delta(dd, est_used, space); 957*789Sahrens mutex_exit(&dd->dd_lock); 958*789Sahrens 959*789Sahrens /* Make sure that we clean up dd_space_to* */ 960*789Sahrens dsl_dir_dirty(dd, tx); 961*789Sahrens 962*789Sahrens /* XXX this is potentially expensive and unnecessary... */ 963*789Sahrens if (parent_space && dd->dd_parent) 964*789Sahrens dsl_dir_willuse_space(dd->dd_parent, parent_space, tx); 965*789Sahrens } 966*789Sahrens 967*789Sahrens /* call from syncing context when we actually write/free space for this dd */ 968*789Sahrens void 969*789Sahrens dsl_dir_diduse_space(dsl_dir_t *dd, 970*789Sahrens int64_t used, int64_t compressed, int64_t uncompressed, dmu_tx_t *tx) 971*789Sahrens { 972*789Sahrens int64_t accounted_delta; 973*789Sahrens 974*789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 975*789Sahrens 976*789Sahrens dsl_dir_dirty(dd, tx); 977*789Sahrens 978*789Sahrens mutex_enter(&dd->dd_lock); 979*789Sahrens accounted_delta = parent_delta(dd, dd->dd_used_bytes, used); 980*789Sahrens ASSERT(used >= 0 || dd->dd_used_bytes >= -used); 981*789Sahrens ASSERT(compressed >= 0 || 982*789Sahrens dd->dd_phys->dd_compressed_bytes >= -compressed); 983*789Sahrens ASSERT(uncompressed >= 0 || 984*789Sahrens dd->dd_phys->dd_uncompressed_bytes >= -uncompressed); 985*789Sahrens dd->dd_used_bytes += used; 986*789Sahrens if (used > 0) 987*789Sahrens dd->dd_space_towrite[tx->tx_txg & TXG_MASK] -= used; 988*789Sahrens dd->dd_phys->dd_uncompressed_bytes += uncompressed; 989*789Sahrens dd->dd_phys->dd_compressed_bytes += compressed; 990*789Sahrens mutex_exit(&dd->dd_lock); 991*789Sahrens 992*789Sahrens if (dd->dd_parent != NULL) { 993*789Sahrens dsl_dir_diduse_space(dd->dd_parent, 994*789Sahrens accounted_delta, compressed, uncompressed, tx); 995*789Sahrens } 996*789Sahrens } 997*789Sahrens 998*789Sahrens static int 999*789Sahrens dsl_dir_set_quota_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 1000*789Sahrens { 1001*789Sahrens uint64_t *quotap = arg; 1002*789Sahrens uint64_t new_quota = *quotap; 1003*789Sahrens int err = 0; 1004*789Sahrens 1005*789Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 1006*789Sahrens 1007*789Sahrens mutex_enter(&dd->dd_lock); 1008*789Sahrens if (new_quota != 0 && (new_quota < dd->dd_phys->dd_reserved || 1009*789Sahrens new_quota < dsl_dir_estimated_space(dd))) { 1010*789Sahrens err = ENOSPC; 1011*789Sahrens } else { 1012*789Sahrens dd->dd_phys->dd_quota = new_quota; 1013*789Sahrens } 1014*789Sahrens mutex_exit(&dd->dd_lock); 1015*789Sahrens return (err); 1016*789Sahrens } 1017*789Sahrens 1018*789Sahrens int 1019*789Sahrens dsl_dir_set_quota(const char *ddname, uint64_t quota) 1020*789Sahrens { 1021*789Sahrens dsl_dir_t *dd; 1022*789Sahrens int err; 1023*789Sahrens 1024*789Sahrens dd = dsl_dir_open(ddname, FTAG, NULL); 1025*789Sahrens if (dd == NULL) 1026*789Sahrens return (ENOENT); 1027*789Sahrens /* 1028*789Sahrens * If someone removes a file, then tries to set the quota, we 1029*789Sahrens * want to make sure the file freeing takes effect. 1030*789Sahrens */ 1031*789Sahrens txg_wait_open(dd->dd_pool, 0); 1032*789Sahrens 1033*789Sahrens err = dsl_dir_sync_task(dd, dsl_dir_set_quota_sync, "a, 0); 1034*789Sahrens dsl_dir_close(dd, FTAG); 1035*789Sahrens return (err); 1036*789Sahrens } 1037*789Sahrens 1038*789Sahrens static int 1039*789Sahrens dsl_dir_set_reservation_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 1040*789Sahrens { 1041*789Sahrens uint64_t *reservationp = arg; 1042*789Sahrens uint64_t new_reservation = *reservationp; 1043*789Sahrens uint64_t used, avail; 1044*789Sahrens int64_t delta; 1045*789Sahrens 1046*789Sahrens if (new_reservation > INT64_MAX) 1047*789Sahrens return (EOVERFLOW); 1048*789Sahrens 1049*789Sahrens mutex_enter(&dd->dd_lock); 1050*789Sahrens used = dd->dd_used_bytes; 1051*789Sahrens delta = MAX(used, new_reservation) - 1052*789Sahrens MAX(used, dd->dd_phys->dd_reserved); 1053*789Sahrens mutex_exit(&dd->dd_lock); 1054*789Sahrens 1055*789Sahrens if (dd->dd_parent) { 1056*789Sahrens avail = dsl_dir_space_available(dd->dd_parent, 1057*789Sahrens NULL, 0, FALSE); 1058*789Sahrens } else { 1059*789Sahrens avail = dsl_pool_adjustedsize(dd->dd_pool, B_FALSE) - used; 1060*789Sahrens } 1061*789Sahrens 1062*789Sahrens if (delta > 0 && delta > avail) 1063*789Sahrens return (ENOSPC); 1064*789Sahrens if (delta > 0 && dd->dd_phys->dd_quota > 0 && 1065*789Sahrens new_reservation > dd->dd_phys->dd_quota) 1066*789Sahrens return (ENOSPC); 1067*789Sahrens 1068*789Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 1069*789Sahrens dd->dd_phys->dd_reserved = new_reservation; 1070*789Sahrens 1071*789Sahrens if (dd->dd_parent != NULL) { 1072*789Sahrens /* Roll up this additional usage into our ancestors */ 1073*789Sahrens dsl_dir_diduse_space(dd->dd_parent, delta, 0, 0, tx); 1074*789Sahrens } 1075*789Sahrens return (0); 1076*789Sahrens } 1077*789Sahrens 1078*789Sahrens int 1079*789Sahrens dsl_dir_set_reservation(const char *ddname, uint64_t reservation) 1080*789Sahrens { 1081*789Sahrens dsl_dir_t *dd; 1082*789Sahrens int err; 1083*789Sahrens 1084*789Sahrens dd = dsl_dir_open(ddname, FTAG, NULL); 1085*789Sahrens if (dd == NULL) 1086*789Sahrens return (ENOENT); 1087*789Sahrens err = dsl_dir_sync_task(dd, 1088*789Sahrens dsl_dir_set_reservation_sync, &reservation, 0); 1089*789Sahrens dsl_dir_close(dd, FTAG); 1090*789Sahrens return (err); 1091*789Sahrens } 1092*789Sahrens 1093*789Sahrens static dsl_dir_t * 1094*789Sahrens closest_common_ancestor(dsl_dir_t *ds1, dsl_dir_t *ds2) 1095*789Sahrens { 1096*789Sahrens for (; ds1; ds1 = ds1->dd_parent) { 1097*789Sahrens dsl_dir_t *dd; 1098*789Sahrens for (dd = ds2; dd; dd = dd->dd_parent) { 1099*789Sahrens if (ds1 == dd) 1100*789Sahrens return (dd); 1101*789Sahrens } 1102*789Sahrens } 1103*789Sahrens return (NULL); 1104*789Sahrens } 1105*789Sahrens 1106*789Sahrens /* 1107*789Sahrens * If delta is applied to dd, how much of that delta would be applied to 1108*789Sahrens * ancestor? Syncing context only. 1109*789Sahrens */ 1110*789Sahrens static int64_t 1111*789Sahrens would_change(dsl_dir_t *dd, int64_t delta, dsl_dir_t *ancestor) 1112*789Sahrens { 1113*789Sahrens if (dd == ancestor) 1114*789Sahrens return (delta); 1115*789Sahrens 1116*789Sahrens mutex_enter(&dd->dd_lock); 1117*789Sahrens delta = parent_delta(dd, dd->dd_used_bytes, delta); 1118*789Sahrens mutex_exit(&dd->dd_lock); 1119*789Sahrens return (would_change(dd->dd_parent, delta, ancestor)); 1120*789Sahrens } 1121*789Sahrens 1122*789Sahrens int 1123*789Sahrens dsl_dir_rename_sync(dsl_dir_t *dd, void *arg, dmu_tx_t *tx) 1124*789Sahrens { 1125*789Sahrens const char *newname = arg; 1126*789Sahrens dsl_pool_t *dp = dd->dd_pool; 1127*789Sahrens objset_t *mos = dp->dp_meta_objset; 1128*789Sahrens dsl_dir_t *newpds; 1129*789Sahrens const char *tail; 1130*789Sahrens int err, len; 1131*789Sahrens 1132*789Sahrens /* can't rename to different pool */ 1133*789Sahrens len = strlen(dp->dp_root_dir->dd_myname); 1134*789Sahrens if (strncmp(dp->dp_root_dir->dd_myname, newname, len != 0) || 1135*789Sahrens newname[len] != '/') { 1136*789Sahrens return (ENXIO); 1137*789Sahrens } 1138*789Sahrens 1139*789Sahrens newpds = dsl_dir_open_spa(dp->dp_spa, newname, FTAG, &tail); 1140*789Sahrens 1141*789Sahrens /* new parent should exist */ 1142*789Sahrens if (newpds == NULL) 1143*789Sahrens return (ENOENT); 1144*789Sahrens 1145*789Sahrens /* new name should not already exist */ 1146*789Sahrens if (tail == NULL) { 1147*789Sahrens dsl_dir_close(newpds, FTAG); 1148*789Sahrens return (EEXIST); 1149*789Sahrens } 1150*789Sahrens 1151*789Sahrens rw_enter(&dp->dp_config_rwlock, RW_WRITER); 1152*789Sahrens 1153*789Sahrens /* There should be 2 references: the open and the dirty */ 1154*789Sahrens if (dmu_buf_refcount(dd->dd_dbuf) > 2) { 1155*789Sahrens rw_exit(&dp->dp_config_rwlock); 1156*789Sahrens dsl_dir_close(newpds, FTAG); 1157*789Sahrens return (EBUSY); 1158*789Sahrens } 1159*789Sahrens 1160*789Sahrens if (newpds != dd->dd_parent) { 1161*789Sahrens dsl_dir_t *ancestor; 1162*789Sahrens int64_t adelta; 1163*789Sahrens uint64_t myspace, avail; 1164*789Sahrens 1165*789Sahrens ancestor = closest_common_ancestor(dd, newpds); 1166*789Sahrens 1167*789Sahrens /* no rename into our descendent */ 1168*789Sahrens if (ancestor == dd) { 1169*789Sahrens dsl_dir_close(newpds, FTAG); 1170*789Sahrens rw_exit(&dp->dp_config_rwlock); 1171*789Sahrens return (EINVAL); 1172*789Sahrens } 1173*789Sahrens 1174*789Sahrens myspace = MAX(dd->dd_used_bytes, dd->dd_phys->dd_reserved); 1175*789Sahrens adelta = would_change(dd->dd_parent, -myspace, ancestor); 1176*789Sahrens avail = dsl_dir_space_available(newpds, 1177*789Sahrens ancestor, adelta, FALSE); 1178*789Sahrens if (avail < myspace) { 1179*789Sahrens dsl_dir_close(newpds, FTAG); 1180*789Sahrens rw_exit(&dp->dp_config_rwlock); 1181*789Sahrens return (ENOSPC); 1182*789Sahrens } 1183*789Sahrens 1184*789Sahrens /* The point of no (unsuccessful) return */ 1185*789Sahrens 1186*789Sahrens dsl_dir_diduse_space(dd->dd_parent, -myspace, 1187*789Sahrens -dd->dd_phys->dd_compressed_bytes, 1188*789Sahrens -dd->dd_phys->dd_uncompressed_bytes, tx); 1189*789Sahrens dsl_dir_diduse_space(newpds, myspace, 1190*789Sahrens dd->dd_phys->dd_compressed_bytes, 1191*789Sahrens dd->dd_phys->dd_uncompressed_bytes, tx); 1192*789Sahrens } 1193*789Sahrens 1194*789Sahrens /* The point of no (unsuccessful) return */ 1195*789Sahrens 1196*789Sahrens dmu_buf_will_dirty(dd->dd_dbuf, tx); 1197*789Sahrens 1198*789Sahrens /* remove from old parent zapobj */ 1199*789Sahrens err = zap_remove(mos, dd->dd_parent->dd_phys->dd_child_dir_zapobj, 1200*789Sahrens dd->dd_myname, tx); 1201*789Sahrens ASSERT3U(err, ==, 0); 1202*789Sahrens 1203*789Sahrens (void) strcpy(dd->dd_myname, tail); 1204*789Sahrens dsl_dir_close(dd->dd_parent, dd); 1205*789Sahrens dd->dd_phys->dd_parent_obj = newpds->dd_object; 1206*789Sahrens dd->dd_parent = dsl_dir_open_obj(dd->dd_pool, 1207*789Sahrens newpds->dd_object, NULL, dd); 1208*789Sahrens 1209*789Sahrens /* add to new parent zapobj */ 1210*789Sahrens err = zap_add(mos, newpds->dd_phys->dd_child_dir_zapobj, 1211*789Sahrens dd->dd_myname, 8, 1, &dd->dd_object, tx); 1212*789Sahrens ASSERT3U(err, ==, 0); 1213*789Sahrens 1214*789Sahrens dsl_dir_close(newpds, FTAG); 1215*789Sahrens rw_exit(&dp->dp_config_rwlock); 1216*789Sahrens return (0); 1217*789Sahrens } 1218