1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 2212178SMark.Shellenbaum@Sun.COM * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23789Sahrens */ 24789Sahrens 25789Sahrens #include <sys/zfs_context.h> 26789Sahrens #include <sys/dbuf.h> 27789Sahrens #include <sys/dnode.h> 28789Sahrens #include <sys/dmu.h> 29789Sahrens #include <sys/dmu_tx.h> 30789Sahrens #include <sys/dmu_objset.h> 31789Sahrens #include <sys/dsl_dataset.h> 32789Sahrens #include <sys/spa.h> 33789Sahrens 34789Sahrens static void 35789Sahrens dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx) 36789Sahrens { 37789Sahrens dmu_buf_impl_t *db; 383547Smaybee int txgoff = tx->tx_txg & TXG_MASK; 393547Smaybee int nblkptr = dn->dn_phys->dn_nblkptr; 403547Smaybee int old_toplvl = dn->dn_phys->dn_nlevels - 1; 413547Smaybee int new_level = dn->dn_next_nlevels[txgoff]; 42789Sahrens int i; 43789Sahrens 443547Smaybee rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 453547Smaybee 463547Smaybee /* this dnode can't be paged out because it's dirty */ 47789Sahrens ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE); 48789Sahrens ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); 493547Smaybee ASSERT(new_level > 1 && dn->dn_phys->dn_nlevels > 0); 50789Sahrens 51789Sahrens db = dbuf_hold_level(dn, dn->dn_phys->dn_nlevels, 0, FTAG); 521544Seschrock ASSERT(db != NULL); 53789Sahrens 543547Smaybee dn->dn_phys->dn_nlevels = new_level; 554312Sgw25295 dprintf("os=%p obj=%llu, increase to %d\n", dn->dn_objset, 564312Sgw25295 dn->dn_object, dn->dn_phys->dn_nlevels); 57789Sahrens 583547Smaybee /* check for existing blkptrs in the dnode */ 593547Smaybee for (i = 0; i < nblkptr; i++) 603547Smaybee if (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[i])) 613547Smaybee break; 623547Smaybee if (i != nblkptr) { 633547Smaybee /* transfer dnode's block pointers to new indirect block */ 643547Smaybee (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT); 653547Smaybee ASSERT(db->db.db_data); 663547Smaybee ASSERT(arc_released(db->db_buf)); 673547Smaybee ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size); 683547Smaybee bcopy(dn->dn_phys->dn_blkptr, db->db.db_data, 693547Smaybee sizeof (blkptr_t) * nblkptr); 703547Smaybee arc_buf_freeze(db->db_buf); 713547Smaybee } 723547Smaybee 73789Sahrens /* set dbuf's parent pointers to new indirect buf */ 743547Smaybee for (i = 0; i < nblkptr; i++) { 753547Smaybee dmu_buf_impl_t *child = dbuf_find(dn, old_toplvl, i); 763547Smaybee 77789Sahrens if (child == NULL) 78789Sahrens continue; 793547Smaybee ASSERT3P(child->db_dnode, ==, dn); 803547Smaybee if (child->db_parent && child->db_parent != dn->dn_dbuf) { 813547Smaybee ASSERT(child->db_parent->db_level == db->db_level); 823547Smaybee ASSERT(child->db_blkptr != 833547Smaybee &dn->dn_phys->dn_blkptr[child->db_blkid]); 84789Sahrens mutex_exit(&child->db_mtx); 85789Sahrens continue; 86789Sahrens } 873547Smaybee ASSERT(child->db_parent == NULL || 883547Smaybee child->db_parent == dn->dn_dbuf); 89789Sahrens 903547Smaybee child->db_parent = db; 913547Smaybee dbuf_add_ref(db, child); 923547Smaybee if (db->db.db_data) 933547Smaybee child->db_blkptr = (blkptr_t *)db->db.db_data + i; 943547Smaybee else 953547Smaybee child->db_blkptr = NULL; 963547Smaybee dprintf_dbuf_bp(child, child->db_blkptr, 973547Smaybee "changed db_blkptr to new indirect %s", ""); 98789Sahrens 99789Sahrens mutex_exit(&child->db_mtx); 100789Sahrens } 101789Sahrens 1023547Smaybee bzero(dn->dn_phys->dn_blkptr, sizeof (blkptr_t) * nblkptr); 103789Sahrens 1041544Seschrock dbuf_rele(db, FTAG); 1053547Smaybee 1063547Smaybee rw_exit(&dn->dn_struct_rwlock); 107789Sahrens } 108789Sahrens 1096992Smaybee static int 110789Sahrens free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx) 111789Sahrens { 1126992Smaybee dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; 113789Sahrens uint64_t bytesfreed = 0; 1146992Smaybee int i, blocks_freed = 0; 115789Sahrens 1166992Smaybee dprintf("ds=%p obj=%llx num=%d\n", ds, dn->dn_object, num); 117789Sahrens 118789Sahrens for (i = 0; i < num; i++, bp++) { 119789Sahrens if (BP_IS_HOLE(bp)) 120789Sahrens continue; 121789Sahrens 12210922SJeff.Bonwick@Sun.COM bytesfreed += dsl_dataset_block_kill(ds, bp, tx, B_FALSE); 1232082Seschrock ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys)); 1243547Smaybee bzero(bp, sizeof (blkptr_t)); 1256992Smaybee blocks_freed += 1; 126789Sahrens } 127789Sahrens dnode_diduse_space(dn, -bytesfreed); 1286992Smaybee return (blocks_freed); 129789Sahrens } 130789Sahrens 131873Sek110237 #ifdef ZFS_DEBUG 132789Sahrens static void 133789Sahrens free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx) 134789Sahrens { 135789Sahrens int off, num; 136789Sahrens int i, err, epbs; 137789Sahrens uint64_t txg = tx->tx_txg; 138789Sahrens 139789Sahrens epbs = db->db_dnode->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; 140789Sahrens off = start - (db->db_blkid * 1<<epbs); 141789Sahrens num = end - start + 1; 142789Sahrens 143789Sahrens ASSERT3U(off, >=, 0); 144789Sahrens ASSERT3U(num, >=, 0); 145789Sahrens ASSERT3U(db->db_level, >, 0); 146789Sahrens ASSERT3U(db->db.db_size, ==, 1<<db->db_dnode->dn_phys->dn_indblkshift); 147789Sahrens ASSERT3U(off+num, <=, db->db.db_size >> SPA_BLKPTRSHIFT); 148789Sahrens ASSERT(db->db_blkptr != NULL); 149789Sahrens 150789Sahrens for (i = off; i < off+num; i++) { 151789Sahrens uint64_t *buf; 1523547Smaybee dmu_buf_impl_t *child; 1533547Smaybee dbuf_dirty_record_t *dr; 154789Sahrens int j; 155789Sahrens 156789Sahrens ASSERT(db->db_level == 1); 157789Sahrens 158789Sahrens rw_enter(&db->db_dnode->dn_struct_rwlock, RW_READER); 159789Sahrens err = dbuf_hold_impl(db->db_dnode, db->db_level-1, 1604312Sgw25295 (db->db_blkid << epbs) + i, TRUE, FTAG, &child); 161789Sahrens rw_exit(&db->db_dnode->dn_struct_rwlock); 162789Sahrens if (err == ENOENT) 163789Sahrens continue; 164789Sahrens ASSERT(err == 0); 165789Sahrens ASSERT(child->db_level == 0); 1663547Smaybee dr = child->db_last_dirty; 1673547Smaybee while (dr && dr->dr_txg > txg) 1683547Smaybee dr = dr->dr_next; 1693547Smaybee ASSERT(dr == NULL || dr->dr_txg == txg); 170789Sahrens 1713547Smaybee /* data_old better be zeroed */ 1723547Smaybee if (dr) { 1733547Smaybee buf = dr->dt.dl.dr_data->b_data; 174789Sahrens for (j = 0; j < child->db.db_size >> 3; j++) { 175789Sahrens if (buf[j] != 0) { 176789Sahrens panic("freed data not zero: " 177789Sahrens "child=%p i=%d off=%d num=%d\n", 1787240Srh87107 (void *)child, i, off, num); 179789Sahrens } 180789Sahrens } 181789Sahrens } 182789Sahrens 183789Sahrens /* 184789Sahrens * db_data better be zeroed unless it's dirty in a 185789Sahrens * future txg. 186789Sahrens */ 187789Sahrens mutex_enter(&child->db_mtx); 188789Sahrens buf = child->db.db_data; 189789Sahrens if (buf != NULL && child->db_state != DB_FILL && 1903547Smaybee child->db_last_dirty == NULL) { 191789Sahrens for (j = 0; j < child->db.db_size >> 3; j++) { 192789Sahrens if (buf[j] != 0) { 193789Sahrens panic("freed data not zero: " 194789Sahrens "child=%p i=%d off=%d num=%d\n", 1957240Srh87107 (void *)child, i, off, num); 196789Sahrens } 197789Sahrens } 198789Sahrens } 199789Sahrens mutex_exit(&child->db_mtx); 200789Sahrens 2011544Seschrock dbuf_rele(child, FTAG); 202789Sahrens } 203873Sek110237 } 204789Sahrens #endif 205789Sahrens 2066992Smaybee #define ALL -1 2076992Smaybee 208789Sahrens static int 209789Sahrens free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc, 210789Sahrens dmu_tx_t *tx) 211789Sahrens { 212789Sahrens dnode_t *dn = db->db_dnode; 213789Sahrens blkptr_t *bp; 214789Sahrens dmu_buf_impl_t *subdb; 215789Sahrens uint64_t start, end, dbstart, dbend, i; 216789Sahrens int epbs, shift, err; 217789Sahrens int all = TRUE; 2186992Smaybee int blocks_freed = 0; 219789Sahrens 2206992Smaybee /* 2216992Smaybee * There is a small possibility that this block will not be cached: 2226992Smaybee * 1 - if level > 1 and there are no children with level <= 1 2236992Smaybee * 2 - if we didn't get a dirty hold (because this block had just 2246992Smaybee * finished being written -- and so had no holds), and then this 2256992Smaybee * block got evicted before we got here. 2266992Smaybee */ 2276992Smaybee if (db->db_state != DB_CACHED) 2286992Smaybee (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED); 2296992Smaybee 230*12296SLin.Ling@Sun.COM dbuf_release_bp(db); 231789Sahrens bp = (blkptr_t *)db->db.db_data; 232789Sahrens 233789Sahrens epbs = db->db_dnode->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; 234789Sahrens shift = (db->db_level - 1) * epbs; 235789Sahrens dbstart = db->db_blkid << epbs; 236789Sahrens start = blkid >> shift; 237789Sahrens if (dbstart < start) { 238789Sahrens bp += start - dbstart; 239789Sahrens all = FALSE; 240789Sahrens } else { 241789Sahrens start = dbstart; 242789Sahrens } 243789Sahrens dbend = ((db->db_blkid + 1) << epbs) - 1; 244789Sahrens end = (blkid + nblks - 1) >> shift; 245789Sahrens if (dbend <= end) 246789Sahrens end = dbend; 247789Sahrens else if (all) 248789Sahrens all = trunc; 249789Sahrens ASSERT3U(start, <=, end); 250789Sahrens 251789Sahrens if (db->db_level == 1) { 252873Sek110237 FREE_VERIFY(db, start, end, tx); 2536992Smaybee blocks_freed = free_blocks(dn, bp, end-start+1, tx); 2543093Sahrens arc_buf_freeze(db->db_buf); 2556992Smaybee ASSERT(all || blocks_freed == 0 || db->db_last_dirty); 2566992Smaybee return (all ? ALL : blocks_freed); 257789Sahrens } 258789Sahrens 259789Sahrens for (i = start; i <= end; i++, bp++) { 260789Sahrens if (BP_IS_HOLE(bp)) 261789Sahrens continue; 262789Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 263789Sahrens err = dbuf_hold_impl(dn, db->db_level-1, i, TRUE, FTAG, &subdb); 264789Sahrens ASSERT3U(err, ==, 0); 265789Sahrens rw_exit(&dn->dn_struct_rwlock); 266789Sahrens 2676992Smaybee if (free_children(subdb, blkid, nblks, trunc, tx) == ALL) { 268789Sahrens ASSERT3P(subdb->db_blkptr, ==, bp); 2696992Smaybee blocks_freed += free_blocks(dn, bp, 1, tx); 2701163Smaybee } else { 2711163Smaybee all = FALSE; 272789Sahrens } 2731544Seschrock dbuf_rele(subdb, FTAG); 274789Sahrens } 2753093Sahrens arc_buf_freeze(db->db_buf); 276789Sahrens #ifdef ZFS_DEBUG 277789Sahrens bp -= (end-start)+1; 278789Sahrens for (i = start; i <= end; i++, bp++) { 279789Sahrens if (i == start && blkid != 0) 280789Sahrens continue; 281789Sahrens else if (i == end && !trunc) 282789Sahrens continue; 283789Sahrens ASSERT3U(bp->blk_birth, ==, 0); 284789Sahrens } 285789Sahrens #endif 2866992Smaybee ASSERT(all || blocks_freed == 0 || db->db_last_dirty); 2876992Smaybee return (all ? ALL : blocks_freed); 288789Sahrens } 289789Sahrens 290789Sahrens /* 291789Sahrens * free_range: Traverse the indicated range of the provided file 292789Sahrens * and "free" all the blocks contained there. 293789Sahrens */ 294789Sahrens static void 295789Sahrens dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx) 296789Sahrens { 297789Sahrens blkptr_t *bp = dn->dn_phys->dn_blkptr; 298789Sahrens dmu_buf_impl_t *db; 299789Sahrens int trunc, start, end, shift, i, err; 300789Sahrens int dnlevel = dn->dn_phys->dn_nlevels; 301789Sahrens 302789Sahrens if (blkid > dn->dn_phys->dn_maxblkid) 303789Sahrens return; 304789Sahrens 305789Sahrens ASSERT(dn->dn_phys->dn_maxblkid < UINT64_MAX); 306789Sahrens trunc = blkid + nblks > dn->dn_phys->dn_maxblkid; 307789Sahrens if (trunc) 308789Sahrens nblks = dn->dn_phys->dn_maxblkid - blkid + 1; 309789Sahrens 310789Sahrens /* There are no indirect blocks in the object */ 311789Sahrens if (dnlevel == 1) { 312789Sahrens if (blkid >= dn->dn_phys->dn_nblkptr) { 313789Sahrens /* this range was never made persistent */ 314789Sahrens return; 315789Sahrens } 316789Sahrens ASSERT3U(blkid + nblks, <=, dn->dn_phys->dn_nblkptr); 3176992Smaybee (void) free_blocks(dn, bp + blkid, nblks, tx); 318789Sahrens if (trunc) { 319789Sahrens uint64_t off = (dn->dn_phys->dn_maxblkid + 1) * 320789Sahrens (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT); 321789Sahrens dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0); 322789Sahrens ASSERT(off < dn->dn_phys->dn_maxblkid || 323789Sahrens dn->dn_phys->dn_maxblkid == 0 || 3246992Smaybee dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0); 325789Sahrens } 326789Sahrens return; 327789Sahrens } 328789Sahrens 329789Sahrens shift = (dnlevel - 1) * (dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT); 330789Sahrens start = blkid >> shift; 331789Sahrens ASSERT(start < dn->dn_phys->dn_nblkptr); 332789Sahrens end = (blkid + nblks - 1) >> shift; 333789Sahrens bp += start; 334789Sahrens for (i = start; i <= end; i++, bp++) { 335789Sahrens if (BP_IS_HOLE(bp)) 336789Sahrens continue; 337789Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 338789Sahrens err = dbuf_hold_impl(dn, dnlevel-1, i, TRUE, FTAG, &db); 339789Sahrens ASSERT3U(err, ==, 0); 340789Sahrens rw_exit(&dn->dn_struct_rwlock); 341789Sahrens 3426992Smaybee if (free_children(db, blkid, nblks, trunc, tx) == ALL) { 343789Sahrens ASSERT3P(db->db_blkptr, ==, bp); 3446992Smaybee (void) free_blocks(dn, bp, 1, tx); 345789Sahrens } 3461544Seschrock dbuf_rele(db, FTAG); 347789Sahrens } 348789Sahrens if (trunc) { 349789Sahrens uint64_t off = (dn->dn_phys->dn_maxblkid + 1) * 350789Sahrens (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT); 351789Sahrens dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0); 352789Sahrens ASSERT(off < dn->dn_phys->dn_maxblkid || 353789Sahrens dn->dn_phys->dn_maxblkid == 0 || 3546992Smaybee dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0); 355789Sahrens } 356789Sahrens } 357789Sahrens 3581544Seschrock /* 3591544Seschrock * Try to kick all the dnodes dbufs out of the cache... 3601544Seschrock */ 3614944Smaybee void 3624944Smaybee dnode_evict_dbufs(dnode_t *dn) 3631544Seschrock { 3641596Sahrens int progress; 3651596Sahrens int pass = 0; 3661596Sahrens 3671596Sahrens do { 3681602Smaybee dmu_buf_impl_t *db, marker; 3691596Sahrens int evicting = FALSE; 3701544Seschrock 3711596Sahrens progress = FALSE; 3721596Sahrens mutex_enter(&dn->dn_dbufs_mtx); 3731602Smaybee list_insert_tail(&dn->dn_dbufs, &marker); 3741602Smaybee db = list_head(&dn->dn_dbufs); 3751602Smaybee for (; db != ▮ db = list_head(&dn->dn_dbufs)) { 3761602Smaybee list_remove(&dn->dn_dbufs, db); 3771602Smaybee list_insert_tail(&dn->dn_dbufs, db); 3784312Sgw25295 ASSERT3P(db->db_dnode, ==, dn); 3791596Sahrens 3801544Seschrock mutex_enter(&db->db_mtx); 3811596Sahrens if (db->db_state == DB_EVICTING) { 3821596Sahrens progress = TRUE; 3831596Sahrens evicting = TRUE; 3841596Sahrens mutex_exit(&db->db_mtx); 3851596Sahrens } else if (refcount_is_zero(&db->db_holds)) { 3861596Sahrens progress = TRUE; 3871596Sahrens dbuf_clear(db); /* exits db_mtx for us */ 3881596Sahrens } else { 3891596Sahrens mutex_exit(&db->db_mtx); 3901596Sahrens } 3911596Sahrens 3921544Seschrock } 3931602Smaybee list_remove(&dn->dn_dbufs, &marker); 3941596Sahrens /* 3951596Sahrens * NB: we need to drop dn_dbufs_mtx between passes so 3961596Sahrens * that any DB_EVICTING dbufs can make progress. 3971596Sahrens * Ideally, we would have some cv we could wait on, but 3981596Sahrens * since we don't, just wait a bit to give the other 3991596Sahrens * thread a chance to run. 4001596Sahrens */ 4011596Sahrens mutex_exit(&dn->dn_dbufs_mtx); 4021596Sahrens if (evicting) 4031596Sahrens delay(1); 4041596Sahrens pass++; 4051596Sahrens ASSERT(pass < 100); /* sanity check */ 4061596Sahrens } while (progress); 4071596Sahrens 4081544Seschrock rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 4091544Seschrock if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) { 4101544Seschrock mutex_enter(&dn->dn_bonus->db_mtx); 4111544Seschrock dbuf_evict(dn->dn_bonus); 4121544Seschrock dn->dn_bonus = NULL; 4131544Seschrock } 4141544Seschrock rw_exit(&dn->dn_struct_rwlock); 4151544Seschrock } 4161544Seschrock 4173547Smaybee static void 4183547Smaybee dnode_undirty_dbufs(list_t *list) 4193547Smaybee { 4203547Smaybee dbuf_dirty_record_t *dr; 4213547Smaybee 4223547Smaybee while (dr = list_head(list)) { 4233547Smaybee dmu_buf_impl_t *db = dr->dr_dbuf; 4243547Smaybee uint64_t txg = dr->dr_txg; 4253547Smaybee 42610922SJeff.Bonwick@Sun.COM if (db->db_level != 0) 42710922SJeff.Bonwick@Sun.COM dnode_undirty_dbufs(&dr->dt.di.dr_children); 42810922SJeff.Bonwick@Sun.COM 4293547Smaybee mutex_enter(&db->db_mtx); 4303547Smaybee /* XXX - use dbuf_undirty()? */ 4313547Smaybee list_remove(list, dr); 4323547Smaybee ASSERT(db->db_last_dirty == dr); 4333547Smaybee db->db_last_dirty = NULL; 4343547Smaybee db->db_dirtycnt -= 1; 4353547Smaybee if (db->db_level == 0) { 43611935SMark.Shellenbaum@Sun.COM ASSERT(db->db_blkid == DMU_BONUS_BLKID || 4373547Smaybee dr->dt.dl.dr_data == db->db_buf); 4383547Smaybee dbuf_unoverride(dr); 4393547Smaybee } 4403547Smaybee kmem_free(dr, sizeof (dbuf_dirty_record_t)); 44110922SJeff.Bonwick@Sun.COM dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg); 4423547Smaybee } 4433547Smaybee } 4443547Smaybee 4453547Smaybee static void 446789Sahrens dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) 447789Sahrens { 448789Sahrens int txgoff = tx->tx_txg & TXG_MASK; 449789Sahrens 450789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 451789Sahrens 4526992Smaybee /* 4536992Smaybee * Our contents should have been freed in dnode_sync() by the 4546992Smaybee * free range record inserted by the caller of dnode_free(). 4556992Smaybee */ 4566992Smaybee ASSERT3U(DN_USED_BYTES(dn->dn_phys), ==, 0); 4576992Smaybee ASSERT(BP_IS_HOLE(dn->dn_phys->dn_blkptr)); 4586992Smaybee 4593547Smaybee dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]); 4604944Smaybee dnode_evict_dbufs(dn); 4611544Seschrock ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL); 4621544Seschrock 4631544Seschrock /* 4641544Seschrock * XXX - It would be nice to assert this, but we may still 4651544Seschrock * have residual holds from async evictions from the arc... 4661544Seschrock * 4673444Sek110237 * zfs_obj_to_path() also depends on this being 4683444Sek110237 * commented out. 4693444Sek110237 * 4701544Seschrock * ASSERT3U(refcount_count(&dn->dn_holds), ==, 1); 4711544Seschrock */ 472789Sahrens 473789Sahrens /* Undirty next bits */ 474789Sahrens dn->dn_next_nlevels[txgoff] = 0; 475789Sahrens dn->dn_next_indblkshift[txgoff] = 0; 4761596Sahrens dn->dn_next_blksz[txgoff] = 0; 477789Sahrens 478789Sahrens /* ASSERT(blkptrs are zero); */ 479789Sahrens ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE); 480789Sahrens ASSERT(dn->dn_type != DMU_OT_NONE); 481789Sahrens 482789Sahrens ASSERT(dn->dn_free_txg > 0); 483789Sahrens if (dn->dn_allocated_txg != dn->dn_free_txg) 484789Sahrens dbuf_will_dirty(dn->dn_dbuf, tx); 485789Sahrens bzero(dn->dn_phys, sizeof (dnode_phys_t)); 486789Sahrens 487789Sahrens mutex_enter(&dn->dn_mtx); 488789Sahrens dn->dn_type = DMU_OT_NONE; 489789Sahrens dn->dn_maxblkid = 0; 490789Sahrens dn->dn_allocated_txg = 0; 4914480Sgw25295 dn->dn_free_txg = 0; 49211935SMark.Shellenbaum@Sun.COM dn->dn_have_spill = B_FALSE; 493789Sahrens mutex_exit(&dn->dn_mtx); 494789Sahrens 4951544Seschrock ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); 496789Sahrens 497789Sahrens dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg); 498789Sahrens /* 499789Sahrens * Now that we've released our hold, the dnode may 500789Sahrens * be evicted, so we musn't access it. 501789Sahrens */ 502789Sahrens } 503789Sahrens 504789Sahrens /* 5053547Smaybee * Write out the dnode's dirty buffers. 506789Sahrens */ 5073547Smaybee void 5083547Smaybee dnode_sync(dnode_t *dn, dmu_tx_t *tx) 509789Sahrens { 510789Sahrens free_range_t *rp; 5113547Smaybee dnode_phys_t *dnp = dn->dn_phys; 512789Sahrens int txgoff = tx->tx_txg & TXG_MASK; 5133547Smaybee list_t *list = &dn->dn_dirty_records[txgoff]; 5149396SMatthew.Ahrens@Sun.COM static const dnode_phys_t zerodn = { 0 }; 51511935SMark.Shellenbaum@Sun.COM boolean_t kill_spill = B_FALSE; 516789Sahrens 517789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 518789Sahrens ASSERT(dnp->dn_type != DMU_OT_NONE || dn->dn_allocated_txg); 5199396SMatthew.Ahrens@Sun.COM ASSERT(dnp->dn_type != DMU_OT_NONE || 5209396SMatthew.Ahrens@Sun.COM bcmp(dnp, &zerodn, DNODE_SIZE) == 0); 521873Sek110237 DNODE_VERIFY(dn); 5221596Sahrens 5233547Smaybee ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf)); 524789Sahrens 5259396SMatthew.Ahrens@Sun.COM if (dmu_objset_userused_enabled(dn->dn_objset) && 5269396SMatthew.Ahrens@Sun.COM !DMU_OBJECT_IS_SPECIAL(dn->dn_object)) { 52711935SMark.Shellenbaum@Sun.COM mutex_enter(&dn->dn_mtx); 52811935SMark.Shellenbaum@Sun.COM dn->dn_oldused = DN_USED_BYTES(dn->dn_phys); 52911935SMark.Shellenbaum@Sun.COM dn->dn_oldflags = dn->dn_phys->dn_flags; 53011935SMark.Shellenbaum@Sun.COM dn->dn_id_flags |= DN_ID_SYNC; 5319396SMatthew.Ahrens@Sun.COM dn->dn_phys->dn_flags |= DNODE_FLAG_USERUSED_ACCOUNTED; 53211935SMark.Shellenbaum@Sun.COM mutex_exit(&dn->dn_mtx); 53312178SMark.Shellenbaum@Sun.COM dmu_objset_userquota_get_ids(dn, B_FALSE, tx); 5349396SMatthew.Ahrens@Sun.COM } else { 5359396SMatthew.Ahrens@Sun.COM /* Once we account for it, we should always account for it. */ 5369396SMatthew.Ahrens@Sun.COM ASSERT(!(dn->dn_phys->dn_flags & 5379396SMatthew.Ahrens@Sun.COM DNODE_FLAG_USERUSED_ACCOUNTED)); 5389396SMatthew.Ahrens@Sun.COM } 5399396SMatthew.Ahrens@Sun.COM 540789Sahrens mutex_enter(&dn->dn_mtx); 541789Sahrens if (dn->dn_allocated_txg == tx->tx_txg) { 542789Sahrens /* The dnode is newly allocated or reallocated */ 543789Sahrens if (dnp->dn_type == DMU_OT_NONE) { 544789Sahrens /* this is a first alloc, not a realloc */ 545789Sahrens dnp->dn_nlevels = 1; 5468644SMark.Maybee@Sun.COM dnp->dn_nblkptr = dn->dn_nblkptr; 547789Sahrens } 548789Sahrens 549789Sahrens dnp->dn_type = dn->dn_type; 550789Sahrens dnp->dn_bonustype = dn->dn_bonustype; 551789Sahrens dnp->dn_bonuslen = dn->dn_bonuslen; 552789Sahrens } 553789Sahrens 5543547Smaybee ASSERT(dnp->dn_nlevels > 1 || 5551599Sahrens BP_IS_HOLE(&dnp->dn_blkptr[0]) || 5561599Sahrens BP_GET_LSIZE(&dnp->dn_blkptr[0]) == 5571599Sahrens dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); 5581599Sahrens 5591596Sahrens if (dn->dn_next_blksz[txgoff]) { 5601596Sahrens ASSERT(P2PHASE(dn->dn_next_blksz[txgoff], 561789Sahrens SPA_MINBLOCKSIZE) == 0); 5621599Sahrens ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[0]) || 5636992Smaybee dn->dn_maxblkid == 0 || list_head(list) != NULL || 56412178SMark.Shellenbaum@Sun.COM avl_last(&dn->dn_ranges[txgoff]) || 5651600Sahrens dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT == 5661600Sahrens dnp->dn_datablkszsec); 567789Sahrens dnp->dn_datablkszsec = 5681596Sahrens dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT; 5691596Sahrens dn->dn_next_blksz[txgoff] = 0; 570789Sahrens } 571789Sahrens 5724944Smaybee if (dn->dn_next_bonuslen[txgoff]) { 5734944Smaybee if (dn->dn_next_bonuslen[txgoff] == DN_ZERO_BONUSLEN) 5744944Smaybee dnp->dn_bonuslen = 0; 5754944Smaybee else 5764944Smaybee dnp->dn_bonuslen = dn->dn_next_bonuslen[txgoff]; 5774944Smaybee ASSERT(dnp->dn_bonuslen <= DN_MAX_BONUSLEN); 5784944Smaybee dn->dn_next_bonuslen[txgoff] = 0; 5794944Smaybee } 5804944Smaybee 58111935SMark.Shellenbaum@Sun.COM if (dn->dn_next_bonustype[txgoff]) { 58211935SMark.Shellenbaum@Sun.COM ASSERT(dn->dn_next_bonustype[txgoff] < DMU_OT_NUMTYPES); 58311935SMark.Shellenbaum@Sun.COM dnp->dn_bonustype = dn->dn_next_bonustype[txgoff]; 58411935SMark.Shellenbaum@Sun.COM dn->dn_next_bonustype[txgoff] = 0; 58511935SMark.Shellenbaum@Sun.COM } 58611935SMark.Shellenbaum@Sun.COM 58711935SMark.Shellenbaum@Sun.COM /* 58811935SMark.Shellenbaum@Sun.COM * We will either remove a spill block when a file is being removed 58911935SMark.Shellenbaum@Sun.COM * or we have been asked to remove it. 59011935SMark.Shellenbaum@Sun.COM */ 59111935SMark.Shellenbaum@Sun.COM if (dn->dn_rm_spillblk[txgoff] || 59211935SMark.Shellenbaum@Sun.COM ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) && 59311935SMark.Shellenbaum@Sun.COM dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg)) { 59411935SMark.Shellenbaum@Sun.COM if ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) 59511935SMark.Shellenbaum@Sun.COM kill_spill = B_TRUE; 59611935SMark.Shellenbaum@Sun.COM dn->dn_rm_spillblk[txgoff] = 0; 59711935SMark.Shellenbaum@Sun.COM } 59811935SMark.Shellenbaum@Sun.COM 599789Sahrens if (dn->dn_next_indblkshift[txgoff]) { 600789Sahrens ASSERT(dnp->dn_nlevels == 1); 601789Sahrens dnp->dn_indblkshift = dn->dn_next_indblkshift[txgoff]; 602789Sahrens dn->dn_next_indblkshift[txgoff] = 0; 603789Sahrens } 604789Sahrens 605789Sahrens /* 606789Sahrens * Just take the live (open-context) values for checksum and compress. 607789Sahrens * Strictly speaking it's a future leak, but nothing bad happens if we 608789Sahrens * start using the new checksum or compress algorithm a little early. 609789Sahrens */ 610789Sahrens dnp->dn_checksum = dn->dn_checksum; 611789Sahrens dnp->dn_compress = dn->dn_compress; 612789Sahrens 613789Sahrens mutex_exit(&dn->dn_mtx); 614789Sahrens 61511935SMark.Shellenbaum@Sun.COM if (kill_spill) { 61611935SMark.Shellenbaum@Sun.COM (void) free_blocks(dn, &dn->dn_phys->dn_spill, 1, tx); 61711935SMark.Shellenbaum@Sun.COM mutex_enter(&dn->dn_mtx); 61811935SMark.Shellenbaum@Sun.COM dnp->dn_flags &= ~DNODE_FLAG_SPILL_BLKPTR; 61911935SMark.Shellenbaum@Sun.COM mutex_exit(&dn->dn_mtx); 62011935SMark.Shellenbaum@Sun.COM } 62111935SMark.Shellenbaum@Sun.COM 622789Sahrens /* process all the "freed" ranges in the file */ 6236992Smaybee while (rp = avl_last(&dn->dn_ranges[txgoff])) { 6246992Smaybee dnode_sync_free_range(dn, rp->fr_blkid, rp->fr_nblks, tx); 6256992Smaybee /* grab the mutex so we don't race with dnode_block_freed() */ 6266992Smaybee mutex_enter(&dn->dn_mtx); 6276992Smaybee avl_remove(&dn->dn_ranges[txgoff], rp); 6286992Smaybee mutex_exit(&dn->dn_mtx); 6296992Smaybee kmem_free(rp, sizeof (free_range_t)); 630789Sahrens } 6314944Smaybee 632789Sahrens if (dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg) { 6333547Smaybee dnode_sync_free(dn, tx); 6343547Smaybee return; 635789Sahrens } 636789Sahrens 6378644SMark.Maybee@Sun.COM if (dn->dn_next_nblkptr[txgoff]) { 6388644SMark.Maybee@Sun.COM /* this should only happen on a realloc */ 6398644SMark.Maybee@Sun.COM ASSERT(dn->dn_allocated_txg == tx->tx_txg); 6408644SMark.Maybee@Sun.COM if (dn->dn_next_nblkptr[txgoff] > dnp->dn_nblkptr) { 6418644SMark.Maybee@Sun.COM /* zero the new blkptrs we are gaining */ 6428644SMark.Maybee@Sun.COM bzero(dnp->dn_blkptr + dnp->dn_nblkptr, 6438644SMark.Maybee@Sun.COM sizeof (blkptr_t) * 6448644SMark.Maybee@Sun.COM (dn->dn_next_nblkptr[txgoff] - dnp->dn_nblkptr)); 6458644SMark.Maybee@Sun.COM #ifdef ZFS_DEBUG 6468644SMark.Maybee@Sun.COM } else { 6478644SMark.Maybee@Sun.COM int i; 6488644SMark.Maybee@Sun.COM ASSERT(dn->dn_next_nblkptr[txgoff] < dnp->dn_nblkptr); 6498644SMark.Maybee@Sun.COM /* the blkptrs we are losing better be unallocated */ 6508644SMark.Maybee@Sun.COM for (i = dn->dn_next_nblkptr[txgoff]; 6518644SMark.Maybee@Sun.COM i < dnp->dn_nblkptr; i++) 6528644SMark.Maybee@Sun.COM ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[i])); 6538644SMark.Maybee@Sun.COM #endif 6548644SMark.Maybee@Sun.COM } 6558644SMark.Maybee@Sun.COM mutex_enter(&dn->dn_mtx); 6568644SMark.Maybee@Sun.COM dnp->dn_nblkptr = dn->dn_next_nblkptr[txgoff]; 6578644SMark.Maybee@Sun.COM dn->dn_next_nblkptr[txgoff] = 0; 6588644SMark.Maybee@Sun.COM mutex_exit(&dn->dn_mtx); 6598644SMark.Maybee@Sun.COM } 6608644SMark.Maybee@Sun.COM 661789Sahrens if (dn->dn_next_nlevels[txgoff]) { 6623547Smaybee dnode_increase_indirection(dn, tx); 663789Sahrens dn->dn_next_nlevels[txgoff] = 0; 664789Sahrens } 665789Sahrens 6663547Smaybee dbuf_sync_list(list, tx); 667789Sahrens 6689396SMatthew.Ahrens@Sun.COM if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) { 6693547Smaybee ASSERT3P(list_head(list), ==, NULL); 6703547Smaybee dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg); 6713547Smaybee } 6721599Sahrens 6733547Smaybee /* 6743547Smaybee * Although we have dropped our reference to the dnode, it 6753547Smaybee * can't be evicted until its written, and we haven't yet 6763547Smaybee * initiated the IO for the dnode's dbuf. 6773547Smaybee */ 678789Sahrens } 679