1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 22*11935SMark.Shellenbaum@Sun.COM * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 26789Sahrens #include <sys/zfs_context.h> 27789Sahrens #include <sys/dbuf.h> 28789Sahrens #include <sys/dnode.h> 29789Sahrens #include <sys/dmu.h> 30789Sahrens #include <sys/dmu_tx.h> 31789Sahrens #include <sys/dmu_objset.h> 32789Sahrens #include <sys/dsl_dataset.h> 33789Sahrens #include <sys/spa.h> 34789Sahrens 35789Sahrens static void 36789Sahrens dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx) 37789Sahrens { 38789Sahrens dmu_buf_impl_t *db; 393547Smaybee int txgoff = tx->tx_txg & TXG_MASK; 403547Smaybee int nblkptr = dn->dn_phys->dn_nblkptr; 413547Smaybee int old_toplvl = dn->dn_phys->dn_nlevels - 1; 423547Smaybee int new_level = dn->dn_next_nlevels[txgoff]; 43789Sahrens int i; 44789Sahrens 453547Smaybee rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 463547Smaybee 473547Smaybee /* this dnode can't be paged out because it's dirty */ 48789Sahrens ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE); 49789Sahrens ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); 503547Smaybee ASSERT(new_level > 1 && dn->dn_phys->dn_nlevels > 0); 51789Sahrens 52789Sahrens db = dbuf_hold_level(dn, dn->dn_phys->dn_nlevels, 0, FTAG); 531544Seschrock ASSERT(db != NULL); 54789Sahrens 553547Smaybee dn->dn_phys->dn_nlevels = new_level; 564312Sgw25295 dprintf("os=%p obj=%llu, increase to %d\n", dn->dn_objset, 574312Sgw25295 dn->dn_object, dn->dn_phys->dn_nlevels); 58789Sahrens 593547Smaybee /* check for existing blkptrs in the dnode */ 603547Smaybee for (i = 0; i < nblkptr; i++) 613547Smaybee if (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[i])) 623547Smaybee break; 633547Smaybee if (i != nblkptr) { 643547Smaybee /* transfer dnode's block pointers to new indirect block */ 653547Smaybee (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT); 663547Smaybee ASSERT(db->db.db_data); 673547Smaybee ASSERT(arc_released(db->db_buf)); 683547Smaybee ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size); 693547Smaybee bcopy(dn->dn_phys->dn_blkptr, db->db.db_data, 703547Smaybee sizeof (blkptr_t) * nblkptr); 713547Smaybee arc_buf_freeze(db->db_buf); 723547Smaybee } 733547Smaybee 74789Sahrens /* set dbuf's parent pointers to new indirect buf */ 753547Smaybee for (i = 0; i < nblkptr; i++) { 763547Smaybee dmu_buf_impl_t *child = dbuf_find(dn, old_toplvl, i); 773547Smaybee 78789Sahrens if (child == NULL) 79789Sahrens continue; 803547Smaybee ASSERT3P(child->db_dnode, ==, dn); 813547Smaybee if (child->db_parent && child->db_parent != dn->dn_dbuf) { 823547Smaybee ASSERT(child->db_parent->db_level == db->db_level); 833547Smaybee ASSERT(child->db_blkptr != 843547Smaybee &dn->dn_phys->dn_blkptr[child->db_blkid]); 85789Sahrens mutex_exit(&child->db_mtx); 86789Sahrens continue; 87789Sahrens } 883547Smaybee ASSERT(child->db_parent == NULL || 893547Smaybee child->db_parent == dn->dn_dbuf); 90789Sahrens 913547Smaybee child->db_parent = db; 923547Smaybee dbuf_add_ref(db, child); 933547Smaybee if (db->db.db_data) 943547Smaybee child->db_blkptr = (blkptr_t *)db->db.db_data + i; 953547Smaybee else 963547Smaybee child->db_blkptr = NULL; 973547Smaybee dprintf_dbuf_bp(child, child->db_blkptr, 983547Smaybee "changed db_blkptr to new indirect %s", ""); 99789Sahrens 100789Sahrens mutex_exit(&child->db_mtx); 101789Sahrens } 102789Sahrens 1033547Smaybee bzero(dn->dn_phys->dn_blkptr, sizeof (blkptr_t) * nblkptr); 104789Sahrens 1051544Seschrock dbuf_rele(db, FTAG); 1063547Smaybee 1073547Smaybee rw_exit(&dn->dn_struct_rwlock); 108789Sahrens } 109789Sahrens 1106992Smaybee static int 111789Sahrens free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx) 112789Sahrens { 1136992Smaybee dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; 114789Sahrens uint64_t bytesfreed = 0; 1156992Smaybee int i, blocks_freed = 0; 116789Sahrens 1176992Smaybee dprintf("ds=%p obj=%llx num=%d\n", ds, dn->dn_object, num); 118789Sahrens 119789Sahrens for (i = 0; i < num; i++, bp++) { 120789Sahrens if (BP_IS_HOLE(bp)) 121789Sahrens continue; 122789Sahrens 12310922SJeff.Bonwick@Sun.COM bytesfreed += dsl_dataset_block_kill(ds, bp, tx, B_FALSE); 1242082Seschrock ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys)); 1253547Smaybee bzero(bp, sizeof (blkptr_t)); 1266992Smaybee blocks_freed += 1; 127789Sahrens } 128789Sahrens dnode_diduse_space(dn, -bytesfreed); 1296992Smaybee return (blocks_freed); 130789Sahrens } 131789Sahrens 132873Sek110237 #ifdef ZFS_DEBUG 133789Sahrens static void 134789Sahrens free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx) 135789Sahrens { 136789Sahrens int off, num; 137789Sahrens int i, err, epbs; 138789Sahrens uint64_t txg = tx->tx_txg; 139789Sahrens 140789Sahrens epbs = db->db_dnode->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; 141789Sahrens off = start - (db->db_blkid * 1<<epbs); 142789Sahrens num = end - start + 1; 143789Sahrens 144789Sahrens ASSERT3U(off, >=, 0); 145789Sahrens ASSERT3U(num, >=, 0); 146789Sahrens ASSERT3U(db->db_level, >, 0); 147789Sahrens ASSERT3U(db->db.db_size, ==, 1<<db->db_dnode->dn_phys->dn_indblkshift); 148789Sahrens ASSERT3U(off+num, <=, db->db.db_size >> SPA_BLKPTRSHIFT); 149789Sahrens ASSERT(db->db_blkptr != NULL); 150789Sahrens 151789Sahrens for (i = off; i < off+num; i++) { 152789Sahrens uint64_t *buf; 1533547Smaybee dmu_buf_impl_t *child; 1543547Smaybee dbuf_dirty_record_t *dr; 155789Sahrens int j; 156789Sahrens 157789Sahrens ASSERT(db->db_level == 1); 158789Sahrens 159789Sahrens rw_enter(&db->db_dnode->dn_struct_rwlock, RW_READER); 160789Sahrens err = dbuf_hold_impl(db->db_dnode, db->db_level-1, 1614312Sgw25295 (db->db_blkid << epbs) + i, TRUE, FTAG, &child); 162789Sahrens rw_exit(&db->db_dnode->dn_struct_rwlock); 163789Sahrens if (err == ENOENT) 164789Sahrens continue; 165789Sahrens ASSERT(err == 0); 166789Sahrens ASSERT(child->db_level == 0); 1673547Smaybee dr = child->db_last_dirty; 1683547Smaybee while (dr && dr->dr_txg > txg) 1693547Smaybee dr = dr->dr_next; 1703547Smaybee ASSERT(dr == NULL || dr->dr_txg == txg); 171789Sahrens 1723547Smaybee /* data_old better be zeroed */ 1733547Smaybee if (dr) { 1743547Smaybee buf = dr->dt.dl.dr_data->b_data; 175789Sahrens for (j = 0; j < child->db.db_size >> 3; j++) { 176789Sahrens if (buf[j] != 0) { 177789Sahrens panic("freed data not zero: " 178789Sahrens "child=%p i=%d off=%d num=%d\n", 1797240Srh87107 (void *)child, i, off, num); 180789Sahrens } 181789Sahrens } 182789Sahrens } 183789Sahrens 184789Sahrens /* 185789Sahrens * db_data better be zeroed unless it's dirty in a 186789Sahrens * future txg. 187789Sahrens */ 188789Sahrens mutex_enter(&child->db_mtx); 189789Sahrens buf = child->db.db_data; 190789Sahrens if (buf != NULL && child->db_state != DB_FILL && 1913547Smaybee child->db_last_dirty == NULL) { 192789Sahrens for (j = 0; j < child->db.db_size >> 3; j++) { 193789Sahrens if (buf[j] != 0) { 194789Sahrens panic("freed data not zero: " 195789Sahrens "child=%p i=%d off=%d num=%d\n", 1967240Srh87107 (void *)child, i, off, num); 197789Sahrens } 198789Sahrens } 199789Sahrens } 200789Sahrens mutex_exit(&child->db_mtx); 201789Sahrens 2021544Seschrock dbuf_rele(child, FTAG); 203789Sahrens } 204873Sek110237 } 205789Sahrens #endif 206789Sahrens 2076992Smaybee #define ALL -1 2086992Smaybee 209789Sahrens static int 210789Sahrens free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc, 211789Sahrens dmu_tx_t *tx) 212789Sahrens { 213789Sahrens dnode_t *dn = db->db_dnode; 214789Sahrens blkptr_t *bp; 215789Sahrens dmu_buf_impl_t *subdb; 216789Sahrens uint64_t start, end, dbstart, dbend, i; 217789Sahrens int epbs, shift, err; 218789Sahrens int all = TRUE; 2196992Smaybee int blocks_freed = 0; 220789Sahrens 2216992Smaybee /* 2226992Smaybee * There is a small possibility that this block will not be cached: 2236992Smaybee * 1 - if level > 1 and there are no children with level <= 1 2246992Smaybee * 2 - if we didn't get a dirty hold (because this block had just 2256992Smaybee * finished being written -- and so had no holds), and then this 2266992Smaybee * block got evicted before we got here. 2276992Smaybee */ 2286992Smaybee if (db->db_state != DB_CACHED) 2296992Smaybee (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED); 2306992Smaybee 231789Sahrens arc_release(db->db_buf, db); 232789Sahrens bp = (blkptr_t *)db->db.db_data; 233789Sahrens 234789Sahrens epbs = db->db_dnode->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; 235789Sahrens shift = (db->db_level - 1) * epbs; 236789Sahrens dbstart = db->db_blkid << epbs; 237789Sahrens start = blkid >> shift; 238789Sahrens if (dbstart < start) { 239789Sahrens bp += start - dbstart; 240789Sahrens all = FALSE; 241789Sahrens } else { 242789Sahrens start = dbstart; 243789Sahrens } 244789Sahrens dbend = ((db->db_blkid + 1) << epbs) - 1; 245789Sahrens end = (blkid + nblks - 1) >> shift; 246789Sahrens if (dbend <= end) 247789Sahrens end = dbend; 248789Sahrens else if (all) 249789Sahrens all = trunc; 250789Sahrens ASSERT3U(start, <=, end); 251789Sahrens 252789Sahrens if (db->db_level == 1) { 253873Sek110237 FREE_VERIFY(db, start, end, tx); 2546992Smaybee blocks_freed = free_blocks(dn, bp, end-start+1, tx); 2553093Sahrens arc_buf_freeze(db->db_buf); 2566992Smaybee ASSERT(all || blocks_freed == 0 || db->db_last_dirty); 2576992Smaybee return (all ? ALL : blocks_freed); 258789Sahrens } 259789Sahrens 260789Sahrens for (i = start; i <= end; i++, bp++) { 261789Sahrens if (BP_IS_HOLE(bp)) 262789Sahrens continue; 263789Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 264789Sahrens err = dbuf_hold_impl(dn, db->db_level-1, i, TRUE, FTAG, &subdb); 265789Sahrens ASSERT3U(err, ==, 0); 266789Sahrens rw_exit(&dn->dn_struct_rwlock); 267789Sahrens 2686992Smaybee if (free_children(subdb, blkid, nblks, trunc, tx) == ALL) { 269789Sahrens ASSERT3P(subdb->db_blkptr, ==, bp); 2706992Smaybee blocks_freed += free_blocks(dn, bp, 1, tx); 2711163Smaybee } else { 2721163Smaybee all = FALSE; 273789Sahrens } 2741544Seschrock dbuf_rele(subdb, FTAG); 275789Sahrens } 2763093Sahrens arc_buf_freeze(db->db_buf); 277789Sahrens #ifdef ZFS_DEBUG 278789Sahrens bp -= (end-start)+1; 279789Sahrens for (i = start; i <= end; i++, bp++) { 280789Sahrens if (i == start && blkid != 0) 281789Sahrens continue; 282789Sahrens else if (i == end && !trunc) 283789Sahrens continue; 284789Sahrens ASSERT3U(bp->blk_birth, ==, 0); 285789Sahrens } 286789Sahrens #endif 2876992Smaybee ASSERT(all || blocks_freed == 0 || db->db_last_dirty); 2886992Smaybee return (all ? ALL : blocks_freed); 289789Sahrens } 290789Sahrens 291789Sahrens /* 292789Sahrens * free_range: Traverse the indicated range of the provided file 293789Sahrens * and "free" all the blocks contained there. 294789Sahrens */ 295789Sahrens static void 296789Sahrens dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx) 297789Sahrens { 298789Sahrens blkptr_t *bp = dn->dn_phys->dn_blkptr; 299789Sahrens dmu_buf_impl_t *db; 300789Sahrens int trunc, start, end, shift, i, err; 301789Sahrens int dnlevel = dn->dn_phys->dn_nlevels; 302789Sahrens 303789Sahrens if (blkid > dn->dn_phys->dn_maxblkid) 304789Sahrens return; 305789Sahrens 306789Sahrens ASSERT(dn->dn_phys->dn_maxblkid < UINT64_MAX); 307789Sahrens trunc = blkid + nblks > dn->dn_phys->dn_maxblkid; 308789Sahrens if (trunc) 309789Sahrens nblks = dn->dn_phys->dn_maxblkid - blkid + 1; 310789Sahrens 311789Sahrens /* There are no indirect blocks in the object */ 312789Sahrens if (dnlevel == 1) { 313789Sahrens if (blkid >= dn->dn_phys->dn_nblkptr) { 314789Sahrens /* this range was never made persistent */ 315789Sahrens return; 316789Sahrens } 317789Sahrens ASSERT3U(blkid + nblks, <=, dn->dn_phys->dn_nblkptr); 3186992Smaybee (void) free_blocks(dn, bp + blkid, nblks, tx); 319789Sahrens if (trunc) { 320789Sahrens uint64_t off = (dn->dn_phys->dn_maxblkid + 1) * 321789Sahrens (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT); 322789Sahrens dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0); 323789Sahrens ASSERT(off < dn->dn_phys->dn_maxblkid || 324789Sahrens dn->dn_phys->dn_maxblkid == 0 || 3256992Smaybee dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0); 326789Sahrens } 327789Sahrens return; 328789Sahrens } 329789Sahrens 330789Sahrens shift = (dnlevel - 1) * (dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT); 331789Sahrens start = blkid >> shift; 332789Sahrens ASSERT(start < dn->dn_phys->dn_nblkptr); 333789Sahrens end = (blkid + nblks - 1) >> shift; 334789Sahrens bp += start; 335789Sahrens for (i = start; i <= end; i++, bp++) { 336789Sahrens if (BP_IS_HOLE(bp)) 337789Sahrens continue; 338789Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 339789Sahrens err = dbuf_hold_impl(dn, dnlevel-1, i, TRUE, FTAG, &db); 340789Sahrens ASSERT3U(err, ==, 0); 341789Sahrens rw_exit(&dn->dn_struct_rwlock); 342789Sahrens 3436992Smaybee if (free_children(db, blkid, nblks, trunc, tx) == ALL) { 344789Sahrens ASSERT3P(db->db_blkptr, ==, bp); 3456992Smaybee (void) free_blocks(dn, bp, 1, tx); 346789Sahrens } 3471544Seschrock dbuf_rele(db, FTAG); 348789Sahrens } 349789Sahrens if (trunc) { 350789Sahrens uint64_t off = (dn->dn_phys->dn_maxblkid + 1) * 351789Sahrens (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT); 352789Sahrens dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0); 353789Sahrens ASSERT(off < dn->dn_phys->dn_maxblkid || 354789Sahrens dn->dn_phys->dn_maxblkid == 0 || 3556992Smaybee dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0); 356789Sahrens } 357789Sahrens } 358789Sahrens 3591544Seschrock /* 3601544Seschrock * Try to kick all the dnodes dbufs out of the cache... 3611544Seschrock */ 3624944Smaybee void 3634944Smaybee dnode_evict_dbufs(dnode_t *dn) 3641544Seschrock { 3651596Sahrens int progress; 3661596Sahrens int pass = 0; 3671596Sahrens 3681596Sahrens do { 3691602Smaybee dmu_buf_impl_t *db, marker; 3701596Sahrens int evicting = FALSE; 3711544Seschrock 3721596Sahrens progress = FALSE; 3731596Sahrens mutex_enter(&dn->dn_dbufs_mtx); 3741602Smaybee list_insert_tail(&dn->dn_dbufs, &marker); 3751602Smaybee db = list_head(&dn->dn_dbufs); 3761602Smaybee for (; db != ▮ db = list_head(&dn->dn_dbufs)) { 3771602Smaybee list_remove(&dn->dn_dbufs, db); 3781602Smaybee list_insert_tail(&dn->dn_dbufs, db); 3794312Sgw25295 ASSERT3P(db->db_dnode, ==, dn); 3801596Sahrens 3811544Seschrock mutex_enter(&db->db_mtx); 3821596Sahrens if (db->db_state == DB_EVICTING) { 3831596Sahrens progress = TRUE; 3841596Sahrens evicting = TRUE; 3851596Sahrens mutex_exit(&db->db_mtx); 3861596Sahrens } else if (refcount_is_zero(&db->db_holds)) { 3871596Sahrens progress = TRUE; 3881596Sahrens dbuf_clear(db); /* exits db_mtx for us */ 3891596Sahrens } else { 3901596Sahrens mutex_exit(&db->db_mtx); 3911596Sahrens } 3921596Sahrens 3931544Seschrock } 3941602Smaybee list_remove(&dn->dn_dbufs, &marker); 3951596Sahrens /* 3961596Sahrens * NB: we need to drop dn_dbufs_mtx between passes so 3971596Sahrens * that any DB_EVICTING dbufs can make progress. 3981596Sahrens * Ideally, we would have some cv we could wait on, but 3991596Sahrens * since we don't, just wait a bit to give the other 4001596Sahrens * thread a chance to run. 4011596Sahrens */ 4021596Sahrens mutex_exit(&dn->dn_dbufs_mtx); 4031596Sahrens if (evicting) 4041596Sahrens delay(1); 4051596Sahrens pass++; 4061596Sahrens ASSERT(pass < 100); /* sanity check */ 4071596Sahrens } while (progress); 4081596Sahrens 4091544Seschrock rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 4101544Seschrock if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) { 4111544Seschrock mutex_enter(&dn->dn_bonus->db_mtx); 4121544Seschrock dbuf_evict(dn->dn_bonus); 4131544Seschrock dn->dn_bonus = NULL; 4141544Seschrock } 4151544Seschrock rw_exit(&dn->dn_struct_rwlock); 4161544Seschrock } 4171544Seschrock 4183547Smaybee static void 4193547Smaybee dnode_undirty_dbufs(list_t *list) 4203547Smaybee { 4213547Smaybee dbuf_dirty_record_t *dr; 4223547Smaybee 4233547Smaybee while (dr = list_head(list)) { 4243547Smaybee dmu_buf_impl_t *db = dr->dr_dbuf; 4253547Smaybee uint64_t txg = dr->dr_txg; 4263547Smaybee 42710922SJeff.Bonwick@Sun.COM if (db->db_level != 0) 42810922SJeff.Bonwick@Sun.COM dnode_undirty_dbufs(&dr->dt.di.dr_children); 42910922SJeff.Bonwick@Sun.COM 4303547Smaybee mutex_enter(&db->db_mtx); 4313547Smaybee /* XXX - use dbuf_undirty()? */ 4323547Smaybee list_remove(list, dr); 4333547Smaybee ASSERT(db->db_last_dirty == dr); 4343547Smaybee db->db_last_dirty = NULL; 4353547Smaybee db->db_dirtycnt -= 1; 4363547Smaybee if (db->db_level == 0) { 437*11935SMark.Shellenbaum@Sun.COM ASSERT(db->db_blkid == DMU_BONUS_BLKID || 4383547Smaybee dr->dt.dl.dr_data == db->db_buf); 4393547Smaybee dbuf_unoverride(dr); 4403547Smaybee } 4413547Smaybee kmem_free(dr, sizeof (dbuf_dirty_record_t)); 44210922SJeff.Bonwick@Sun.COM dbuf_rele_and_unlock(db, (void *)(uintptr_t)txg); 4433547Smaybee } 4443547Smaybee } 4453547Smaybee 4463547Smaybee static void 447789Sahrens dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) 448789Sahrens { 449789Sahrens int txgoff = tx->tx_txg & TXG_MASK; 450789Sahrens 451789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 452789Sahrens 4536992Smaybee /* 4546992Smaybee * Our contents should have been freed in dnode_sync() by the 4556992Smaybee * free range record inserted by the caller of dnode_free(). 4566992Smaybee */ 4576992Smaybee ASSERT3U(DN_USED_BYTES(dn->dn_phys), ==, 0); 4586992Smaybee ASSERT(BP_IS_HOLE(dn->dn_phys->dn_blkptr)); 4596992Smaybee 4603547Smaybee dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]); 4614944Smaybee dnode_evict_dbufs(dn); 4621544Seschrock ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL); 4631544Seschrock 4641544Seschrock /* 4651544Seschrock * XXX - It would be nice to assert this, but we may still 4661544Seschrock * have residual holds from async evictions from the arc... 4671544Seschrock * 4683444Sek110237 * zfs_obj_to_path() also depends on this being 4693444Sek110237 * commented out. 4703444Sek110237 * 4711544Seschrock * ASSERT3U(refcount_count(&dn->dn_holds), ==, 1); 4721544Seschrock */ 473789Sahrens 474789Sahrens /* Undirty next bits */ 475789Sahrens dn->dn_next_nlevels[txgoff] = 0; 476789Sahrens dn->dn_next_indblkshift[txgoff] = 0; 4771596Sahrens dn->dn_next_blksz[txgoff] = 0; 478789Sahrens 479789Sahrens /* ASSERT(blkptrs are zero); */ 480789Sahrens ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE); 481789Sahrens ASSERT(dn->dn_type != DMU_OT_NONE); 482789Sahrens 483789Sahrens ASSERT(dn->dn_free_txg > 0); 484789Sahrens if (dn->dn_allocated_txg != dn->dn_free_txg) 485789Sahrens dbuf_will_dirty(dn->dn_dbuf, tx); 486789Sahrens bzero(dn->dn_phys, sizeof (dnode_phys_t)); 487789Sahrens 488789Sahrens mutex_enter(&dn->dn_mtx); 489789Sahrens dn->dn_type = DMU_OT_NONE; 490789Sahrens dn->dn_maxblkid = 0; 491789Sahrens dn->dn_allocated_txg = 0; 4924480Sgw25295 dn->dn_free_txg = 0; 493*11935SMark.Shellenbaum@Sun.COM dn->dn_have_spill = B_FALSE; 494789Sahrens mutex_exit(&dn->dn_mtx); 495789Sahrens 4961544Seschrock ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); 497789Sahrens 498789Sahrens dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg); 499789Sahrens /* 500789Sahrens * Now that we've released our hold, the dnode may 501789Sahrens * be evicted, so we musn't access it. 502789Sahrens */ 503789Sahrens } 504789Sahrens 505789Sahrens /* 5063547Smaybee * Write out the dnode's dirty buffers. 507789Sahrens */ 5083547Smaybee void 5093547Smaybee dnode_sync(dnode_t *dn, dmu_tx_t *tx) 510789Sahrens { 511789Sahrens free_range_t *rp; 5123547Smaybee dnode_phys_t *dnp = dn->dn_phys; 513789Sahrens int txgoff = tx->tx_txg & TXG_MASK; 5143547Smaybee list_t *list = &dn->dn_dirty_records[txgoff]; 5159396SMatthew.Ahrens@Sun.COM static const dnode_phys_t zerodn = { 0 }; 516*11935SMark.Shellenbaum@Sun.COM boolean_t kill_spill = B_FALSE; 517789Sahrens 518789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 519789Sahrens ASSERT(dnp->dn_type != DMU_OT_NONE || dn->dn_allocated_txg); 5209396SMatthew.Ahrens@Sun.COM ASSERT(dnp->dn_type != DMU_OT_NONE || 5219396SMatthew.Ahrens@Sun.COM bcmp(dnp, &zerodn, DNODE_SIZE) == 0); 522873Sek110237 DNODE_VERIFY(dn); 5231596Sahrens 5243547Smaybee ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf)); 525789Sahrens 5269396SMatthew.Ahrens@Sun.COM if (dmu_objset_userused_enabled(dn->dn_objset) && 5279396SMatthew.Ahrens@Sun.COM !DMU_OBJECT_IS_SPECIAL(dn->dn_object)) { 528*11935SMark.Shellenbaum@Sun.COM mutex_enter(&dn->dn_mtx); 529*11935SMark.Shellenbaum@Sun.COM dn->dn_oldused = DN_USED_BYTES(dn->dn_phys); 530*11935SMark.Shellenbaum@Sun.COM dn->dn_oldflags = dn->dn_phys->dn_flags; 531*11935SMark.Shellenbaum@Sun.COM dn->dn_id_flags |= DN_ID_SYNC; 5329396SMatthew.Ahrens@Sun.COM dn->dn_phys->dn_flags |= DNODE_FLAG_USERUSED_ACCOUNTED; 533*11935SMark.Shellenbaum@Sun.COM mutex_exit(&dn->dn_mtx); 534*11935SMark.Shellenbaum@Sun.COM dmu_objset_userquota_get_ids(dn, B_FALSE); 5359396SMatthew.Ahrens@Sun.COM } else { 5369396SMatthew.Ahrens@Sun.COM /* Once we account for it, we should always account for it. */ 5379396SMatthew.Ahrens@Sun.COM ASSERT(!(dn->dn_phys->dn_flags & 5389396SMatthew.Ahrens@Sun.COM DNODE_FLAG_USERUSED_ACCOUNTED)); 5399396SMatthew.Ahrens@Sun.COM } 5409396SMatthew.Ahrens@Sun.COM 541789Sahrens mutex_enter(&dn->dn_mtx); 542789Sahrens if (dn->dn_allocated_txg == tx->tx_txg) { 543789Sahrens /* The dnode is newly allocated or reallocated */ 544789Sahrens if (dnp->dn_type == DMU_OT_NONE) { 545789Sahrens /* this is a first alloc, not a realloc */ 546789Sahrens dnp->dn_nlevels = 1; 5478644SMark.Maybee@Sun.COM dnp->dn_nblkptr = dn->dn_nblkptr; 548789Sahrens } 549789Sahrens 550789Sahrens dnp->dn_type = dn->dn_type; 551789Sahrens dnp->dn_bonustype = dn->dn_bonustype; 552789Sahrens dnp->dn_bonuslen = dn->dn_bonuslen; 553789Sahrens } 554789Sahrens 5553547Smaybee ASSERT(dnp->dn_nlevels > 1 || 5561599Sahrens BP_IS_HOLE(&dnp->dn_blkptr[0]) || 5571599Sahrens BP_GET_LSIZE(&dnp->dn_blkptr[0]) == 5581599Sahrens dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); 5591599Sahrens 5601596Sahrens if (dn->dn_next_blksz[txgoff]) { 5611596Sahrens ASSERT(P2PHASE(dn->dn_next_blksz[txgoff], 562789Sahrens SPA_MINBLOCKSIZE) == 0); 5631599Sahrens ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[0]) || 5646992Smaybee dn->dn_maxblkid == 0 || list_head(list) != NULL || 5651600Sahrens dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT == 5661600Sahrens dnp->dn_datablkszsec); 567789Sahrens dnp->dn_datablkszsec = 5681596Sahrens dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT; 5691596Sahrens dn->dn_next_blksz[txgoff] = 0; 570789Sahrens } 571789Sahrens 5724944Smaybee if (dn->dn_next_bonuslen[txgoff]) { 5734944Smaybee if (dn->dn_next_bonuslen[txgoff] == DN_ZERO_BONUSLEN) 5744944Smaybee dnp->dn_bonuslen = 0; 5754944Smaybee else 5764944Smaybee dnp->dn_bonuslen = dn->dn_next_bonuslen[txgoff]; 5774944Smaybee ASSERT(dnp->dn_bonuslen <= DN_MAX_BONUSLEN); 5784944Smaybee dn->dn_next_bonuslen[txgoff] = 0; 5794944Smaybee } 5804944Smaybee 581*11935SMark.Shellenbaum@Sun.COM if (dn->dn_next_bonustype[txgoff]) { 582*11935SMark.Shellenbaum@Sun.COM ASSERT(dn->dn_next_bonustype[txgoff] < DMU_OT_NUMTYPES); 583*11935SMark.Shellenbaum@Sun.COM dnp->dn_bonustype = dn->dn_next_bonustype[txgoff]; 584*11935SMark.Shellenbaum@Sun.COM dn->dn_next_bonustype[txgoff] = 0; 585*11935SMark.Shellenbaum@Sun.COM } 586*11935SMark.Shellenbaum@Sun.COM 587*11935SMark.Shellenbaum@Sun.COM /* 588*11935SMark.Shellenbaum@Sun.COM * We will either remove a spill block when a file is being removed 589*11935SMark.Shellenbaum@Sun.COM * or we have been asked to remove it. 590*11935SMark.Shellenbaum@Sun.COM */ 591*11935SMark.Shellenbaum@Sun.COM if (dn->dn_rm_spillblk[txgoff] || 592*11935SMark.Shellenbaum@Sun.COM ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) && 593*11935SMark.Shellenbaum@Sun.COM dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg)) { 594*11935SMark.Shellenbaum@Sun.COM if ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) 595*11935SMark.Shellenbaum@Sun.COM kill_spill = B_TRUE; 596*11935SMark.Shellenbaum@Sun.COM dn->dn_rm_spillblk[txgoff] = 0; 597*11935SMark.Shellenbaum@Sun.COM } 598*11935SMark.Shellenbaum@Sun.COM 599789Sahrens if (dn->dn_next_indblkshift[txgoff]) { 600789Sahrens ASSERT(dnp->dn_nlevels == 1); 601789Sahrens dnp->dn_indblkshift = dn->dn_next_indblkshift[txgoff]; 602789Sahrens dn->dn_next_indblkshift[txgoff] = 0; 603789Sahrens } 604789Sahrens 605789Sahrens /* 606789Sahrens * Just take the live (open-context) values for checksum and compress. 607789Sahrens * Strictly speaking it's a future leak, but nothing bad happens if we 608789Sahrens * start using the new checksum or compress algorithm a little early. 609789Sahrens */ 610789Sahrens dnp->dn_checksum = dn->dn_checksum; 611789Sahrens dnp->dn_compress = dn->dn_compress; 612789Sahrens 613789Sahrens mutex_exit(&dn->dn_mtx); 614789Sahrens 615*11935SMark.Shellenbaum@Sun.COM if (kill_spill) { 616*11935SMark.Shellenbaum@Sun.COM dmu_buf_impl_t *spilldb; 617*11935SMark.Shellenbaum@Sun.COM (void) free_blocks(dn, &dn->dn_phys->dn_spill, 1, tx); 618*11935SMark.Shellenbaum@Sun.COM mutex_enter(&dn->dn_mtx); 619*11935SMark.Shellenbaum@Sun.COM dnp->dn_flags &= ~DNODE_FLAG_SPILL_BLKPTR; 620*11935SMark.Shellenbaum@Sun.COM mutex_exit(&dn->dn_mtx); 621*11935SMark.Shellenbaum@Sun.COM rw_enter(&dn->dn_struct_rwlock, RW_READER); 622*11935SMark.Shellenbaum@Sun.COM spilldb = dbuf_find(dn, 0, DMU_SPILL_BLKID); 623*11935SMark.Shellenbaum@Sun.COM if (spilldb) { 624*11935SMark.Shellenbaum@Sun.COM spilldb->db_blkptr = NULL; 625*11935SMark.Shellenbaum@Sun.COM mutex_exit(&spilldb->db_mtx); 626*11935SMark.Shellenbaum@Sun.COM } 627*11935SMark.Shellenbaum@Sun.COM rw_exit(&dn->dn_struct_rwlock); 628*11935SMark.Shellenbaum@Sun.COM } 629*11935SMark.Shellenbaum@Sun.COM 630789Sahrens /* process all the "freed" ranges in the file */ 6316992Smaybee while (rp = avl_last(&dn->dn_ranges[txgoff])) { 6326992Smaybee dnode_sync_free_range(dn, rp->fr_blkid, rp->fr_nblks, tx); 6336992Smaybee /* grab the mutex so we don't race with dnode_block_freed() */ 6346992Smaybee mutex_enter(&dn->dn_mtx); 6356992Smaybee avl_remove(&dn->dn_ranges[txgoff], rp); 6366992Smaybee mutex_exit(&dn->dn_mtx); 6376992Smaybee kmem_free(rp, sizeof (free_range_t)); 638789Sahrens } 6394944Smaybee 640789Sahrens if (dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg) { 6413547Smaybee dnode_sync_free(dn, tx); 6423547Smaybee return; 643789Sahrens } 644789Sahrens 6458644SMark.Maybee@Sun.COM if (dn->dn_next_nblkptr[txgoff]) { 6468644SMark.Maybee@Sun.COM /* this should only happen on a realloc */ 6478644SMark.Maybee@Sun.COM ASSERT(dn->dn_allocated_txg == tx->tx_txg); 6488644SMark.Maybee@Sun.COM if (dn->dn_next_nblkptr[txgoff] > dnp->dn_nblkptr) { 6498644SMark.Maybee@Sun.COM /* zero the new blkptrs we are gaining */ 6508644SMark.Maybee@Sun.COM bzero(dnp->dn_blkptr + dnp->dn_nblkptr, 6518644SMark.Maybee@Sun.COM sizeof (blkptr_t) * 6528644SMark.Maybee@Sun.COM (dn->dn_next_nblkptr[txgoff] - dnp->dn_nblkptr)); 6538644SMark.Maybee@Sun.COM #ifdef ZFS_DEBUG 6548644SMark.Maybee@Sun.COM } else { 6558644SMark.Maybee@Sun.COM int i; 6568644SMark.Maybee@Sun.COM ASSERT(dn->dn_next_nblkptr[txgoff] < dnp->dn_nblkptr); 6578644SMark.Maybee@Sun.COM /* the blkptrs we are losing better be unallocated */ 6588644SMark.Maybee@Sun.COM for (i = dn->dn_next_nblkptr[txgoff]; 6598644SMark.Maybee@Sun.COM i < dnp->dn_nblkptr; i++) 6608644SMark.Maybee@Sun.COM ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[i])); 6618644SMark.Maybee@Sun.COM #endif 6628644SMark.Maybee@Sun.COM } 6638644SMark.Maybee@Sun.COM mutex_enter(&dn->dn_mtx); 6648644SMark.Maybee@Sun.COM dnp->dn_nblkptr = dn->dn_next_nblkptr[txgoff]; 6658644SMark.Maybee@Sun.COM dn->dn_next_nblkptr[txgoff] = 0; 6668644SMark.Maybee@Sun.COM mutex_exit(&dn->dn_mtx); 6678644SMark.Maybee@Sun.COM } 6688644SMark.Maybee@Sun.COM 669789Sahrens if (dn->dn_next_nlevels[txgoff]) { 6703547Smaybee dnode_increase_indirection(dn, tx); 671789Sahrens dn->dn_next_nlevels[txgoff] = 0; 672789Sahrens } 673789Sahrens 6743547Smaybee dbuf_sync_list(list, tx); 675789Sahrens 6769396SMatthew.Ahrens@Sun.COM if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) { 6773547Smaybee ASSERT3P(list_head(list), ==, NULL); 6783547Smaybee dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg); 6793547Smaybee } 6801599Sahrens 6813547Smaybee /* 6823547Smaybee * Although we have dropped our reference to the dnode, it 6833547Smaybee * can't be evicted until its written, and we haven't yet 6843547Smaybee * initiated the IO for the dnode's dbuf. 6853547Smaybee */ 686789Sahrens } 687