1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51544Seschrock * Common Development and Distribution License (the "License"). 61544Seschrock * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 228644SMark.Maybee@Sun.COM * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 26789Sahrens #include <sys/zfs_context.h> 27789Sahrens #include <sys/dbuf.h> 28789Sahrens #include <sys/dnode.h> 29789Sahrens #include <sys/dmu.h> 30789Sahrens #include <sys/dmu_tx.h> 31789Sahrens #include <sys/dmu_objset.h> 32789Sahrens #include <sys/dsl_dataset.h> 33789Sahrens #include <sys/spa.h> 34789Sahrens 35789Sahrens static void 36789Sahrens dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx) 37789Sahrens { 38789Sahrens dmu_buf_impl_t *db; 393547Smaybee int txgoff = tx->tx_txg & TXG_MASK; 403547Smaybee int nblkptr = dn->dn_phys->dn_nblkptr; 413547Smaybee int old_toplvl = dn->dn_phys->dn_nlevels - 1; 423547Smaybee int new_level = dn->dn_next_nlevels[txgoff]; 43789Sahrens int i; 44789Sahrens 453547Smaybee rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 463547Smaybee 473547Smaybee /* this dnode can't be paged out because it's dirty */ 48789Sahrens ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE); 49789Sahrens ASSERT(RW_WRITE_HELD(&dn->dn_struct_rwlock)); 503547Smaybee ASSERT(new_level > 1 && dn->dn_phys->dn_nlevels > 0); 51789Sahrens 52789Sahrens db = dbuf_hold_level(dn, dn->dn_phys->dn_nlevels, 0, FTAG); 531544Seschrock ASSERT(db != NULL); 54789Sahrens 553547Smaybee dn->dn_phys->dn_nlevels = new_level; 564312Sgw25295 dprintf("os=%p obj=%llu, increase to %d\n", dn->dn_objset, 574312Sgw25295 dn->dn_object, dn->dn_phys->dn_nlevels); 58789Sahrens 593547Smaybee /* check for existing blkptrs in the dnode */ 603547Smaybee for (i = 0; i < nblkptr; i++) 613547Smaybee if (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[i])) 623547Smaybee break; 633547Smaybee if (i != nblkptr) { 643547Smaybee /* transfer dnode's block pointers to new indirect block */ 653547Smaybee (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT); 663547Smaybee ASSERT(db->db.db_data); 673547Smaybee ASSERT(arc_released(db->db_buf)); 683547Smaybee ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size); 693547Smaybee bcopy(dn->dn_phys->dn_blkptr, db->db.db_data, 703547Smaybee sizeof (blkptr_t) * nblkptr); 713547Smaybee arc_buf_freeze(db->db_buf); 723547Smaybee } 733547Smaybee 74789Sahrens /* set dbuf's parent pointers to new indirect buf */ 753547Smaybee for (i = 0; i < nblkptr; i++) { 763547Smaybee dmu_buf_impl_t *child = dbuf_find(dn, old_toplvl, i); 773547Smaybee 78789Sahrens if (child == NULL) 79789Sahrens continue; 803547Smaybee ASSERT3P(child->db_dnode, ==, dn); 813547Smaybee if (child->db_parent && child->db_parent != dn->dn_dbuf) { 823547Smaybee ASSERT(child->db_parent->db_level == db->db_level); 833547Smaybee ASSERT(child->db_blkptr != 843547Smaybee &dn->dn_phys->dn_blkptr[child->db_blkid]); 85789Sahrens mutex_exit(&child->db_mtx); 86789Sahrens continue; 87789Sahrens } 883547Smaybee ASSERT(child->db_parent == NULL || 893547Smaybee child->db_parent == dn->dn_dbuf); 90789Sahrens 913547Smaybee child->db_parent = db; 923547Smaybee dbuf_add_ref(db, child); 933547Smaybee if (db->db.db_data) 943547Smaybee child->db_blkptr = (blkptr_t *)db->db.db_data + i; 953547Smaybee else 963547Smaybee child->db_blkptr = NULL; 973547Smaybee dprintf_dbuf_bp(child, child->db_blkptr, 983547Smaybee "changed db_blkptr to new indirect %s", ""); 99789Sahrens 100789Sahrens mutex_exit(&child->db_mtx); 101789Sahrens } 102789Sahrens 1033547Smaybee bzero(dn->dn_phys->dn_blkptr, sizeof (blkptr_t) * nblkptr); 104789Sahrens 1051544Seschrock dbuf_rele(db, FTAG); 1063547Smaybee 1073547Smaybee rw_exit(&dn->dn_struct_rwlock); 108789Sahrens } 109789Sahrens 1106992Smaybee static int 111789Sahrens free_blocks(dnode_t *dn, blkptr_t *bp, int num, dmu_tx_t *tx) 112789Sahrens { 1136992Smaybee dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; 114789Sahrens uint64_t bytesfreed = 0; 1156992Smaybee int i, blocks_freed = 0; 116789Sahrens 1176992Smaybee dprintf("ds=%p obj=%llx num=%d\n", ds, dn->dn_object, num); 118789Sahrens 119789Sahrens for (i = 0; i < num; i++, bp++) { 120789Sahrens if (BP_IS_HOLE(bp)) 121789Sahrens continue; 122789Sahrens 1236992Smaybee bytesfreed += dsl_dataset_block_kill(ds, bp, dn->dn_zio, tx); 1242082Seschrock ASSERT3U(bytesfreed, <=, DN_USED_BYTES(dn->dn_phys)); 1253547Smaybee bzero(bp, sizeof (blkptr_t)); 1266992Smaybee blocks_freed += 1; 127789Sahrens } 128789Sahrens dnode_diduse_space(dn, -bytesfreed); 1296992Smaybee return (blocks_freed); 130789Sahrens } 131789Sahrens 132873Sek110237 #ifdef ZFS_DEBUG 133789Sahrens static void 134789Sahrens free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx) 135789Sahrens { 136789Sahrens int off, num; 137789Sahrens int i, err, epbs; 138789Sahrens uint64_t txg = tx->tx_txg; 139789Sahrens 140789Sahrens epbs = db->db_dnode->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; 141789Sahrens off = start - (db->db_blkid * 1<<epbs); 142789Sahrens num = end - start + 1; 143789Sahrens 144789Sahrens ASSERT3U(off, >=, 0); 145789Sahrens ASSERT3U(num, >=, 0); 146789Sahrens ASSERT3U(db->db_level, >, 0); 147789Sahrens ASSERT3U(db->db.db_size, ==, 1<<db->db_dnode->dn_phys->dn_indblkshift); 148789Sahrens ASSERT3U(off+num, <=, db->db.db_size >> SPA_BLKPTRSHIFT); 149789Sahrens ASSERT(db->db_blkptr != NULL); 150789Sahrens 151789Sahrens for (i = off; i < off+num; i++) { 152789Sahrens uint64_t *buf; 1533547Smaybee dmu_buf_impl_t *child; 1543547Smaybee dbuf_dirty_record_t *dr; 155789Sahrens int j; 156789Sahrens 157789Sahrens ASSERT(db->db_level == 1); 158789Sahrens 159789Sahrens rw_enter(&db->db_dnode->dn_struct_rwlock, RW_READER); 160789Sahrens err = dbuf_hold_impl(db->db_dnode, db->db_level-1, 1614312Sgw25295 (db->db_blkid << epbs) + i, TRUE, FTAG, &child); 162789Sahrens rw_exit(&db->db_dnode->dn_struct_rwlock); 163789Sahrens if (err == ENOENT) 164789Sahrens continue; 165789Sahrens ASSERT(err == 0); 166789Sahrens ASSERT(child->db_level == 0); 1673547Smaybee dr = child->db_last_dirty; 1683547Smaybee while (dr && dr->dr_txg > txg) 1693547Smaybee dr = dr->dr_next; 1703547Smaybee ASSERT(dr == NULL || dr->dr_txg == txg); 171789Sahrens 1723547Smaybee /* data_old better be zeroed */ 1733547Smaybee if (dr) { 1743547Smaybee buf = dr->dt.dl.dr_data->b_data; 175789Sahrens for (j = 0; j < child->db.db_size >> 3; j++) { 176789Sahrens if (buf[j] != 0) { 177789Sahrens panic("freed data not zero: " 178789Sahrens "child=%p i=%d off=%d num=%d\n", 1797240Srh87107 (void *)child, i, off, num); 180789Sahrens } 181789Sahrens } 182789Sahrens } 183789Sahrens 184789Sahrens /* 185789Sahrens * db_data better be zeroed unless it's dirty in a 186789Sahrens * future txg. 187789Sahrens */ 188789Sahrens mutex_enter(&child->db_mtx); 189789Sahrens buf = child->db.db_data; 190789Sahrens if (buf != NULL && child->db_state != DB_FILL && 1913547Smaybee child->db_last_dirty == NULL) { 192789Sahrens for (j = 0; j < child->db.db_size >> 3; j++) { 193789Sahrens if (buf[j] != 0) { 194789Sahrens panic("freed data not zero: " 195789Sahrens "child=%p i=%d off=%d num=%d\n", 1967240Srh87107 (void *)child, i, off, num); 197789Sahrens } 198789Sahrens } 199789Sahrens } 200789Sahrens mutex_exit(&child->db_mtx); 201789Sahrens 2021544Seschrock dbuf_rele(child, FTAG); 203789Sahrens } 204873Sek110237 } 205789Sahrens #endif 206789Sahrens 2076992Smaybee #define ALL -1 2086992Smaybee 209789Sahrens static int 210789Sahrens free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, int trunc, 211789Sahrens dmu_tx_t *tx) 212789Sahrens { 213789Sahrens dnode_t *dn = db->db_dnode; 214789Sahrens blkptr_t *bp; 215789Sahrens dmu_buf_impl_t *subdb; 216789Sahrens uint64_t start, end, dbstart, dbend, i; 217789Sahrens int epbs, shift, err; 218789Sahrens int all = TRUE; 2196992Smaybee int blocks_freed = 0; 220789Sahrens 2216992Smaybee /* 2226992Smaybee * There is a small possibility that this block will not be cached: 2236992Smaybee * 1 - if level > 1 and there are no children with level <= 1 2246992Smaybee * 2 - if we didn't get a dirty hold (because this block had just 2256992Smaybee * finished being written -- and so had no holds), and then this 2266992Smaybee * block got evicted before we got here. 2276992Smaybee */ 2286992Smaybee if (db->db_state != DB_CACHED) 2296992Smaybee (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED); 2306992Smaybee 231789Sahrens arc_release(db->db_buf, db); 232789Sahrens bp = (blkptr_t *)db->db.db_data; 233789Sahrens 234789Sahrens epbs = db->db_dnode->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT; 235789Sahrens shift = (db->db_level - 1) * epbs; 236789Sahrens dbstart = db->db_blkid << epbs; 237789Sahrens start = blkid >> shift; 238789Sahrens if (dbstart < start) { 239789Sahrens bp += start - dbstart; 240789Sahrens all = FALSE; 241789Sahrens } else { 242789Sahrens start = dbstart; 243789Sahrens } 244789Sahrens dbend = ((db->db_blkid + 1) << epbs) - 1; 245789Sahrens end = (blkid + nblks - 1) >> shift; 246789Sahrens if (dbend <= end) 247789Sahrens end = dbend; 248789Sahrens else if (all) 249789Sahrens all = trunc; 250789Sahrens ASSERT3U(start, <=, end); 251789Sahrens 252789Sahrens if (db->db_level == 1) { 253873Sek110237 FREE_VERIFY(db, start, end, tx); 2546992Smaybee blocks_freed = free_blocks(dn, bp, end-start+1, tx); 2553093Sahrens arc_buf_freeze(db->db_buf); 2566992Smaybee ASSERT(all || blocks_freed == 0 || db->db_last_dirty); 2576992Smaybee return (all ? ALL : blocks_freed); 258789Sahrens } 259789Sahrens 260789Sahrens for (i = start; i <= end; i++, bp++) { 261789Sahrens if (BP_IS_HOLE(bp)) 262789Sahrens continue; 263789Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 264789Sahrens err = dbuf_hold_impl(dn, db->db_level-1, i, TRUE, FTAG, &subdb); 265789Sahrens ASSERT3U(err, ==, 0); 266789Sahrens rw_exit(&dn->dn_struct_rwlock); 267789Sahrens 2686992Smaybee if (free_children(subdb, blkid, nblks, trunc, tx) == ALL) { 269789Sahrens ASSERT3P(subdb->db_blkptr, ==, bp); 2706992Smaybee blocks_freed += free_blocks(dn, bp, 1, tx); 2711163Smaybee } else { 2721163Smaybee all = FALSE; 273789Sahrens } 2741544Seschrock dbuf_rele(subdb, FTAG); 275789Sahrens } 2763093Sahrens arc_buf_freeze(db->db_buf); 277789Sahrens #ifdef ZFS_DEBUG 278789Sahrens bp -= (end-start)+1; 279789Sahrens for (i = start; i <= end; i++, bp++) { 280789Sahrens if (i == start && blkid != 0) 281789Sahrens continue; 282789Sahrens else if (i == end && !trunc) 283789Sahrens continue; 284789Sahrens ASSERT3U(bp->blk_birth, ==, 0); 285789Sahrens } 286789Sahrens #endif 2876992Smaybee ASSERT(all || blocks_freed == 0 || db->db_last_dirty); 2886992Smaybee return (all ? ALL : blocks_freed); 289789Sahrens } 290789Sahrens 291789Sahrens /* 292789Sahrens * free_range: Traverse the indicated range of the provided file 293789Sahrens * and "free" all the blocks contained there. 294789Sahrens */ 295789Sahrens static void 296789Sahrens dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx) 297789Sahrens { 298789Sahrens blkptr_t *bp = dn->dn_phys->dn_blkptr; 299789Sahrens dmu_buf_impl_t *db; 300789Sahrens int trunc, start, end, shift, i, err; 301789Sahrens int dnlevel = dn->dn_phys->dn_nlevels; 302789Sahrens 303789Sahrens if (blkid > dn->dn_phys->dn_maxblkid) 304789Sahrens return; 305789Sahrens 306789Sahrens ASSERT(dn->dn_phys->dn_maxblkid < UINT64_MAX); 307789Sahrens trunc = blkid + nblks > dn->dn_phys->dn_maxblkid; 308789Sahrens if (trunc) 309789Sahrens nblks = dn->dn_phys->dn_maxblkid - blkid + 1; 310789Sahrens 311789Sahrens /* There are no indirect blocks in the object */ 312789Sahrens if (dnlevel == 1) { 313789Sahrens if (blkid >= dn->dn_phys->dn_nblkptr) { 314789Sahrens /* this range was never made persistent */ 315789Sahrens return; 316789Sahrens } 317789Sahrens ASSERT3U(blkid + nblks, <=, dn->dn_phys->dn_nblkptr); 3186992Smaybee (void) free_blocks(dn, bp + blkid, nblks, tx); 319789Sahrens if (trunc) { 320789Sahrens uint64_t off = (dn->dn_phys->dn_maxblkid + 1) * 321789Sahrens (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT); 322789Sahrens dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0); 323789Sahrens ASSERT(off < dn->dn_phys->dn_maxblkid || 324789Sahrens dn->dn_phys->dn_maxblkid == 0 || 3256992Smaybee dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0); 326789Sahrens } 327789Sahrens return; 328789Sahrens } 329789Sahrens 330789Sahrens shift = (dnlevel - 1) * (dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT); 331789Sahrens start = blkid >> shift; 332789Sahrens ASSERT(start < dn->dn_phys->dn_nblkptr); 333789Sahrens end = (blkid + nblks - 1) >> shift; 334789Sahrens bp += start; 335789Sahrens for (i = start; i <= end; i++, bp++) { 336789Sahrens if (BP_IS_HOLE(bp)) 337789Sahrens continue; 338789Sahrens rw_enter(&dn->dn_struct_rwlock, RW_READER); 339789Sahrens err = dbuf_hold_impl(dn, dnlevel-1, i, TRUE, FTAG, &db); 340789Sahrens ASSERT3U(err, ==, 0); 341789Sahrens rw_exit(&dn->dn_struct_rwlock); 342789Sahrens 3436992Smaybee if (free_children(db, blkid, nblks, trunc, tx) == ALL) { 344789Sahrens ASSERT3P(db->db_blkptr, ==, bp); 3456992Smaybee (void) free_blocks(dn, bp, 1, tx); 346789Sahrens } 3471544Seschrock dbuf_rele(db, FTAG); 348789Sahrens } 349789Sahrens if (trunc) { 350789Sahrens uint64_t off = (dn->dn_phys->dn_maxblkid + 1) * 351789Sahrens (dn->dn_phys->dn_datablkszsec << SPA_MINBLOCKSHIFT); 352789Sahrens dn->dn_phys->dn_maxblkid = (blkid ? blkid - 1 : 0); 353789Sahrens ASSERT(off < dn->dn_phys->dn_maxblkid || 354789Sahrens dn->dn_phys->dn_maxblkid == 0 || 3556992Smaybee dnode_next_offset(dn, 0, &off, 1, 1, 0) != 0); 356789Sahrens } 357789Sahrens } 358789Sahrens 3591544Seschrock /* 3601544Seschrock * Try to kick all the dnodes dbufs out of the cache... 3611544Seschrock */ 3624944Smaybee void 3634944Smaybee dnode_evict_dbufs(dnode_t *dn) 3641544Seschrock { 3651596Sahrens int progress; 3661596Sahrens int pass = 0; 3671596Sahrens 3681596Sahrens do { 3691602Smaybee dmu_buf_impl_t *db, marker; 3701596Sahrens int evicting = FALSE; 3711544Seschrock 3721596Sahrens progress = FALSE; 3731596Sahrens mutex_enter(&dn->dn_dbufs_mtx); 3741602Smaybee list_insert_tail(&dn->dn_dbufs, &marker); 3751602Smaybee db = list_head(&dn->dn_dbufs); 3761602Smaybee for (; db != ▮ db = list_head(&dn->dn_dbufs)) { 3771602Smaybee list_remove(&dn->dn_dbufs, db); 3781602Smaybee list_insert_tail(&dn->dn_dbufs, db); 3794312Sgw25295 ASSERT3P(db->db_dnode, ==, dn); 3801596Sahrens 3811544Seschrock mutex_enter(&db->db_mtx); 3821596Sahrens if (db->db_state == DB_EVICTING) { 3831596Sahrens progress = TRUE; 3841596Sahrens evicting = TRUE; 3851596Sahrens mutex_exit(&db->db_mtx); 3861596Sahrens } else if (refcount_is_zero(&db->db_holds)) { 3871596Sahrens progress = TRUE; 3881596Sahrens dbuf_clear(db); /* exits db_mtx for us */ 3891596Sahrens } else { 3901596Sahrens mutex_exit(&db->db_mtx); 3911596Sahrens } 3921596Sahrens 3931544Seschrock } 3941602Smaybee list_remove(&dn->dn_dbufs, &marker); 3951596Sahrens /* 3961596Sahrens * NB: we need to drop dn_dbufs_mtx between passes so 3971596Sahrens * that any DB_EVICTING dbufs can make progress. 3981596Sahrens * Ideally, we would have some cv we could wait on, but 3991596Sahrens * since we don't, just wait a bit to give the other 4001596Sahrens * thread a chance to run. 4011596Sahrens */ 4021596Sahrens mutex_exit(&dn->dn_dbufs_mtx); 4031596Sahrens if (evicting) 4041596Sahrens delay(1); 4051596Sahrens pass++; 4061596Sahrens ASSERT(pass < 100); /* sanity check */ 4071596Sahrens } while (progress); 4081596Sahrens 4091544Seschrock rw_enter(&dn->dn_struct_rwlock, RW_WRITER); 4101544Seschrock if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) { 4111544Seschrock mutex_enter(&dn->dn_bonus->db_mtx); 4121544Seschrock dbuf_evict(dn->dn_bonus); 4131544Seschrock dn->dn_bonus = NULL; 4141544Seschrock } 4151544Seschrock rw_exit(&dn->dn_struct_rwlock); 4161544Seschrock } 4171544Seschrock 4183547Smaybee static void 4193547Smaybee dnode_undirty_dbufs(list_t *list) 4203547Smaybee { 4213547Smaybee dbuf_dirty_record_t *dr; 4223547Smaybee 4233547Smaybee while (dr = list_head(list)) { 4243547Smaybee dmu_buf_impl_t *db = dr->dr_dbuf; 4253547Smaybee uint64_t txg = dr->dr_txg; 4263547Smaybee 4273547Smaybee mutex_enter(&db->db_mtx); 4283547Smaybee /* XXX - use dbuf_undirty()? */ 4293547Smaybee list_remove(list, dr); 4303547Smaybee ASSERT(db->db_last_dirty == dr); 4313547Smaybee db->db_last_dirty = NULL; 4323547Smaybee db->db_dirtycnt -= 1; 4333547Smaybee if (db->db_level == 0) { 4343547Smaybee ASSERT(db->db_blkid == DB_BONUS_BLKID || 4353547Smaybee dr->dt.dl.dr_data == db->db_buf); 4363547Smaybee dbuf_unoverride(dr); 4373547Smaybee mutex_exit(&db->db_mtx); 4383547Smaybee } else { 4393547Smaybee mutex_exit(&db->db_mtx); 4403547Smaybee dnode_undirty_dbufs(&dr->dt.di.dr_children); 4413547Smaybee } 4423547Smaybee kmem_free(dr, sizeof (dbuf_dirty_record_t)); 4433547Smaybee dbuf_rele(db, (void *)(uintptr_t)txg); 4443547Smaybee } 4453547Smaybee } 4463547Smaybee 4473547Smaybee static void 448789Sahrens dnode_sync_free(dnode_t *dn, dmu_tx_t *tx) 449789Sahrens { 450789Sahrens int txgoff = tx->tx_txg & TXG_MASK; 451789Sahrens 452789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 453789Sahrens 4546992Smaybee /* 4556992Smaybee * Our contents should have been freed in dnode_sync() by the 4566992Smaybee * free range record inserted by the caller of dnode_free(). 4576992Smaybee */ 4586992Smaybee ASSERT3U(DN_USED_BYTES(dn->dn_phys), ==, 0); 4596992Smaybee ASSERT(BP_IS_HOLE(dn->dn_phys->dn_blkptr)); 4606992Smaybee 4613547Smaybee dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]); 4624944Smaybee dnode_evict_dbufs(dn); 4631544Seschrock ASSERT3P(list_head(&dn->dn_dbufs), ==, NULL); 4641544Seschrock 4651544Seschrock /* 4661544Seschrock * XXX - It would be nice to assert this, but we may still 4671544Seschrock * have residual holds from async evictions from the arc... 4681544Seschrock * 4693444Sek110237 * zfs_obj_to_path() also depends on this being 4703444Sek110237 * commented out. 4713444Sek110237 * 4721544Seschrock * ASSERT3U(refcount_count(&dn->dn_holds), ==, 1); 4731544Seschrock */ 474789Sahrens 475789Sahrens /* Undirty next bits */ 476789Sahrens dn->dn_next_nlevels[txgoff] = 0; 477789Sahrens dn->dn_next_indblkshift[txgoff] = 0; 4781596Sahrens dn->dn_next_blksz[txgoff] = 0; 479789Sahrens 480789Sahrens /* ASSERT(blkptrs are zero); */ 481789Sahrens ASSERT(dn->dn_phys->dn_type != DMU_OT_NONE); 482789Sahrens ASSERT(dn->dn_type != DMU_OT_NONE); 483789Sahrens 484789Sahrens ASSERT(dn->dn_free_txg > 0); 485789Sahrens if (dn->dn_allocated_txg != dn->dn_free_txg) 486789Sahrens dbuf_will_dirty(dn->dn_dbuf, tx); 487789Sahrens bzero(dn->dn_phys, sizeof (dnode_phys_t)); 488789Sahrens 489789Sahrens mutex_enter(&dn->dn_mtx); 490789Sahrens dn->dn_type = DMU_OT_NONE; 491789Sahrens dn->dn_maxblkid = 0; 492789Sahrens dn->dn_allocated_txg = 0; 4934480Sgw25295 dn->dn_free_txg = 0; 494789Sahrens mutex_exit(&dn->dn_mtx); 495789Sahrens 4961544Seschrock ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT); 497789Sahrens 498789Sahrens dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg); 499789Sahrens /* 500789Sahrens * Now that we've released our hold, the dnode may 501789Sahrens * be evicted, so we musn't access it. 502789Sahrens */ 503789Sahrens } 504789Sahrens 505789Sahrens /* 5063547Smaybee * Write out the dnode's dirty buffers. 507789Sahrens */ 5083547Smaybee void 5093547Smaybee dnode_sync(dnode_t *dn, dmu_tx_t *tx) 510789Sahrens { 511789Sahrens free_range_t *rp; 5123547Smaybee dnode_phys_t *dnp = dn->dn_phys; 513789Sahrens int txgoff = tx->tx_txg & TXG_MASK; 5143547Smaybee list_t *list = &dn->dn_dirty_records[txgoff]; 515*9396SMatthew.Ahrens@Sun.COM static const dnode_phys_t zerodn = { 0 }; 516789Sahrens 517789Sahrens ASSERT(dmu_tx_is_syncing(tx)); 518789Sahrens ASSERT(dnp->dn_type != DMU_OT_NONE || dn->dn_allocated_txg); 519*9396SMatthew.Ahrens@Sun.COM ASSERT(dnp->dn_type != DMU_OT_NONE || 520*9396SMatthew.Ahrens@Sun.COM bcmp(dnp, &zerodn, DNODE_SIZE) == 0); 521873Sek110237 DNODE_VERIFY(dn); 5221596Sahrens 5233547Smaybee ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf)); 524789Sahrens 525*9396SMatthew.Ahrens@Sun.COM if (dmu_objset_userused_enabled(dn->dn_objset) && 526*9396SMatthew.Ahrens@Sun.COM !DMU_OBJECT_IS_SPECIAL(dn->dn_object)) { 527*9396SMatthew.Ahrens@Sun.COM ASSERT(dn->dn_oldphys == NULL); 528*9396SMatthew.Ahrens@Sun.COM dn->dn_oldphys = zio_buf_alloc(sizeof (dnode_phys_t)); 529*9396SMatthew.Ahrens@Sun.COM *dn->dn_oldphys = *dn->dn_phys; /* struct assignment */ 530*9396SMatthew.Ahrens@Sun.COM dn->dn_phys->dn_flags |= DNODE_FLAG_USERUSED_ACCOUNTED; 531*9396SMatthew.Ahrens@Sun.COM } else { 532*9396SMatthew.Ahrens@Sun.COM /* Once we account for it, we should always account for it. */ 533*9396SMatthew.Ahrens@Sun.COM ASSERT(!(dn->dn_phys->dn_flags & 534*9396SMatthew.Ahrens@Sun.COM DNODE_FLAG_USERUSED_ACCOUNTED)); 535*9396SMatthew.Ahrens@Sun.COM } 536*9396SMatthew.Ahrens@Sun.COM 537789Sahrens mutex_enter(&dn->dn_mtx); 538789Sahrens if (dn->dn_allocated_txg == tx->tx_txg) { 539789Sahrens /* The dnode is newly allocated or reallocated */ 540789Sahrens if (dnp->dn_type == DMU_OT_NONE) { 541789Sahrens /* this is a first alloc, not a realloc */ 542789Sahrens dnp->dn_nlevels = 1; 5438644SMark.Maybee@Sun.COM dnp->dn_nblkptr = dn->dn_nblkptr; 544789Sahrens } 545789Sahrens 546789Sahrens dnp->dn_type = dn->dn_type; 547789Sahrens dnp->dn_bonustype = dn->dn_bonustype; 548789Sahrens dnp->dn_bonuslen = dn->dn_bonuslen; 549789Sahrens } 550789Sahrens 5513547Smaybee ASSERT(dnp->dn_nlevels > 1 || 5521599Sahrens BP_IS_HOLE(&dnp->dn_blkptr[0]) || 5531599Sahrens BP_GET_LSIZE(&dnp->dn_blkptr[0]) == 5541599Sahrens dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT); 5551599Sahrens 5561596Sahrens if (dn->dn_next_blksz[txgoff]) { 5571596Sahrens ASSERT(P2PHASE(dn->dn_next_blksz[txgoff], 558789Sahrens SPA_MINBLOCKSIZE) == 0); 5591599Sahrens ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[0]) || 5606992Smaybee dn->dn_maxblkid == 0 || list_head(list) != NULL || 5611600Sahrens dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT == 5621600Sahrens dnp->dn_datablkszsec); 563789Sahrens dnp->dn_datablkszsec = 5641596Sahrens dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT; 5651596Sahrens dn->dn_next_blksz[txgoff] = 0; 566789Sahrens } 567789Sahrens 5684944Smaybee if (dn->dn_next_bonuslen[txgoff]) { 5694944Smaybee if (dn->dn_next_bonuslen[txgoff] == DN_ZERO_BONUSLEN) 5704944Smaybee dnp->dn_bonuslen = 0; 5714944Smaybee else 5724944Smaybee dnp->dn_bonuslen = dn->dn_next_bonuslen[txgoff]; 5734944Smaybee ASSERT(dnp->dn_bonuslen <= DN_MAX_BONUSLEN); 5744944Smaybee dn->dn_next_bonuslen[txgoff] = 0; 5754944Smaybee } 5764944Smaybee 577789Sahrens if (dn->dn_next_indblkshift[txgoff]) { 578789Sahrens ASSERT(dnp->dn_nlevels == 1); 579789Sahrens dnp->dn_indblkshift = dn->dn_next_indblkshift[txgoff]; 580789Sahrens dn->dn_next_indblkshift[txgoff] = 0; 581789Sahrens } 582789Sahrens 583789Sahrens /* 584789Sahrens * Just take the live (open-context) values for checksum and compress. 585789Sahrens * Strictly speaking it's a future leak, but nothing bad happens if we 586789Sahrens * start using the new checksum or compress algorithm a little early. 587789Sahrens */ 588789Sahrens dnp->dn_checksum = dn->dn_checksum; 589789Sahrens dnp->dn_compress = dn->dn_compress; 590789Sahrens 591789Sahrens mutex_exit(&dn->dn_mtx); 592789Sahrens 593789Sahrens /* process all the "freed" ranges in the file */ 5946992Smaybee while (rp = avl_last(&dn->dn_ranges[txgoff])) { 5956992Smaybee dnode_sync_free_range(dn, rp->fr_blkid, rp->fr_nblks, tx); 5966992Smaybee /* grab the mutex so we don't race with dnode_block_freed() */ 5976992Smaybee mutex_enter(&dn->dn_mtx); 5986992Smaybee avl_remove(&dn->dn_ranges[txgoff], rp); 5996992Smaybee mutex_exit(&dn->dn_mtx); 6006992Smaybee kmem_free(rp, sizeof (free_range_t)); 601789Sahrens } 6024944Smaybee 603789Sahrens if (dn->dn_free_txg > 0 && dn->dn_free_txg <= tx->tx_txg) { 6043547Smaybee dnode_sync_free(dn, tx); 6053547Smaybee return; 606789Sahrens } 607789Sahrens 6088644SMark.Maybee@Sun.COM if (dn->dn_next_nblkptr[txgoff]) { 6098644SMark.Maybee@Sun.COM /* this should only happen on a realloc */ 6108644SMark.Maybee@Sun.COM ASSERT(dn->dn_allocated_txg == tx->tx_txg); 6118644SMark.Maybee@Sun.COM if (dn->dn_next_nblkptr[txgoff] > dnp->dn_nblkptr) { 6128644SMark.Maybee@Sun.COM /* zero the new blkptrs we are gaining */ 6138644SMark.Maybee@Sun.COM bzero(dnp->dn_blkptr + dnp->dn_nblkptr, 6148644SMark.Maybee@Sun.COM sizeof (blkptr_t) * 6158644SMark.Maybee@Sun.COM (dn->dn_next_nblkptr[txgoff] - dnp->dn_nblkptr)); 6168644SMark.Maybee@Sun.COM #ifdef ZFS_DEBUG 6178644SMark.Maybee@Sun.COM } else { 6188644SMark.Maybee@Sun.COM int i; 6198644SMark.Maybee@Sun.COM ASSERT(dn->dn_next_nblkptr[txgoff] < dnp->dn_nblkptr); 6208644SMark.Maybee@Sun.COM /* the blkptrs we are losing better be unallocated */ 6218644SMark.Maybee@Sun.COM for (i = dn->dn_next_nblkptr[txgoff]; 6228644SMark.Maybee@Sun.COM i < dnp->dn_nblkptr; i++) 6238644SMark.Maybee@Sun.COM ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[i])); 6248644SMark.Maybee@Sun.COM #endif 6258644SMark.Maybee@Sun.COM } 6268644SMark.Maybee@Sun.COM mutex_enter(&dn->dn_mtx); 6278644SMark.Maybee@Sun.COM dnp->dn_nblkptr = dn->dn_next_nblkptr[txgoff]; 6288644SMark.Maybee@Sun.COM dn->dn_next_nblkptr[txgoff] = 0; 6298644SMark.Maybee@Sun.COM mutex_exit(&dn->dn_mtx); 6308644SMark.Maybee@Sun.COM } 6318644SMark.Maybee@Sun.COM 632789Sahrens if (dn->dn_next_nlevels[txgoff]) { 6333547Smaybee dnode_increase_indirection(dn, tx); 634789Sahrens dn->dn_next_nlevels[txgoff] = 0; 635789Sahrens } 636789Sahrens 6373547Smaybee dbuf_sync_list(list, tx); 638789Sahrens 639*9396SMatthew.Ahrens@Sun.COM if (!DMU_OBJECT_IS_SPECIAL(dn->dn_object)) { 6403547Smaybee ASSERT3P(list_head(list), ==, NULL); 6413547Smaybee dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg); 6423547Smaybee } 6431599Sahrens 6443547Smaybee /* 6453547Smaybee * Although we have dropped our reference to the dnode, it 6463547Smaybee * can't be evicted until its written, and we haven't yet 6473547Smaybee * initiated the IO for the dnode's dbuf. 6483547Smaybee */ 649789Sahrens } 650