1789Sahrens /* 2789Sahrens * CDDL HEADER START 3789Sahrens * 4789Sahrens * The contents of this file are subject to the terms of the 51491Sahrens * Common Development and Distribution License (the "License"). 61491Sahrens * You may not use this file except in compliance with the License. 7789Sahrens * 8789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9789Sahrens * or http://www.opensolaris.org/os/licensing. 10789Sahrens * See the License for the specific language governing permissions 11789Sahrens * and limitations under the License. 12789Sahrens * 13789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15789Sahrens * If applicable, add the following below this CDDL HEADER, with the 16789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18789Sahrens * 19789Sahrens * CDDL HEADER END 20789Sahrens */ 21789Sahrens /* 224577Sahrens * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23789Sahrens * Use is subject to license terms. 24789Sahrens */ 25789Sahrens 26789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27789Sahrens 28789Sahrens 29789Sahrens /* 30789Sahrens * This file contains the top half of the zfs directory structure 31789Sahrens * implementation. The bottom half is in zap_leaf.c. 32789Sahrens * 33789Sahrens * The zdir is an extendable hash data structure. There is a table of 34789Sahrens * pointers to buckets (zap_t->zd_data->zd_leafs). The buckets are 35789Sahrens * each a constant size and hold a variable number of directory entries. 36789Sahrens * The buckets (aka "leaf nodes") are implemented in zap_leaf.c. 37789Sahrens * 38789Sahrens * The pointer table holds a power of 2 number of pointers. 39789Sahrens * (1<<zap_t->zd_data->zd_phys->zd_prefix_len). The bucket pointed to 40789Sahrens * by the pointer at index i in the table holds entries whose hash value 41789Sahrens * has a zd_prefix_len - bit prefix 42789Sahrens */ 43789Sahrens 44789Sahrens #include <sys/spa.h> 45789Sahrens #include <sys/dmu.h> 46789Sahrens #include <sys/zfs_context.h> 47*5498Stimh #include <sys/zfs_znode.h> 48789Sahrens #include <sys/zap.h> 491544Seschrock #include <sys/refcount.h> 50789Sahrens #include <sys/zap_impl.h> 51789Sahrens #include <sys/zap_leaf.h> 52789Sahrens 531491Sahrens int fzap_default_block_shift = 14; /* 16k blocksize */ 54789Sahrens 55789Sahrens static void zap_leaf_pageout(dmu_buf_t *db, void *vl); 561578Sahrens static uint64_t zap_allocate_blocks(zap_t *zap, int nblocks); 57789Sahrens 58789Sahrens 59789Sahrens void 60789Sahrens fzap_byteswap(void *vbuf, size_t size) 61789Sahrens { 62789Sahrens uint64_t block_type; 63789Sahrens 64789Sahrens block_type = *(uint64_t *)vbuf; 65789Sahrens 662856Snd150628 if (block_type == ZBT_LEAF || block_type == BSWAP_64(ZBT_LEAF)) 671491Sahrens zap_leaf_byteswap(vbuf, size); 682856Snd150628 else { 69789Sahrens /* it's a ptrtbl block */ 701491Sahrens byteswap_uint64_array(vbuf, size); 71789Sahrens } 72789Sahrens } 73789Sahrens 74789Sahrens void 75789Sahrens fzap_upgrade(zap_t *zap, dmu_tx_t *tx) 76789Sahrens { 77789Sahrens dmu_buf_t *db; 78789Sahrens zap_leaf_t *l; 79789Sahrens int i; 80789Sahrens zap_phys_t *zp; 81789Sahrens 82789Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 83789Sahrens zap->zap_ismicro = FALSE; 84789Sahrens 85789Sahrens (void) dmu_buf_update_user(zap->zap_dbuf, zap, zap, 862641Sahrens &zap->zap_f.zap_phys, zap_evict); 87789Sahrens 88789Sahrens mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0); 891491Sahrens zap->zap_f.zap_block_shift = highbit(zap->zap_dbuf->db_size) - 1; 90789Sahrens 91789Sahrens zp = zap->zap_f.zap_phys; 92789Sahrens /* 93789Sahrens * explicitly zero it since it might be coming from an 94789Sahrens * initialized microzap 95789Sahrens */ 961491Sahrens bzero(zap->zap_dbuf->db_data, zap->zap_dbuf->db_size); 97789Sahrens zp->zap_block_type = ZBT_HEADER; 98789Sahrens zp->zap_magic = ZAP_MAGIC; 99789Sahrens 1001491Sahrens zp->zap_ptrtbl.zt_shift = ZAP_EMBEDDED_PTRTBL_SHIFT(zap); 101789Sahrens 102789Sahrens zp->zap_freeblk = 2; /* block 1 will be the first leaf */ 103789Sahrens zp->zap_num_leafs = 1; 104789Sahrens zp->zap_num_entries = 0; 105789Sahrens zp->zap_salt = zap->zap_salt; 1065331Samw zp->zap_normflags = zap->zap_normflags; 107789Sahrens 1081491Sahrens /* block 1 will be the first leaf */ 1091491Sahrens for (i = 0; i < (1<<zp->zap_ptrtbl.zt_shift); i++) 1101491Sahrens ZAP_EMBEDDED_PTRTBL_ENT(zap, i) = 1; 111789Sahrens 112789Sahrens /* 113789Sahrens * set up block 1 - the first leaf 114789Sahrens */ 1151544Seschrock VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object, 1161544Seschrock 1<<FZAP_BLOCK_SHIFT(zap), FTAG, &db)); 117789Sahrens dmu_buf_will_dirty(db, tx); 118789Sahrens 119789Sahrens l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP); 120789Sahrens l->l_dbuf = db; 121789Sahrens l->l_phys = db->db_data; 122789Sahrens 123*5498Stimh zap_leaf_init(l, zp->zap_normflags != 0); 124789Sahrens 125789Sahrens kmem_free(l, sizeof (zap_leaf_t)); 1261544Seschrock dmu_buf_rele(db, FTAG); 127789Sahrens } 128789Sahrens 129789Sahrens static int 130789Sahrens zap_tryupgradedir(zap_t *zap, dmu_tx_t *tx) 131789Sahrens { 132789Sahrens if (RW_WRITE_HELD(&zap->zap_rwlock)) 133789Sahrens return (1); 134789Sahrens if (rw_tryupgrade(&zap->zap_rwlock)) { 135789Sahrens dmu_buf_will_dirty(zap->zap_dbuf, tx); 136789Sahrens return (1); 137789Sahrens } 138789Sahrens return (0); 139789Sahrens } 140789Sahrens 141789Sahrens /* 142789Sahrens * Generic routines for dealing with the pointer & cookie tables. 143789Sahrens */ 144789Sahrens 1451578Sahrens static int 146789Sahrens zap_table_grow(zap_t *zap, zap_table_phys_t *tbl, 147789Sahrens void (*transfer_func)(const uint64_t *src, uint64_t *dst, int n), 148789Sahrens dmu_tx_t *tx) 149789Sahrens { 150789Sahrens uint64_t b, newblk; 151789Sahrens dmu_buf_t *db_old, *db_new; 1521544Seschrock int err; 1531491Sahrens int bs = FZAP_BLOCK_SHIFT(zap); 1541491Sahrens int hepb = 1<<(bs-4); 155789Sahrens /* hepb = half the number of entries in a block */ 156789Sahrens 157789Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 158789Sahrens ASSERT(tbl->zt_blk != 0); 159789Sahrens ASSERT(tbl->zt_numblks > 0); 160789Sahrens 161789Sahrens if (tbl->zt_nextblk != 0) { 162789Sahrens newblk = tbl->zt_nextblk; 163789Sahrens } else { 1641578Sahrens newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2); 165789Sahrens tbl->zt_nextblk = newblk; 166789Sahrens ASSERT3U(tbl->zt_blks_copied, ==, 0); 167789Sahrens dmu_prefetch(zap->zap_objset, zap->zap_object, 1681491Sahrens tbl->zt_blk << bs, tbl->zt_numblks << bs); 169789Sahrens } 170789Sahrens 171789Sahrens /* 1721578Sahrens * Copy the ptrtbl from the old to new location. 173789Sahrens */ 174789Sahrens 175789Sahrens b = tbl->zt_blks_copied; 1761544Seschrock err = dmu_buf_hold(zap->zap_objset, zap->zap_object, 1771544Seschrock (tbl->zt_blk + b) << bs, FTAG, &db_old); 1781544Seschrock if (err) 1791578Sahrens return (err); 180789Sahrens 181789Sahrens /* first half of entries in old[b] go to new[2*b+0] */ 1821544Seschrock VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object, 1831544Seschrock (newblk + 2*b+0) << bs, FTAG, &db_new)); 184789Sahrens dmu_buf_will_dirty(db_new, tx); 185789Sahrens transfer_func(db_old->db_data, db_new->db_data, hepb); 1861544Seschrock dmu_buf_rele(db_new, FTAG); 187789Sahrens 188789Sahrens /* second half of entries in old[b] go to new[2*b+1] */ 1891544Seschrock VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object, 1901544Seschrock (newblk + 2*b+1) << bs, FTAG, &db_new)); 191789Sahrens dmu_buf_will_dirty(db_new, tx); 192789Sahrens transfer_func((uint64_t *)db_old->db_data + hepb, 193789Sahrens db_new->db_data, hepb); 1941544Seschrock dmu_buf_rele(db_new, FTAG); 195789Sahrens 1961544Seschrock dmu_buf_rele(db_old, FTAG); 197789Sahrens 198789Sahrens tbl->zt_blks_copied++; 199789Sahrens 200789Sahrens dprintf("copied block %llu of %llu\n", 201789Sahrens tbl->zt_blks_copied, tbl->zt_numblks); 202789Sahrens 203789Sahrens if (tbl->zt_blks_copied == tbl->zt_numblks) { 2041544Seschrock (void) dmu_free_range(zap->zap_objset, zap->zap_object, 2051491Sahrens tbl->zt_blk << bs, tbl->zt_numblks << bs, tx); 206789Sahrens 207789Sahrens tbl->zt_blk = newblk; 208789Sahrens tbl->zt_numblks *= 2; 209789Sahrens tbl->zt_shift++; 210789Sahrens tbl->zt_nextblk = 0; 211789Sahrens tbl->zt_blks_copied = 0; 212789Sahrens 213789Sahrens dprintf("finished; numblocks now %llu (%lluk entries)\n", 214789Sahrens tbl->zt_numblks, 1<<(tbl->zt_shift-10)); 215789Sahrens } 2161578Sahrens 2171578Sahrens return (0); 218789Sahrens } 219789Sahrens 2201544Seschrock static int 221789Sahrens zap_table_store(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t val, 222789Sahrens dmu_tx_t *tx) 223789Sahrens { 2241544Seschrock int err; 2251544Seschrock uint64_t blk, off; 2261544Seschrock int bs = FZAP_BLOCK_SHIFT(zap); 227789Sahrens dmu_buf_t *db; 228789Sahrens 229789Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 230789Sahrens ASSERT(tbl->zt_blk != 0); 231789Sahrens 232789Sahrens dprintf("storing %llx at index %llx\n", val, idx); 233789Sahrens 2341491Sahrens blk = idx >> (bs-3); 2351491Sahrens off = idx & ((1<<(bs-3))-1); 236789Sahrens 2371544Seschrock err = dmu_buf_hold(zap->zap_objset, zap->zap_object, 2381544Seschrock (tbl->zt_blk + blk) << bs, FTAG, &db); 2391544Seschrock if (err) 2401544Seschrock return (err); 241789Sahrens dmu_buf_will_dirty(db, tx); 242789Sahrens 243789Sahrens if (tbl->zt_nextblk != 0) { 2441544Seschrock uint64_t idx2 = idx * 2; 2451544Seschrock uint64_t blk2 = idx2 >> (bs-3); 2461544Seschrock uint64_t off2 = idx2 & ((1<<(bs-3))-1); 2471544Seschrock dmu_buf_t *db2; 248789Sahrens 2491544Seschrock err = dmu_buf_hold(zap->zap_objset, zap->zap_object, 2501544Seschrock (tbl->zt_nextblk + blk2) << bs, FTAG, &db2); 2511544Seschrock if (err) { 2521544Seschrock dmu_buf_rele(db, FTAG); 2531544Seschrock return (err); 2541544Seschrock } 2551544Seschrock dmu_buf_will_dirty(db2, tx); 2561544Seschrock ((uint64_t *)db2->db_data)[off2] = val; 2571544Seschrock ((uint64_t *)db2->db_data)[off2+1] = val; 2581544Seschrock dmu_buf_rele(db2, FTAG); 259789Sahrens } 260789Sahrens 2611544Seschrock ((uint64_t *)db->db_data)[off] = val; 2621544Seschrock dmu_buf_rele(db, FTAG); 2631544Seschrock 2641544Seschrock return (0); 265789Sahrens } 266789Sahrens 2671544Seschrock static int 2681544Seschrock zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp) 269789Sahrens { 2701544Seschrock uint64_t blk, off; 2711544Seschrock int err; 272789Sahrens dmu_buf_t *db; 2731491Sahrens int bs = FZAP_BLOCK_SHIFT(zap); 274789Sahrens 275789Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 276789Sahrens 2771491Sahrens blk = idx >> (bs-3); 2781491Sahrens off = idx & ((1<<(bs-3))-1); 279789Sahrens 2801544Seschrock err = dmu_buf_hold(zap->zap_objset, zap->zap_object, 2811544Seschrock (tbl->zt_blk + blk) << bs, FTAG, &db); 2821544Seschrock if (err) 2831544Seschrock return (err); 2841544Seschrock *valp = ((uint64_t *)db->db_data)[off]; 2851544Seschrock dmu_buf_rele(db, FTAG); 2861544Seschrock 2871544Seschrock if (tbl->zt_nextblk != 0) { 2881544Seschrock /* 2891544Seschrock * read the nextblk for the sake of i/o error checking, 2901544Seschrock * so that zap_table_load() will catch errors for 2911544Seschrock * zap_table_store. 2921544Seschrock */ 2931544Seschrock blk = (idx*2) >> (bs-3); 2941544Seschrock 2951544Seschrock err = dmu_buf_hold(zap->zap_objset, zap->zap_object, 2961544Seschrock (tbl->zt_nextblk + blk) << bs, FTAG, &db); 2971544Seschrock dmu_buf_rele(db, FTAG); 2981544Seschrock } 2991544Seschrock return (err); 300789Sahrens } 301789Sahrens 302789Sahrens /* 303789Sahrens * Routines for growing the ptrtbl. 304789Sahrens */ 305789Sahrens 306789Sahrens static void 307789Sahrens zap_ptrtbl_transfer(const uint64_t *src, uint64_t *dst, int n) 308789Sahrens { 309789Sahrens int i; 310789Sahrens for (i = 0; i < n; i++) { 311789Sahrens uint64_t lb = src[i]; 312789Sahrens dst[2*i+0] = lb; 313789Sahrens dst[2*i+1] = lb; 314789Sahrens } 315789Sahrens } 316789Sahrens 3171578Sahrens static int 318789Sahrens zap_grow_ptrtbl(zap_t *zap, dmu_tx_t *tx) 319789Sahrens { 3201578Sahrens /* In case things go horribly wrong. */ 3211578Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift >= ZAP_HASHBITS-2) 3221578Sahrens return (ENOSPC); 323789Sahrens 324789Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { 325789Sahrens /* 3261491Sahrens * We are outgrowing the "embedded" ptrtbl (the one 3271491Sahrens * stored in the header block). Give it its own entire 3281491Sahrens * block, which will double the size of the ptrtbl. 329789Sahrens */ 330789Sahrens uint64_t newblk; 331789Sahrens dmu_buf_t *db_new; 3321544Seschrock int err; 333789Sahrens 334789Sahrens ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==, 3351491Sahrens ZAP_EMBEDDED_PTRTBL_SHIFT(zap)); 336789Sahrens ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_blk, ==, 0); 337789Sahrens 3381578Sahrens newblk = zap_allocate_blocks(zap, 1); 3391544Seschrock err = dmu_buf_hold(zap->zap_objset, zap->zap_object, 3401544Seschrock newblk << FZAP_BLOCK_SHIFT(zap), FTAG, &db_new); 3411544Seschrock if (err) 3421578Sahrens return (err); 343789Sahrens dmu_buf_will_dirty(db_new, tx); 3441491Sahrens zap_ptrtbl_transfer(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), 3451491Sahrens db_new->db_data, 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap)); 3461544Seschrock dmu_buf_rele(db_new, FTAG); 347789Sahrens 348789Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_blk = newblk; 349789Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks = 1; 350789Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_shift++; 351789Sahrens 352789Sahrens ASSERT3U(1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==, 353789Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks << 3541491Sahrens (FZAP_BLOCK_SHIFT(zap)-3)); 3551578Sahrens 3561578Sahrens return (0); 357789Sahrens } else { 3581578Sahrens return (zap_table_grow(zap, &zap->zap_f.zap_phys->zap_ptrtbl, 3591578Sahrens zap_ptrtbl_transfer, tx)); 360789Sahrens } 361789Sahrens } 362789Sahrens 363789Sahrens static void 364789Sahrens zap_increment_num_entries(zap_t *zap, int delta, dmu_tx_t *tx) 365789Sahrens { 366789Sahrens dmu_buf_will_dirty(zap->zap_dbuf, tx); 367789Sahrens mutex_enter(&zap->zap_f.zap_num_entries_mtx); 368789Sahrens ASSERT(delta > 0 || zap->zap_f.zap_phys->zap_num_entries >= -delta); 369789Sahrens zap->zap_f.zap_phys->zap_num_entries += delta; 370789Sahrens mutex_exit(&zap->zap_f.zap_num_entries_mtx); 371789Sahrens } 372789Sahrens 3731578Sahrens static uint64_t 3741578Sahrens zap_allocate_blocks(zap_t *zap, int nblocks) 375789Sahrens { 376789Sahrens uint64_t newblk; 3771578Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 3781578Sahrens newblk = zap->zap_f.zap_phys->zap_freeblk; 3791578Sahrens zap->zap_f.zap_phys->zap_freeblk += nblocks; 380789Sahrens return (newblk); 381789Sahrens } 382789Sahrens 3831578Sahrens static zap_leaf_t * 384789Sahrens zap_create_leaf(zap_t *zap, dmu_tx_t *tx) 385789Sahrens { 386789Sahrens void *winner; 387789Sahrens zap_leaf_t *l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP); 388789Sahrens 389789Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 390789Sahrens 391789Sahrens rw_init(&l->l_rwlock, 0, 0, 0); 392789Sahrens rw_enter(&l->l_rwlock, RW_WRITER); 3931578Sahrens l->l_blkid = zap_allocate_blocks(zap, 1); 394789Sahrens l->l_dbuf = NULL; 395789Sahrens l->l_phys = NULL; 396789Sahrens 3971544Seschrock VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object, 3981544Seschrock l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf)); 399789Sahrens winner = dmu_buf_set_user(l->l_dbuf, l, &l->l_phys, zap_leaf_pageout); 400789Sahrens ASSERT(winner == NULL); 401789Sahrens dmu_buf_will_dirty(l->l_dbuf, tx); 402789Sahrens 403*5498Stimh zap_leaf_init(l, zap->zap_normflags != 0); 404789Sahrens 4051578Sahrens zap->zap_f.zap_phys->zap_num_leafs++; 406789Sahrens 4071578Sahrens return (l); 408789Sahrens } 409789Sahrens 410789Sahrens int 411789Sahrens fzap_count(zap_t *zap, uint64_t *count) 412789Sahrens { 413789Sahrens ASSERT(!zap->zap_ismicro); 414789Sahrens mutex_enter(&zap->zap_f.zap_num_entries_mtx); /* unnecessary */ 415789Sahrens *count = zap->zap_f.zap_phys->zap_num_entries; 416789Sahrens mutex_exit(&zap->zap_f.zap_num_entries_mtx); 417789Sahrens return (0); 418789Sahrens } 419789Sahrens 420789Sahrens /* 421789Sahrens * Routines for obtaining zap_leaf_t's 422789Sahrens */ 423789Sahrens 424885Sahrens void 425789Sahrens zap_put_leaf(zap_leaf_t *l) 426789Sahrens { 427789Sahrens rw_exit(&l->l_rwlock); 4281544Seschrock dmu_buf_rele(l->l_dbuf, NULL); 429789Sahrens } 430789Sahrens 431789Sahrens _NOTE(ARGSUSED(0)) 432789Sahrens static void 433789Sahrens zap_leaf_pageout(dmu_buf_t *db, void *vl) 434789Sahrens { 435789Sahrens zap_leaf_t *l = vl; 436789Sahrens 437789Sahrens rw_destroy(&l->l_rwlock); 438789Sahrens kmem_free(l, sizeof (zap_leaf_t)); 439789Sahrens } 440789Sahrens 441789Sahrens static zap_leaf_t * 442789Sahrens zap_open_leaf(uint64_t blkid, dmu_buf_t *db) 443789Sahrens { 444789Sahrens zap_leaf_t *l, *winner; 445789Sahrens 446789Sahrens ASSERT(blkid != 0); 447789Sahrens 448789Sahrens l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP); 449789Sahrens rw_init(&l->l_rwlock, 0, 0, 0); 450789Sahrens rw_enter(&l->l_rwlock, RW_WRITER); 451789Sahrens l->l_blkid = blkid; 4521491Sahrens l->l_bs = highbit(db->db_size)-1; 453789Sahrens l->l_dbuf = db; 454789Sahrens l->l_phys = NULL; 455789Sahrens 456789Sahrens winner = dmu_buf_set_user(db, l, &l->l_phys, zap_leaf_pageout); 457789Sahrens 458789Sahrens rw_exit(&l->l_rwlock); 459789Sahrens if (winner != NULL) { 460789Sahrens /* someone else set it first */ 461789Sahrens zap_leaf_pageout(NULL, l); 462789Sahrens l = winner; 463789Sahrens } 464789Sahrens 4651491Sahrens /* 4661578Sahrens * lhr_pad was previously used for the next leaf in the leaf 4671578Sahrens * chain. There should be no chained leafs (as we have removed 4681578Sahrens * support for them). 4691578Sahrens */ 4701578Sahrens ASSERT3U(l->l_phys->l_hdr.lh_pad1, ==, 0); 4711578Sahrens 4721578Sahrens /* 4731491Sahrens * There should be more hash entries than there can be 4741491Sahrens * chunks to put in the hash table 4751491Sahrens */ 4761491Sahrens ASSERT3U(ZAP_LEAF_HASH_NUMENTRIES(l), >, ZAP_LEAF_NUMCHUNKS(l) / 3); 4771491Sahrens 4781491Sahrens /* The chunks should begin at the end of the hash table */ 4791491Sahrens ASSERT3P(&ZAP_LEAF_CHUNK(l, 0), ==, 4801491Sahrens &l->l_phys->l_hash[ZAP_LEAF_HASH_NUMENTRIES(l)]); 4811491Sahrens 4821491Sahrens /* The chunks should end at the end of the block */ 4831491Sahrens ASSERT3U((uintptr_t)&ZAP_LEAF_CHUNK(l, ZAP_LEAF_NUMCHUNKS(l)) - 4841491Sahrens (uintptr_t)l->l_phys, ==, l->l_dbuf->db_size); 4851491Sahrens 486789Sahrens return (l); 487789Sahrens } 488789Sahrens 4891544Seschrock static int 4901578Sahrens zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt, 4911544Seschrock zap_leaf_t **lp) 492789Sahrens { 493789Sahrens dmu_buf_t *db; 494789Sahrens zap_leaf_t *l; 4951491Sahrens int bs = FZAP_BLOCK_SHIFT(zap); 4961544Seschrock int err; 497789Sahrens 498789Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 499789Sahrens 5001544Seschrock err = dmu_buf_hold(zap->zap_objset, zap->zap_object, 5011544Seschrock blkid << bs, NULL, &db); 5021544Seschrock if (err) 5031544Seschrock return (err); 504789Sahrens 505789Sahrens ASSERT3U(db->db_object, ==, zap->zap_object); 5061491Sahrens ASSERT3U(db->db_offset, ==, blkid << bs); 5071491Sahrens ASSERT3U(db->db_size, ==, 1 << bs); 508789Sahrens ASSERT(blkid != 0); 509789Sahrens 510789Sahrens l = dmu_buf_get_user(db); 511789Sahrens 512789Sahrens if (l == NULL) 513789Sahrens l = zap_open_leaf(blkid, db); 514789Sahrens 515789Sahrens rw_enter(&l->l_rwlock, lt); 516789Sahrens /* 517789Sahrens * Must lock before dirtying, otherwise l->l_phys could change, 518789Sahrens * causing ASSERT below to fail. 519789Sahrens */ 520789Sahrens if (lt == RW_WRITER) 521789Sahrens dmu_buf_will_dirty(db, tx); 522789Sahrens ASSERT3U(l->l_blkid, ==, blkid); 523789Sahrens ASSERT3P(l->l_dbuf, ==, db); 524789Sahrens ASSERT3P(l->l_phys, ==, l->l_dbuf->db_data); 5251578Sahrens ASSERT3U(l->l_phys->l_hdr.lh_block_type, ==, ZBT_LEAF); 5261578Sahrens ASSERT3U(l->l_phys->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC); 527789Sahrens 5281544Seschrock *lp = l; 5291544Seschrock return (0); 530789Sahrens } 531789Sahrens 5321544Seschrock static int 5331544Seschrock zap_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t *valp) 534789Sahrens { 535789Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 536789Sahrens 537789Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { 538789Sahrens ASSERT3U(idx, <, 539789Sahrens (1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift)); 5401544Seschrock *valp = ZAP_EMBEDDED_PTRTBL_ENT(zap, idx); 5411544Seschrock return (0); 542789Sahrens } else { 543789Sahrens return (zap_table_load(zap, &zap->zap_f.zap_phys->zap_ptrtbl, 5441544Seschrock idx, valp)); 545789Sahrens } 546789Sahrens } 547789Sahrens 5481544Seschrock static int 549789Sahrens zap_set_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t blk, dmu_tx_t *tx) 550789Sahrens { 551789Sahrens ASSERT(tx != NULL); 552789Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 553789Sahrens 554789Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk == 0) { 5551491Sahrens ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) = blk; 5561544Seschrock return (0); 557789Sahrens } else { 5581544Seschrock return (zap_table_store(zap, &zap->zap_f.zap_phys->zap_ptrtbl, 5591544Seschrock idx, blk, tx)); 560789Sahrens } 561789Sahrens } 562789Sahrens 5631544Seschrock static int 5641544Seschrock zap_deref_leaf(zap_t *zap, uint64_t h, dmu_tx_t *tx, krw_t lt, zap_leaf_t **lp) 565789Sahrens { 5661544Seschrock uint64_t idx, blk; 5671544Seschrock int err; 568789Sahrens 569789Sahrens ASSERT(zap->zap_dbuf == NULL || 570789Sahrens zap->zap_f.zap_phys == zap->zap_dbuf->db_data); 571789Sahrens ASSERT3U(zap->zap_f.zap_phys->zap_magic, ==, ZAP_MAGIC); 572789Sahrens idx = ZAP_HASH_IDX(h, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); 5731544Seschrock err = zap_idx_to_blk(zap, idx, &blk); 5741544Seschrock if (err != 0) 5751544Seschrock return (err); 5761544Seschrock err = zap_get_leaf_byblk(zap, blk, tx, lt, lp); 577789Sahrens 5781578Sahrens ASSERT(err || ZAP_HASH_IDX(h, (*lp)->l_phys->l_hdr.lh_prefix_len) == 5791578Sahrens (*lp)->l_phys->l_hdr.lh_prefix); 5801544Seschrock return (err); 581789Sahrens } 582789Sahrens 5831544Seschrock static int 5845497Sbonwick zap_expand_leaf(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx, zap_leaf_t **lp) 585789Sahrens { 5865497Sbonwick zap_t *zap = zn->zn_zap; 5875497Sbonwick uint64_t hash = zn->zn_hash; 588789Sahrens zap_leaf_t *nl; 589789Sahrens int prefix_diff, i, err; 590789Sahrens uint64_t sibling; 5911578Sahrens int old_prefix_len = l->l_phys->l_hdr.lh_prefix_len; 592789Sahrens 5931578Sahrens ASSERT3U(old_prefix_len, <=, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); 594789Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 595789Sahrens 5961578Sahrens ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==, 5971578Sahrens l->l_phys->l_hdr.lh_prefix); 598789Sahrens 5991578Sahrens if (zap_tryupgradedir(zap, tx) == 0 || 6001578Sahrens old_prefix_len == zap->zap_f.zap_phys->zap_ptrtbl.zt_shift) { 6011578Sahrens /* We failed to upgrade, or need to grow the pointer table */ 602789Sahrens objset_t *os = zap->zap_objset; 603789Sahrens uint64_t object = zap->zap_object; 604789Sahrens 605789Sahrens zap_put_leaf(l); 606789Sahrens zap_unlockdir(zap); 6075384Sahrens err = zap_lockdir(os, object, tx, RW_WRITER, 6085497Sbonwick FALSE, FALSE, &zn->zn_zap); 6095497Sbonwick zap = zn->zn_zap; 6101578Sahrens if (err) 6111578Sahrens return (err); 612789Sahrens ASSERT(!zap->zap_ismicro); 613789Sahrens 6141578Sahrens while (old_prefix_len == 6151578Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_shift) { 6161578Sahrens err = zap_grow_ptrtbl(zap, tx); 6171578Sahrens if (err) 6181578Sahrens return (err); 6191578Sahrens } 6201578Sahrens 6211578Sahrens err = zap_deref_leaf(zap, hash, tx, RW_WRITER, &l); 6221578Sahrens if (err) 6231578Sahrens return (err); 6241578Sahrens 6251578Sahrens if (l->l_phys->l_hdr.lh_prefix_len != old_prefix_len) { 626789Sahrens /* it split while our locks were down */ 6271544Seschrock *lp = l; 6281544Seschrock return (0); 6291544Seschrock } 630789Sahrens } 631789Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 6321578Sahrens ASSERT3U(old_prefix_len, <, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); 6331578Sahrens ASSERT3U(ZAP_HASH_IDX(hash, old_prefix_len), ==, 6341578Sahrens l->l_phys->l_hdr.lh_prefix); 635789Sahrens 6361578Sahrens prefix_diff = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift - 6371578Sahrens (old_prefix_len + 1); 6381578Sahrens sibling = (ZAP_HASH_IDX(hash, old_prefix_len + 1) | 1) << prefix_diff; 6391544Seschrock 6401544Seschrock /* check for i/o errors before doing zap_leaf_split */ 641789Sahrens for (i = 0; i < (1ULL<<prefix_diff); i++) { 6421544Seschrock uint64_t blk; 6431544Seschrock err = zap_idx_to_blk(zap, sibling+i, &blk); 6441544Seschrock if (err) 6451544Seschrock return (err); 6461544Seschrock ASSERT3U(blk, ==, l->l_blkid); 6471544Seschrock } 6481544Seschrock 6491578Sahrens nl = zap_create_leaf(zap, tx); 650*5498Stimh zap_leaf_split(l, nl, zap->zap_normflags != 0); 6511544Seschrock 6521578Sahrens /* set sibling pointers */ 6531544Seschrock for (i = 0; i < (1ULL<<prefix_diff); i++) { 6541544Seschrock err = zap_set_idx_to_blk(zap, sibling+i, nl->l_blkid, tx); 6551544Seschrock ASSERT3U(err, ==, 0); /* we checked for i/o errors above */ 656789Sahrens } 657789Sahrens 6581578Sahrens if (hash & (1ULL << (64 - l->l_phys->l_hdr.lh_prefix_len))) { 659789Sahrens /* we want the sibling */ 660789Sahrens zap_put_leaf(l); 6611578Sahrens *lp = nl; 662789Sahrens } else { 663789Sahrens zap_put_leaf(nl); 6641578Sahrens *lp = l; 665789Sahrens } 666789Sahrens 6671544Seschrock return (0); 668789Sahrens } 669789Sahrens 670789Sahrens static void 6715497Sbonwick zap_put_leaf_maybe_grow_ptrtbl(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx) 672789Sahrens { 6735497Sbonwick zap_t *zap = zn->zn_zap; 6741578Sahrens int shift = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift; 6751578Sahrens int leaffull = (l->l_phys->l_hdr.lh_prefix_len == shift && 6761578Sahrens l->l_phys->l_hdr.lh_nfree < ZAP_LEAF_LOW_WATER); 677789Sahrens 6781578Sahrens zap_put_leaf(l); 679789Sahrens 6801578Sahrens if (leaffull || zap->zap_f.zap_phys->zap_ptrtbl.zt_nextblk) { 6811578Sahrens int err; 682789Sahrens 6831578Sahrens /* 6841578Sahrens * We are in the middle of growing the pointer table, or 6851578Sahrens * this leaf will soon make us grow it. 6861578Sahrens */ 687789Sahrens if (zap_tryupgradedir(zap, tx) == 0) { 688789Sahrens objset_t *os = zap->zap_objset; 689789Sahrens uint64_t zapobj = zap->zap_object; 690789Sahrens 691789Sahrens zap_unlockdir(zap); 692789Sahrens err = zap_lockdir(os, zapobj, tx, 6935497Sbonwick RW_WRITER, FALSE, FALSE, &zn->zn_zap); 6945497Sbonwick zap = zn->zn_zap; 6951578Sahrens if (err) 6961578Sahrens return; 697789Sahrens } 698789Sahrens 6991578Sahrens /* could have finished growing while our locks were down */ 7001578Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift == shift) 7011578Sahrens (void) zap_grow_ptrtbl(zap, tx); 702789Sahrens } 703789Sahrens } 704789Sahrens 705789Sahrens 706789Sahrens static int 7071578Sahrens fzap_checksize(const char *name, uint64_t integer_size, uint64_t num_integers) 708789Sahrens { 7091578Sahrens if (name && strlen(name) > ZAP_MAXNAMELEN) 7101578Sahrens return (E2BIG); 7111578Sahrens 712789Sahrens /* Only integer sizes supported by C */ 713789Sahrens switch (integer_size) { 714789Sahrens case 1: 715789Sahrens case 2: 716789Sahrens case 4: 717789Sahrens case 8: 718789Sahrens break; 719789Sahrens default: 720789Sahrens return (EINVAL); 721789Sahrens } 722789Sahrens 7231578Sahrens if (integer_size * num_integers > ZAP_MAXVALUELEN) 7241578Sahrens return (E2BIG); 725789Sahrens 726789Sahrens return (0); 727789Sahrens } 728789Sahrens 729789Sahrens /* 7305331Samw * Routines for manipulating attributes. 731789Sahrens */ 732789Sahrens int 7335331Samw fzap_lookup(zap_name_t *zn, 7345331Samw uint64_t integer_size, uint64_t num_integers, void *buf, 7355331Samw char *realname, int rn_len, boolean_t *ncp) 736789Sahrens { 737789Sahrens zap_leaf_t *l; 738789Sahrens int err; 739789Sahrens zap_entry_handle_t zeh; 740789Sahrens 7415331Samw err = fzap_checksize(zn->zn_name_orij, integer_size, num_integers); 742789Sahrens if (err != 0) 743789Sahrens return (err); 744789Sahrens 7455331Samw err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, NULL, RW_READER, &l); 7461544Seschrock if (err != 0) 7471544Seschrock return (err); 7485331Samw err = zap_leaf_lookup(l, zn, &zeh); 7495331Samw if (err == 0) { 7501578Sahrens err = zap_entry_read(&zeh, integer_size, num_integers, buf); 7515331Samw (void) zap_entry_read_name(&zeh, rn_len, realname); 7525331Samw if (ncp) { 7535331Samw *ncp = zap_entry_normalization_conflict(&zeh, 7545331Samw zn, NULL, zn->zn_zap); 7555331Samw } 7565331Samw } 7571578Sahrens 758789Sahrens zap_put_leaf(l); 759789Sahrens return (err); 760789Sahrens } 761789Sahrens 762789Sahrens int 7635331Samw fzap_add_cd(zap_name_t *zn, 764789Sahrens uint64_t integer_size, uint64_t num_integers, 7651544Seschrock const void *val, uint32_t cd, dmu_tx_t *tx) 766789Sahrens { 767789Sahrens zap_leaf_t *l; 768789Sahrens int err; 769789Sahrens zap_entry_handle_t zeh; 7705331Samw zap_t *zap = zn->zn_zap; 771789Sahrens 772789Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 773789Sahrens ASSERT(!zap->zap_ismicro); 7745331Samw ASSERT(fzap_checksize(zn->zn_name_orij, 7755331Samw integer_size, num_integers) == 0); 776789Sahrens 7775331Samw err = zap_deref_leaf(zap, zn->zn_hash, tx, RW_WRITER, &l); 7781544Seschrock if (err != 0) 7791544Seschrock return (err); 780789Sahrens retry: 7815331Samw err = zap_leaf_lookup(l, zn, &zeh); 782789Sahrens if (err == 0) { 783789Sahrens err = EEXIST; 784789Sahrens goto out; 785789Sahrens } 7861544Seschrock if (err != ENOENT) 7871544Seschrock goto out; 788789Sahrens 7895331Samw err = zap_entry_create(l, zn->zn_name_orij, zn->zn_hash, cd, 790789Sahrens integer_size, num_integers, val, &zeh); 791789Sahrens 792789Sahrens if (err == 0) { 793789Sahrens zap_increment_num_entries(zap, 1, tx); 794789Sahrens } else if (err == EAGAIN) { 7955497Sbonwick err = zap_expand_leaf(zn, l, tx, &l); 7965497Sbonwick zap = zn->zn_zap; /* zap_expand_leaf() may change zap */ 7971578Sahrens if (err == 0) 7981578Sahrens goto retry; 799789Sahrens } 800789Sahrens 801789Sahrens out: 8025497Sbonwick if (zap != NULL) 8035497Sbonwick zap_put_leaf_maybe_grow_ptrtbl(zn, l, tx); 804789Sahrens return (err); 805789Sahrens } 806789Sahrens 807789Sahrens int 8085331Samw fzap_add(zap_name_t *zn, 809789Sahrens uint64_t integer_size, uint64_t num_integers, 810789Sahrens const void *val, dmu_tx_t *tx) 811789Sahrens { 8125331Samw int err = fzap_checksize(zn->zn_name_orij, integer_size, num_integers); 813789Sahrens if (err != 0) 814789Sahrens return (err); 815789Sahrens 8165331Samw return (fzap_add_cd(zn, integer_size, num_integers, 8171578Sahrens val, ZAP_MAXCD, tx)); 818789Sahrens } 819789Sahrens 820789Sahrens int 8215331Samw fzap_update(zap_name_t *zn, 822789Sahrens int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) 823789Sahrens { 824789Sahrens zap_leaf_t *l; 825789Sahrens int err, create; 826789Sahrens zap_entry_handle_t zeh; 8275331Samw zap_t *zap = zn->zn_zap; 828789Sahrens 829789Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 8305331Samw err = fzap_checksize(zn->zn_name_orij, integer_size, num_integers); 831789Sahrens if (err != 0) 832789Sahrens return (err); 833789Sahrens 8345331Samw err = zap_deref_leaf(zap, zn->zn_hash, tx, RW_WRITER, &l); 8351544Seschrock if (err != 0) 8361544Seschrock return (err); 837789Sahrens retry: 8385331Samw err = zap_leaf_lookup(l, zn, &zeh); 839789Sahrens create = (err == ENOENT); 840789Sahrens ASSERT(err == 0 || err == ENOENT); 841789Sahrens 842789Sahrens if (create) { 8435331Samw err = zap_entry_create(l, zn->zn_name_orij, zn->zn_hash, 8445331Samw ZAP_MAXCD, integer_size, num_integers, val, &zeh); 845789Sahrens if (err == 0) 846789Sahrens zap_increment_num_entries(zap, 1, tx); 847789Sahrens } else { 848789Sahrens err = zap_entry_update(&zeh, integer_size, num_integers, val); 849789Sahrens } 850789Sahrens 851789Sahrens if (err == EAGAIN) { 8525497Sbonwick err = zap_expand_leaf(zn, l, tx, &l); 8535497Sbonwick zap = zn->zn_zap; /* zap_expand_leaf() may change zap */ 8541578Sahrens if (err == 0) 8551578Sahrens goto retry; 856789Sahrens } 857789Sahrens 8585497Sbonwick if (zap != NULL) 8595497Sbonwick zap_put_leaf_maybe_grow_ptrtbl(zn, l, tx); 860789Sahrens return (err); 861789Sahrens } 862789Sahrens 863789Sahrens int 8645331Samw fzap_length(zap_name_t *zn, 865789Sahrens uint64_t *integer_size, uint64_t *num_integers) 866789Sahrens { 867789Sahrens zap_leaf_t *l; 868789Sahrens int err; 869789Sahrens zap_entry_handle_t zeh; 870789Sahrens 8715331Samw err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, NULL, RW_READER, &l); 8721544Seschrock if (err != 0) 8731544Seschrock return (err); 8745331Samw err = zap_leaf_lookup(l, zn, &zeh); 875789Sahrens if (err != 0) 876789Sahrens goto out; 877789Sahrens 878789Sahrens if (integer_size) 879789Sahrens *integer_size = zeh.zeh_integer_size; 880789Sahrens if (num_integers) 881789Sahrens *num_integers = zeh.zeh_num_integers; 882789Sahrens out: 883789Sahrens zap_put_leaf(l); 884789Sahrens return (err); 885789Sahrens } 886789Sahrens 887789Sahrens int 8885331Samw fzap_remove(zap_name_t *zn, dmu_tx_t *tx) 889789Sahrens { 890789Sahrens zap_leaf_t *l; 891789Sahrens int err; 892789Sahrens zap_entry_handle_t zeh; 893789Sahrens 8945331Samw err = zap_deref_leaf(zn->zn_zap, zn->zn_hash, tx, RW_WRITER, &l); 8951544Seschrock if (err != 0) 8961544Seschrock return (err); 8975331Samw err = zap_leaf_lookup(l, zn, &zeh); 898789Sahrens if (err == 0) { 899789Sahrens zap_entry_remove(&zeh); 9005331Samw zap_increment_num_entries(zn->zn_zap, -1, tx); 901789Sahrens } 902789Sahrens zap_put_leaf(l); 903789Sahrens return (err); 904789Sahrens } 905789Sahrens 906789Sahrens int 9074577Sahrens zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, uint64_t mask, 9084577Sahrens char *name) 909789Sahrens { 910789Sahrens zap_cursor_t zc; 911789Sahrens zap_attribute_t *za; 912789Sahrens int err; 913789Sahrens 9144577Sahrens if (mask == 0) 9154577Sahrens mask = -1ULL; 9164577Sahrens 917789Sahrens za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); 918789Sahrens for (zap_cursor_init(&zc, os, zapobj); 919789Sahrens (err = zap_cursor_retrieve(&zc, za)) == 0; 920789Sahrens zap_cursor_advance(&zc)) { 9214577Sahrens if ((za->za_first_integer & mask) == (value & mask)) { 922789Sahrens (void) strcpy(name, za->za_name); 923789Sahrens break; 924789Sahrens } 925789Sahrens } 926885Sahrens zap_cursor_fini(&zc); 927789Sahrens kmem_free(za, sizeof (zap_attribute_t)); 928789Sahrens return (err); 929789Sahrens } 930789Sahrens 931789Sahrens 932789Sahrens /* 933789Sahrens * Routines for iterating over the attributes. 934789Sahrens */ 935789Sahrens 936789Sahrens int 937789Sahrens fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za) 938789Sahrens { 939789Sahrens int err = ENOENT; 940789Sahrens zap_entry_handle_t zeh; 941789Sahrens zap_leaf_t *l; 942789Sahrens 943789Sahrens /* retrieve the next entry at or after zc_hash/zc_cd */ 944789Sahrens /* if no entry, return ENOENT */ 945789Sahrens 946885Sahrens if (zc->zc_leaf && 9471578Sahrens (ZAP_HASH_IDX(zc->zc_hash, 9481578Sahrens zc->zc_leaf->l_phys->l_hdr.lh_prefix_len) != 9491578Sahrens zc->zc_leaf->l_phys->l_hdr.lh_prefix)) { 950885Sahrens rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); 951885Sahrens zap_put_leaf(zc->zc_leaf); 952885Sahrens zc->zc_leaf = NULL; 953885Sahrens } 954885Sahrens 955789Sahrens again: 956885Sahrens if (zc->zc_leaf == NULL) { 9571544Seschrock err = zap_deref_leaf(zap, zc->zc_hash, NULL, RW_READER, 9581544Seschrock &zc->zc_leaf); 9591544Seschrock if (err != 0) 9601544Seschrock return (err); 961885Sahrens } else { 962885Sahrens rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); 963885Sahrens } 964885Sahrens l = zc->zc_leaf; 965885Sahrens 966789Sahrens err = zap_leaf_lookup_closest(l, zc->zc_hash, zc->zc_cd, &zeh); 967789Sahrens 968789Sahrens if (err == ENOENT) { 9691578Sahrens uint64_t nocare = 9701578Sahrens (1ULL << (64 - l->l_phys->l_hdr.lh_prefix_len)) - 1; 971789Sahrens zc->zc_hash = (zc->zc_hash & ~nocare) + nocare + 1; 972789Sahrens zc->zc_cd = 0; 9731578Sahrens if (l->l_phys->l_hdr.lh_prefix_len == 0 || zc->zc_hash == 0) { 974789Sahrens zc->zc_hash = -1ULL; 975789Sahrens } else { 976885Sahrens zap_put_leaf(zc->zc_leaf); 977885Sahrens zc->zc_leaf = NULL; 978789Sahrens goto again; 979789Sahrens } 980789Sahrens } 981789Sahrens 982789Sahrens if (err == 0) { 983789Sahrens zc->zc_hash = zeh.zeh_hash; 984789Sahrens zc->zc_cd = zeh.zeh_cd; 985789Sahrens za->za_integer_length = zeh.zeh_integer_size; 986789Sahrens za->za_num_integers = zeh.zeh_num_integers; 987789Sahrens if (zeh.zeh_num_integers == 0) { 988789Sahrens za->za_first_integer = 0; 989789Sahrens } else { 990789Sahrens err = zap_entry_read(&zeh, 8, 1, &za->za_first_integer); 991789Sahrens ASSERT(err == 0 || err == EOVERFLOW); 992789Sahrens } 993789Sahrens err = zap_entry_read_name(&zeh, 994789Sahrens sizeof (za->za_name), za->za_name); 995789Sahrens ASSERT(err == 0); 9965331Samw 9975331Samw za->za_normalization_conflict = 9985331Samw zap_entry_normalization_conflict(&zeh, 9995331Samw NULL, za->za_name, zap); 1000789Sahrens } 1001885Sahrens rw_exit(&zc->zc_leaf->l_rwlock); 1002789Sahrens return (err); 1003789Sahrens } 1004789Sahrens 1005789Sahrens 1006789Sahrens static void 1007789Sahrens zap_stats_ptrtbl(zap_t *zap, uint64_t *tbl, int len, zap_stats_t *zs) 1008789Sahrens { 10091544Seschrock int i, err; 1010789Sahrens uint64_t lastblk = 0; 1011789Sahrens 1012789Sahrens /* 1013789Sahrens * NB: if a leaf has more pointers than an entire ptrtbl block 1014789Sahrens * can hold, then it'll be accounted for more than once, since 1015789Sahrens * we won't have lastblk. 1016789Sahrens */ 1017789Sahrens for (i = 0; i < len; i++) { 1018789Sahrens zap_leaf_t *l; 1019789Sahrens 1020789Sahrens if (tbl[i] == lastblk) 1021789Sahrens continue; 1022789Sahrens lastblk = tbl[i]; 1023789Sahrens 10241544Seschrock err = zap_get_leaf_byblk(zap, tbl[i], NULL, RW_READER, &l); 10251544Seschrock if (err == 0) { 10261578Sahrens zap_leaf_stats(zap, l, zs); 10271544Seschrock zap_put_leaf(l); 10281544Seschrock } 1029789Sahrens } 1030789Sahrens } 1031789Sahrens 1032789Sahrens void 1033789Sahrens fzap_get_stats(zap_t *zap, zap_stats_t *zs) 1034789Sahrens { 10351491Sahrens int bs = FZAP_BLOCK_SHIFT(zap); 10361491Sahrens zs->zs_blocksize = 1ULL << bs; 10371632Snd150628 10381632Snd150628 /* 10391632Snd150628 * Set zap_phys_t fields 10401632Snd150628 */ 1041789Sahrens zs->zs_num_leafs = zap->zap_f.zap_phys->zap_num_leafs; 1042789Sahrens zs->zs_num_entries = zap->zap_f.zap_phys->zap_num_entries; 1043789Sahrens zs->zs_num_blocks = zap->zap_f.zap_phys->zap_freeblk; 10441632Snd150628 zs->zs_block_type = zap->zap_f.zap_phys->zap_block_type; 10451632Snd150628 zs->zs_magic = zap->zap_f.zap_phys->zap_magic; 10461632Snd150628 zs->zs_salt = zap->zap_f.zap_phys->zap_salt; 10471632Snd150628 10481632Snd150628 /* 10491632Snd150628 * Set zap_ptrtbl fields 10501632Snd150628 */ 10511632Snd150628 zs->zs_ptrtbl_len = 1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift; 10521632Snd150628 zs->zs_ptrtbl_nextblk = zap->zap_f.zap_phys->zap_ptrtbl.zt_nextblk; 10531632Snd150628 zs->zs_ptrtbl_blks_copied = 10541632Snd150628 zap->zap_f.zap_phys->zap_ptrtbl.zt_blks_copied; 10551632Snd150628 zs->zs_ptrtbl_zt_blk = zap->zap_f.zap_phys->zap_ptrtbl.zt_blk; 10561632Snd150628 zs->zs_ptrtbl_zt_numblks = zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks; 10571632Snd150628 zs->zs_ptrtbl_zt_shift = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift; 1058789Sahrens 1059789Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { 1060789Sahrens /* the ptrtbl is entirely in the header block. */ 10611491Sahrens zap_stats_ptrtbl(zap, &ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), 10621491Sahrens 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap), zs); 1063789Sahrens } else { 1064789Sahrens int b; 1065789Sahrens 1066789Sahrens dmu_prefetch(zap->zap_objset, zap->zap_object, 10671491Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_blk << bs, 10681491Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks << bs); 1069789Sahrens 1070789Sahrens for (b = 0; b < zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks; 1071789Sahrens b++) { 1072789Sahrens dmu_buf_t *db; 10731544Seschrock int err; 1074789Sahrens 10751544Seschrock err = dmu_buf_hold(zap->zap_objset, zap->zap_object, 10761544Seschrock (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk + b) << bs, 10771544Seschrock FTAG, &db); 10781544Seschrock if (err == 0) { 10791544Seschrock zap_stats_ptrtbl(zap, db->db_data, 10801544Seschrock 1<<(bs-3), zs); 10811544Seschrock dmu_buf_rele(db, FTAG); 10821544Seschrock } 1083789Sahrens } 1084789Sahrens } 1085789Sahrens } 1086