1*789Sahrens /* 2*789Sahrens * CDDL HEADER START 3*789Sahrens * 4*789Sahrens * The contents of this file are subject to the terms of the 5*789Sahrens * Common Development and Distribution License, Version 1.0 only 6*789Sahrens * (the "License"). You may not use this file except in compliance 7*789Sahrens * with the License. 8*789Sahrens * 9*789Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*789Sahrens * or http://www.opensolaris.org/os/licensing. 11*789Sahrens * See the License for the specific language governing permissions 12*789Sahrens * and limitations under the License. 13*789Sahrens * 14*789Sahrens * When distributing Covered Code, include this CDDL HEADER in each 15*789Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*789Sahrens * If applicable, add the following below this CDDL HEADER, with the 17*789Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 18*789Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 19*789Sahrens * 20*789Sahrens * CDDL HEADER END 21*789Sahrens */ 22*789Sahrens /* 23*789Sahrens * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*789Sahrens * Use is subject to license terms. 25*789Sahrens */ 26*789Sahrens 27*789Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 28*789Sahrens 29*789Sahrens 30*789Sahrens /* 31*789Sahrens * This file contains the top half of the zfs directory structure 32*789Sahrens * implementation. The bottom half is in zap_leaf.c. 33*789Sahrens * 34*789Sahrens * The zdir is an extendable hash data structure. There is a table of 35*789Sahrens * pointers to buckets (zap_t->zd_data->zd_leafs). The buckets are 36*789Sahrens * each a constant size and hold a variable number of directory entries. 37*789Sahrens * The buckets (aka "leaf nodes") are implemented in zap_leaf.c. 38*789Sahrens * 39*789Sahrens * The pointer table holds a power of 2 number of pointers. 40*789Sahrens * (1<<zap_t->zd_data->zd_phys->zd_prefix_len). The bucket pointed to 41*789Sahrens * by the pointer at index i in the table holds entries whose hash value 42*789Sahrens * has a zd_prefix_len - bit prefix 43*789Sahrens */ 44*789Sahrens 45*789Sahrens #include <sys/spa.h> 46*789Sahrens #include <sys/dmu.h> 47*789Sahrens #include <sys/zfs_context.h> 48*789Sahrens #include <sys/zap.h> 49*789Sahrens #include <sys/zap_impl.h> 50*789Sahrens #include <sys/zap_leaf.h> 51*789Sahrens 52*789Sahrens #define MIN_FREE (ZAP_LEAF_NUMCHUNKS*9/10) 53*789Sahrens 54*789Sahrens static void zap_grow_ptrtbl(zap_t *zap, dmu_tx_t *tx); 55*789Sahrens static int zap_tryupgradedir(zap_t *zap, dmu_tx_t *tx); 56*789Sahrens static zap_leaf_t *zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, 57*789Sahrens dmu_tx_t *tx, krw_t lt); 58*789Sahrens static void zap_put_leaf(zap_leaf_t *l); 59*789Sahrens static void zap_leaf_pageout(dmu_buf_t *db, void *vl); 60*789Sahrens 61*789Sahrens 62*789Sahrens void 63*789Sahrens fzap_byteswap(void *vbuf, size_t size) 64*789Sahrens { 65*789Sahrens uint64_t block_type; 66*789Sahrens 67*789Sahrens ASSERT(size == (1<<ZAP_BLOCK_SHIFT)); 68*789Sahrens block_type = *(uint64_t *)vbuf; 69*789Sahrens 70*789Sahrens switch (block_type) { 71*789Sahrens case ZBT_LEAF: 72*789Sahrens case BSWAP_64(ZBT_LEAF): 73*789Sahrens zap_leaf_byteswap(vbuf); 74*789Sahrens return; 75*789Sahrens case ZBT_HEADER: 76*789Sahrens case BSWAP_64(ZBT_HEADER): 77*789Sahrens default: 78*789Sahrens /* it's a ptrtbl block */ 79*789Sahrens byteswap_uint64_array(vbuf, 1<<ZAP_BLOCK_SHIFT); 80*789Sahrens return; 81*789Sahrens } 82*789Sahrens } 83*789Sahrens 84*789Sahrens void 85*789Sahrens fzap_upgrade(zap_t *zap, dmu_tx_t *tx) 86*789Sahrens { 87*789Sahrens dmu_buf_t *db; 88*789Sahrens zap_leaf_t *l; 89*789Sahrens int i; 90*789Sahrens zap_phys_t *zp; 91*789Sahrens 92*789Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 93*789Sahrens zap->zap_ismicro = FALSE; 94*789Sahrens 95*789Sahrens (void) dmu_buf_update_user(zap->zap_dbuf, zap, zap, 96*789Sahrens &zap->zap_f.zap_phys, zap_pageout); 97*789Sahrens 98*789Sahrens mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0); 99*789Sahrens 100*789Sahrens zp = zap->zap_f.zap_phys; 101*789Sahrens /* 102*789Sahrens * explicitly zero it since it might be coming from an 103*789Sahrens * initialized microzap 104*789Sahrens */ 105*789Sahrens ASSERT3U(sizeof (zap_phys_t), ==, zap->zap_dbuf->db_size); 106*789Sahrens bzero(zp, sizeof (zap_phys_t)); 107*789Sahrens zp->zap_block_type = ZBT_HEADER; 108*789Sahrens zp->zap_magic = ZAP_MAGIC; 109*789Sahrens 110*789Sahrens zp->zap_ptrtbl.zt_shift = ZAP_PTRTBL_MIN_SHIFT; 111*789Sahrens 112*789Sahrens zp->zap_freeblk = 2; /* block 1 will be the first leaf */ 113*789Sahrens zp->zap_num_leafs = 1; 114*789Sahrens zp->zap_num_entries = 0; 115*789Sahrens zp->zap_salt = zap->zap_salt; 116*789Sahrens 117*789Sahrens for (i = 0; i < (1<<ZAP_PTRTBL_MIN_SHIFT); i++) 118*789Sahrens zp->zap_leafs[i] = 1; /* block 1 will be the first leaf */ 119*789Sahrens 120*789Sahrens /* 121*789Sahrens * set up block 1 - the first leaf 122*789Sahrens */ 123*789Sahrens db = dmu_buf_hold(zap->zap_objset, zap->zap_object, 124*789Sahrens 1<<ZAP_BLOCK_SHIFT); 125*789Sahrens dmu_buf_will_dirty(db, tx); 126*789Sahrens 127*789Sahrens l = kmem_zalloc(sizeof (zap_leaf_t), KM_SLEEP); 128*789Sahrens l->l_dbuf = db; 129*789Sahrens l->l_phys = db->db_data; 130*789Sahrens 131*789Sahrens zap_leaf_init(l); 132*789Sahrens 133*789Sahrens kmem_free(l, sizeof (zap_leaf_t)); 134*789Sahrens dmu_buf_rele(db); 135*789Sahrens } 136*789Sahrens 137*789Sahrens static int 138*789Sahrens zap_tryupgradedir(zap_t *zap, dmu_tx_t *tx) 139*789Sahrens { 140*789Sahrens if (RW_WRITE_HELD(&zap->zap_rwlock)) 141*789Sahrens return (1); 142*789Sahrens if (rw_tryupgrade(&zap->zap_rwlock)) { 143*789Sahrens dmu_buf_will_dirty(zap->zap_dbuf, tx); 144*789Sahrens return (1); 145*789Sahrens } 146*789Sahrens return (0); 147*789Sahrens } 148*789Sahrens 149*789Sahrens /* 150*789Sahrens * Generic routines for dealing with the pointer & cookie tables. 151*789Sahrens */ 152*789Sahrens 153*789Sahrens static void 154*789Sahrens zap_table_grow(zap_t *zap, zap_table_phys_t *tbl, 155*789Sahrens void (*transfer_func)(const uint64_t *src, uint64_t *dst, int n), 156*789Sahrens dmu_tx_t *tx) 157*789Sahrens { 158*789Sahrens uint64_t b, newblk; 159*789Sahrens dmu_buf_t *db_old, *db_new; 160*789Sahrens int hepb = 1<<(ZAP_BLOCK_SHIFT-4); 161*789Sahrens /* hepb = half the number of entries in a block */ 162*789Sahrens 163*789Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 164*789Sahrens ASSERT(tbl->zt_blk != 0); 165*789Sahrens ASSERT(tbl->zt_numblks > 0); 166*789Sahrens 167*789Sahrens if (tbl->zt_nextblk != 0) { 168*789Sahrens newblk = tbl->zt_nextblk; 169*789Sahrens } else { 170*789Sahrens newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2, tx); 171*789Sahrens tbl->zt_nextblk = newblk; 172*789Sahrens ASSERT3U(tbl->zt_blks_copied, ==, 0); 173*789Sahrens dmu_prefetch(zap->zap_objset, zap->zap_object, 174*789Sahrens tbl->zt_blk << ZAP_BLOCK_SHIFT, tbl->zt_numblks << 175*789Sahrens ZAP_BLOCK_SHIFT); 176*789Sahrens } 177*789Sahrens 178*789Sahrens /* 179*789Sahrens * Copy the ptrtbl from the old to new location, leaving the odd 180*789Sahrens * entries blank as we go. 181*789Sahrens */ 182*789Sahrens 183*789Sahrens b = tbl->zt_blks_copied; 184*789Sahrens db_old = dmu_buf_hold(zap->zap_objset, zap->zap_object, 185*789Sahrens (tbl->zt_blk + b) << ZAP_BLOCK_SHIFT); 186*789Sahrens dmu_buf_read(db_old); 187*789Sahrens 188*789Sahrens /* first half of entries in old[b] go to new[2*b+0] */ 189*789Sahrens db_new = dmu_buf_hold(zap->zap_objset, zap->zap_object, 190*789Sahrens (newblk + 2*b+0) << ZAP_BLOCK_SHIFT); 191*789Sahrens dmu_buf_will_dirty(db_new, tx); 192*789Sahrens transfer_func(db_old->db_data, db_new->db_data, hepb); 193*789Sahrens dmu_buf_rele(db_new); 194*789Sahrens 195*789Sahrens /* second half of entries in old[b] go to new[2*b+1] */ 196*789Sahrens db_new = dmu_buf_hold(zap->zap_objset, zap->zap_object, 197*789Sahrens (newblk + 2*b+1) << ZAP_BLOCK_SHIFT); 198*789Sahrens dmu_buf_will_dirty(db_new, tx); 199*789Sahrens transfer_func((uint64_t *)db_old->db_data + hepb, 200*789Sahrens db_new->db_data, hepb); 201*789Sahrens dmu_buf_rele(db_new); 202*789Sahrens 203*789Sahrens dmu_buf_rele(db_old); 204*789Sahrens 205*789Sahrens tbl->zt_blks_copied++; 206*789Sahrens 207*789Sahrens dprintf("copied block %llu of %llu\n", 208*789Sahrens tbl->zt_blks_copied, tbl->zt_numblks); 209*789Sahrens 210*789Sahrens if (tbl->zt_blks_copied == tbl->zt_numblks) { 211*789Sahrens dmu_free_range(zap->zap_objset, zap->zap_object, 212*789Sahrens tbl->zt_blk << ZAP_BLOCK_SHIFT, 213*789Sahrens tbl->zt_numblks << ZAP_BLOCK_SHIFT, tx); 214*789Sahrens 215*789Sahrens tbl->zt_blk = newblk; 216*789Sahrens tbl->zt_numblks *= 2; 217*789Sahrens tbl->zt_shift++; 218*789Sahrens tbl->zt_nextblk = 0; 219*789Sahrens tbl->zt_blks_copied = 0; 220*789Sahrens 221*789Sahrens dprintf("finished; numblocks now %llu (%lluk entries)\n", 222*789Sahrens tbl->zt_numblks, 1<<(tbl->zt_shift-10)); 223*789Sahrens } 224*789Sahrens } 225*789Sahrens 226*789Sahrens static uint64_t 227*789Sahrens zap_table_store(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t val, 228*789Sahrens dmu_tx_t *tx) 229*789Sahrens { 230*789Sahrens uint64_t blk, off, oldval; 231*789Sahrens dmu_buf_t *db; 232*789Sahrens 233*789Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 234*789Sahrens ASSERT(tbl->zt_blk != 0); 235*789Sahrens 236*789Sahrens dprintf("storing %llx at index %llx\n", val, idx); 237*789Sahrens 238*789Sahrens blk = idx >> (ZAP_BLOCK_SHIFT-3); 239*789Sahrens off = idx & ((1<<(ZAP_BLOCK_SHIFT-3))-1); 240*789Sahrens 241*789Sahrens db = dmu_buf_hold(zap->zap_objset, zap->zap_object, 242*789Sahrens (tbl->zt_blk + blk) << ZAP_BLOCK_SHIFT); 243*789Sahrens dmu_buf_will_dirty(db, tx); 244*789Sahrens oldval = ((uint64_t *)db->db_data)[off]; 245*789Sahrens ((uint64_t *)db->db_data)[off] = val; 246*789Sahrens dmu_buf_rele(db); 247*789Sahrens 248*789Sahrens if (tbl->zt_nextblk != 0) { 249*789Sahrens idx *= 2; 250*789Sahrens blk = idx >> (ZAP_BLOCK_SHIFT-3); 251*789Sahrens off = idx & ((1<<(ZAP_BLOCK_SHIFT-3))-1); 252*789Sahrens 253*789Sahrens db = dmu_buf_hold(zap->zap_objset, zap->zap_object, 254*789Sahrens (tbl->zt_nextblk + blk) << ZAP_BLOCK_SHIFT); 255*789Sahrens dmu_buf_will_dirty(db, tx); 256*789Sahrens ((uint64_t *)db->db_data)[off] = val; 257*789Sahrens ((uint64_t *)db->db_data)[off+1] = val; 258*789Sahrens dmu_buf_rele(db); 259*789Sahrens } 260*789Sahrens 261*789Sahrens return (oldval); 262*789Sahrens } 263*789Sahrens 264*789Sahrens static uint64_t 265*789Sahrens zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx) 266*789Sahrens { 267*789Sahrens uint64_t blk, off, val; 268*789Sahrens dmu_buf_t *db; 269*789Sahrens 270*789Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 271*789Sahrens 272*789Sahrens blk = idx >> (ZAP_BLOCK_SHIFT-3); 273*789Sahrens off = idx & ((1<<(ZAP_BLOCK_SHIFT-3))-1); 274*789Sahrens 275*789Sahrens db = dmu_buf_hold(zap->zap_objset, zap->zap_object, 276*789Sahrens (tbl->zt_blk + blk) << ZAP_BLOCK_SHIFT); 277*789Sahrens dmu_buf_read(db); 278*789Sahrens val = ((uint64_t *)db->db_data)[off]; 279*789Sahrens dmu_buf_rele(db); 280*789Sahrens return (val); 281*789Sahrens } 282*789Sahrens 283*789Sahrens /* 284*789Sahrens * Routines for growing the ptrtbl. 285*789Sahrens */ 286*789Sahrens 287*789Sahrens static void 288*789Sahrens zap_ptrtbl_transfer(const uint64_t *src, uint64_t *dst, int n) 289*789Sahrens { 290*789Sahrens int i; 291*789Sahrens for (i = 0; i < n; i++) { 292*789Sahrens uint64_t lb = src[i]; 293*789Sahrens dst[2*i+0] = lb; 294*789Sahrens dst[2*i+1] = lb; 295*789Sahrens } 296*789Sahrens } 297*789Sahrens 298*789Sahrens static void 299*789Sahrens zap_grow_ptrtbl(zap_t *zap, dmu_tx_t *tx) 300*789Sahrens { 301*789Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_shift == 32) 302*789Sahrens return; 303*789Sahrens 304*789Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { 305*789Sahrens /* 306*789Sahrens * The ptrtbl can no longer be contained in the 307*789Sahrens * header block. Give it its own entire block, which 308*789Sahrens * will quadruple the size of the ptrtbl. 309*789Sahrens */ 310*789Sahrens uint64_t newblk; 311*789Sahrens dmu_buf_t *db_new; 312*789Sahrens 313*789Sahrens ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==, 314*789Sahrens ZAP_PTRTBL_MIN_SHIFT); 315*789Sahrens ASSERT3U(zap->zap_f.zap_phys->zap_ptrtbl.zt_blk, ==, 0); 316*789Sahrens 317*789Sahrens newblk = zap_allocate_blocks(zap, 1, tx); 318*789Sahrens db_new = dmu_buf_hold(zap->zap_objset, zap->zap_object, 319*789Sahrens newblk << ZAP_BLOCK_SHIFT); 320*789Sahrens 321*789Sahrens dmu_buf_will_dirty(db_new, tx); 322*789Sahrens zap_ptrtbl_transfer(zap->zap_f.zap_phys->zap_leafs, 323*789Sahrens db_new->db_data, 1 << ZAP_PTRTBL_MIN_SHIFT); 324*789Sahrens dmu_buf_rele(db_new); 325*789Sahrens 326*789Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_blk = newblk; 327*789Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks = 1; 328*789Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_shift++; 329*789Sahrens 330*789Sahrens ASSERT3U(1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift, ==, 331*789Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks << 332*789Sahrens (ZAP_BLOCK_SHIFT-3)); 333*789Sahrens } else { 334*789Sahrens zap_table_grow(zap, &zap->zap_f.zap_phys->zap_ptrtbl, 335*789Sahrens zap_ptrtbl_transfer, tx); 336*789Sahrens } 337*789Sahrens } 338*789Sahrens 339*789Sahrens static void 340*789Sahrens zap_increment_num_entries(zap_t *zap, int delta, dmu_tx_t *tx) 341*789Sahrens { 342*789Sahrens dmu_buf_will_dirty(zap->zap_dbuf, tx); 343*789Sahrens mutex_enter(&zap->zap_f.zap_num_entries_mtx); 344*789Sahrens 345*789Sahrens ASSERT(delta > 0 || zap->zap_f.zap_phys->zap_num_entries >= -delta); 346*789Sahrens 347*789Sahrens zap->zap_f.zap_phys->zap_num_entries += delta; 348*789Sahrens 349*789Sahrens mutex_exit(&zap->zap_f.zap_num_entries_mtx); 350*789Sahrens } 351*789Sahrens 352*789Sahrens uint64_t 353*789Sahrens zap_allocate_blocks(zap_t *zap, int nblocks, dmu_tx_t *tx) 354*789Sahrens { 355*789Sahrens uint64_t newblk; 356*789Sahrens ASSERT(tx != NULL); 357*789Sahrens if (!RW_WRITE_HELD(&zap->zap_rwlock)) { 358*789Sahrens dmu_buf_will_dirty(zap->zap_dbuf, tx); 359*789Sahrens } 360*789Sahrens newblk = atomic_add_64_nv(&zap->zap_f.zap_phys->zap_freeblk, nblocks) - 361*789Sahrens nblocks; 362*789Sahrens return (newblk); 363*789Sahrens } 364*789Sahrens 365*789Sahrens 366*789Sahrens /* 367*789Sahrens * This function doesn't increment zap_num_leafs because it's used to 368*789Sahrens * allocate a leaf chain, which doesn't count against zap_num_leafs. 369*789Sahrens * The directory must be held exclusively for this tx. 370*789Sahrens */ 371*789Sahrens zap_leaf_t * 372*789Sahrens zap_create_leaf(zap_t *zap, dmu_tx_t *tx) 373*789Sahrens { 374*789Sahrens void *winner; 375*789Sahrens zap_leaf_t *l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP); 376*789Sahrens 377*789Sahrens ASSERT(tx != NULL); 378*789Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 379*789Sahrens /* hence we already dirtied zap->zap_dbuf */ 380*789Sahrens 381*789Sahrens rw_init(&l->l_rwlock, 0, 0, 0); 382*789Sahrens rw_enter(&l->l_rwlock, RW_WRITER); 383*789Sahrens l->l_blkid = zap_allocate_blocks(zap, 1, tx); 384*789Sahrens l->l_next = NULL; 385*789Sahrens l->l_dbuf = NULL; 386*789Sahrens l->l_phys = NULL; 387*789Sahrens 388*789Sahrens l->l_dbuf = dmu_buf_hold(zap->zap_objset, zap->zap_object, 389*789Sahrens l->l_blkid << ZAP_BLOCK_SHIFT); 390*789Sahrens winner = dmu_buf_set_user(l->l_dbuf, l, &l->l_phys, zap_leaf_pageout); 391*789Sahrens ASSERT(winner == NULL); 392*789Sahrens dmu_buf_will_dirty(l->l_dbuf, tx); 393*789Sahrens 394*789Sahrens zap_leaf_init(l); 395*789Sahrens 396*789Sahrens return (l); 397*789Sahrens } 398*789Sahrens 399*789Sahrens /* ARGSUSED */ 400*789Sahrens void 401*789Sahrens zap_destroy_leaf(zap_t *zap, zap_leaf_t *l, dmu_tx_t *tx) 402*789Sahrens { 403*789Sahrens /* uint64_t offset = l->l_blkid << ZAP_BLOCK_SHIFT; */ 404*789Sahrens rw_exit(&l->l_rwlock); 405*789Sahrens dmu_buf_rele(l->l_dbuf); 406*789Sahrens /* XXX there are still holds on this block, so we can't free it? */ 407*789Sahrens /* dmu_free_range(zap->zap_objset, zap->zap_object, */ 408*789Sahrens /* offset, 1<<ZAP_BLOCK_SHIFT, tx); */ 409*789Sahrens } 410*789Sahrens 411*789Sahrens int 412*789Sahrens fzap_count(zap_t *zap, uint64_t *count) 413*789Sahrens { 414*789Sahrens ASSERT(!zap->zap_ismicro); 415*789Sahrens mutex_enter(&zap->zap_f.zap_num_entries_mtx); /* unnecessary */ 416*789Sahrens *count = zap->zap_f.zap_phys->zap_num_entries; 417*789Sahrens mutex_exit(&zap->zap_f.zap_num_entries_mtx); 418*789Sahrens return (0); 419*789Sahrens } 420*789Sahrens 421*789Sahrens /* 422*789Sahrens * Routines for obtaining zap_leaf_t's 423*789Sahrens */ 424*789Sahrens 425*789Sahrens static void 426*789Sahrens zap_put_leaf(zap_leaf_t *l) 427*789Sahrens { 428*789Sahrens zap_leaf_t *nl = l->l_next; 429*789Sahrens while (nl) { 430*789Sahrens zap_leaf_t *nnl = nl->l_next; 431*789Sahrens rw_exit(&nl->l_rwlock); 432*789Sahrens dmu_buf_rele(nl->l_dbuf); 433*789Sahrens nl = nnl; 434*789Sahrens } 435*789Sahrens rw_exit(&l->l_rwlock); 436*789Sahrens dmu_buf_rele(l->l_dbuf); 437*789Sahrens } 438*789Sahrens 439*789Sahrens _NOTE(ARGSUSED(0)) 440*789Sahrens static void 441*789Sahrens zap_leaf_pageout(dmu_buf_t *db, void *vl) 442*789Sahrens { 443*789Sahrens zap_leaf_t *l = vl; 444*789Sahrens 445*789Sahrens rw_destroy(&l->l_rwlock); 446*789Sahrens kmem_free(l, sizeof (zap_leaf_t)); 447*789Sahrens } 448*789Sahrens 449*789Sahrens static zap_leaf_t * 450*789Sahrens zap_open_leaf(uint64_t blkid, dmu_buf_t *db) 451*789Sahrens { 452*789Sahrens zap_leaf_t *l, *winner; 453*789Sahrens 454*789Sahrens ASSERT(blkid != 0); 455*789Sahrens 456*789Sahrens l = kmem_alloc(sizeof (zap_leaf_t), KM_SLEEP); 457*789Sahrens rw_init(&l->l_rwlock, 0, 0, 0); 458*789Sahrens rw_enter(&l->l_rwlock, RW_WRITER); 459*789Sahrens l->l_blkid = blkid; 460*789Sahrens l->l_next = NULL; 461*789Sahrens l->l_dbuf = db; 462*789Sahrens l->l_phys = NULL; 463*789Sahrens 464*789Sahrens winner = dmu_buf_set_user(db, l, &l->l_phys, zap_leaf_pageout); 465*789Sahrens 466*789Sahrens rw_exit(&l->l_rwlock); 467*789Sahrens if (winner != NULL) { 468*789Sahrens /* someone else set it first */ 469*789Sahrens zap_leaf_pageout(NULL, l); 470*789Sahrens l = winner; 471*789Sahrens } 472*789Sahrens 473*789Sahrens return (l); 474*789Sahrens } 475*789Sahrens 476*789Sahrens static zap_leaf_t * 477*789Sahrens zap_get_leaf_byblk_impl(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt) 478*789Sahrens { 479*789Sahrens dmu_buf_t *db; 480*789Sahrens zap_leaf_t *l; 481*789Sahrens 482*789Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 483*789Sahrens 484*789Sahrens db = dmu_buf_hold(zap->zap_objset, zap->zap_object, 485*789Sahrens blkid << ZAP_BLOCK_SHIFT); 486*789Sahrens 487*789Sahrens ASSERT3U(db->db_object, ==, zap->zap_object); 488*789Sahrens ASSERT3U(db->db_offset, ==, blkid << ZAP_BLOCK_SHIFT); 489*789Sahrens ASSERT3U(db->db_size, ==, 1 << ZAP_BLOCK_SHIFT); 490*789Sahrens ASSERT(blkid != 0); 491*789Sahrens 492*789Sahrens dmu_buf_read(db); 493*789Sahrens l = dmu_buf_get_user(db); 494*789Sahrens 495*789Sahrens if (l == NULL) 496*789Sahrens l = zap_open_leaf(blkid, db); 497*789Sahrens 498*789Sahrens rw_enter(&l->l_rwlock, lt); 499*789Sahrens /* 500*789Sahrens * Must lock before dirtying, otherwise l->l_phys could change, 501*789Sahrens * causing ASSERT below to fail. 502*789Sahrens */ 503*789Sahrens if (lt == RW_WRITER) 504*789Sahrens dmu_buf_will_dirty(db, tx); 505*789Sahrens ASSERT3U(l->l_blkid, ==, blkid); 506*789Sahrens ASSERT3P(l->l_dbuf, ==, db); 507*789Sahrens ASSERT3P(l->l_phys, ==, l->l_dbuf->db_data); 508*789Sahrens ASSERT3U(l->lh_block_type, ==, ZBT_LEAF); 509*789Sahrens ASSERT3U(l->lh_magic, ==, ZAP_LEAF_MAGIC); 510*789Sahrens 511*789Sahrens return (l); 512*789Sahrens } 513*789Sahrens 514*789Sahrens static zap_leaf_t * 515*789Sahrens zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt) 516*789Sahrens { 517*789Sahrens zap_leaf_t *l, *nl; 518*789Sahrens 519*789Sahrens l = zap_get_leaf_byblk_impl(zap, blkid, tx, lt); 520*789Sahrens 521*789Sahrens nl = l; 522*789Sahrens while (nl->lh_next != 0) { 523*789Sahrens zap_leaf_t *nnl; 524*789Sahrens nnl = zap_get_leaf_byblk_impl(zap, nl->lh_next, tx, lt); 525*789Sahrens nl->l_next = nnl; 526*789Sahrens nl = nnl; 527*789Sahrens } 528*789Sahrens 529*789Sahrens return (l); 530*789Sahrens } 531*789Sahrens 532*789Sahrens static uint64_t 533*789Sahrens zap_idx_to_blk(zap_t *zap, uint64_t idx) 534*789Sahrens { 535*789Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 536*789Sahrens 537*789Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { 538*789Sahrens ASSERT3U(idx, <, 539*789Sahrens (1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift)); 540*789Sahrens return (zap->zap_f.zap_phys->zap_leafs[idx]); 541*789Sahrens } else { 542*789Sahrens return (zap_table_load(zap, &zap->zap_f.zap_phys->zap_ptrtbl, 543*789Sahrens idx)); 544*789Sahrens } 545*789Sahrens } 546*789Sahrens 547*789Sahrens static void 548*789Sahrens zap_set_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t blk, dmu_tx_t *tx) 549*789Sahrens { 550*789Sahrens ASSERT(tx != NULL); 551*789Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 552*789Sahrens 553*789Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk == 0) { 554*789Sahrens zap->zap_f.zap_phys->zap_leafs[idx] = blk; 555*789Sahrens } else { 556*789Sahrens (void) zap_table_store(zap, &zap->zap_f.zap_phys->zap_ptrtbl, 557*789Sahrens idx, blk, tx); 558*789Sahrens } 559*789Sahrens } 560*789Sahrens 561*789Sahrens static zap_leaf_t * 562*789Sahrens zap_deref_leaf(zap_t *zap, uint64_t h, dmu_tx_t *tx, krw_t lt) 563*789Sahrens { 564*789Sahrens uint64_t idx; 565*789Sahrens zap_leaf_t *l; 566*789Sahrens 567*789Sahrens ASSERT(zap->zap_dbuf == NULL || 568*789Sahrens zap->zap_f.zap_phys == zap->zap_dbuf->db_data); 569*789Sahrens ASSERT3U(zap->zap_f.zap_phys->zap_magic, ==, ZAP_MAGIC); 570*789Sahrens idx = ZAP_HASH_IDX(h, zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); 571*789Sahrens l = zap_get_leaf_byblk(zap, zap_idx_to_blk(zap, idx), tx, lt); 572*789Sahrens 573*789Sahrens ASSERT3U(ZAP_HASH_IDX(h, l->lh_prefix_len), ==, l->lh_prefix); 574*789Sahrens 575*789Sahrens return (l); 576*789Sahrens } 577*789Sahrens 578*789Sahrens 579*789Sahrens static zap_leaf_t * 580*789Sahrens zap_expand_leaf(zap_t *zap, zap_leaf_t *l, uint64_t hash, dmu_tx_t *tx) 581*789Sahrens { 582*789Sahrens zap_leaf_t *nl; 583*789Sahrens int prefix_diff, i, err; 584*789Sahrens uint64_t sibling; 585*789Sahrens 586*789Sahrens ASSERT3U(l->lh_prefix_len, <=, 587*789Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_shift); 588*789Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 589*789Sahrens 590*789Sahrens ASSERT3U(ZAP_HASH_IDX(hash, l->lh_prefix_len), ==, l->lh_prefix); 591*789Sahrens 592*789Sahrens if (zap_tryupgradedir(zap, tx) == 0) { 593*789Sahrens /* failed to upgrade */ 594*789Sahrens int old_prefix_len = l->lh_prefix_len; 595*789Sahrens objset_t *os = zap->zap_objset; 596*789Sahrens uint64_t object = zap->zap_object; 597*789Sahrens 598*789Sahrens zap_put_leaf(l); 599*789Sahrens zap_unlockdir(zap); 600*789Sahrens err = zap_lockdir(os, object, tx, RW_WRITER, FALSE, &zap); 601*789Sahrens ASSERT3U(err, ==, 0); 602*789Sahrens ASSERT(!zap->zap_ismicro); 603*789Sahrens l = zap_deref_leaf(zap, hash, tx, RW_WRITER); 604*789Sahrens 605*789Sahrens if (l->lh_prefix_len != old_prefix_len) 606*789Sahrens /* it split while our locks were down */ 607*789Sahrens return (l); 608*789Sahrens } 609*789Sahrens ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 610*789Sahrens 611*789Sahrens if (l->lh_prefix_len == zap->zap_f.zap_phys->zap_ptrtbl.zt_shift) { 612*789Sahrens /* There's only one pointer to us. Chain on another leaf blk. */ 613*789Sahrens (void) zap_leaf_chainmore(l, zap_create_leaf(zap, tx)); 614*789Sahrens dprintf("chaining leaf %x/%d\n", l->lh_prefix, 615*789Sahrens l->lh_prefix_len); 616*789Sahrens return (l); 617*789Sahrens } 618*789Sahrens 619*789Sahrens ASSERT3U(ZAP_HASH_IDX(hash, l->lh_prefix_len), ==, l->lh_prefix); 620*789Sahrens 621*789Sahrens /* There's more than one pointer to us. Split this leaf. */ 622*789Sahrens nl = zap_leaf_split(zap, l, tx); 623*789Sahrens 624*789Sahrens /* set sibling pointers */ 625*789Sahrens prefix_diff = 626*789Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_shift - l->lh_prefix_len; 627*789Sahrens sibling = (ZAP_HASH_IDX(hash, l->lh_prefix_len) | 1) << prefix_diff; 628*789Sahrens for (i = 0; i < (1ULL<<prefix_diff); i++) { 629*789Sahrens ASSERT3U(zap_idx_to_blk(zap, sibling+i), ==, l->l_blkid); 630*789Sahrens zap_set_idx_to_blk(zap, sibling+i, nl->l_blkid, tx); 631*789Sahrens /* dprintf("set %d to %u %x\n", sibling+i, nl->l_blkid, nl); */ 632*789Sahrens } 633*789Sahrens 634*789Sahrens zap->zap_f.zap_phys->zap_num_leafs++; 635*789Sahrens 636*789Sahrens if (hash & (1ULL << (64 - l->lh_prefix_len))) { 637*789Sahrens /* we want the sibling */ 638*789Sahrens zap_put_leaf(l); 639*789Sahrens l = nl; 640*789Sahrens } else { 641*789Sahrens zap_put_leaf(nl); 642*789Sahrens } 643*789Sahrens 644*789Sahrens return (l); 645*789Sahrens } 646*789Sahrens 647*789Sahrens static void 648*789Sahrens zap_put_leaf_maybe_grow_ptrtbl(zap_t *zap, 649*789Sahrens zap_leaf_t *l, dmu_tx_t *tx) 650*789Sahrens { 651*789Sahrens int shift, err; 652*789Sahrens 653*789Sahrens again: 654*789Sahrens shift = zap->zap_f.zap_phys->zap_ptrtbl.zt_shift; 655*789Sahrens 656*789Sahrens if (l->lh_prefix_len == shift && 657*789Sahrens (l->l_next != NULL || l->lh_nfree < MIN_FREE)) { 658*789Sahrens /* this leaf will soon make us grow the pointer table */ 659*789Sahrens 660*789Sahrens if (zap_tryupgradedir(zap, tx) == 0) { 661*789Sahrens objset_t *os = zap->zap_objset; 662*789Sahrens uint64_t zapobj = zap->zap_object; 663*789Sahrens uint64_t blkid = l->l_blkid; 664*789Sahrens 665*789Sahrens zap_put_leaf(l); 666*789Sahrens zap_unlockdir(zap); 667*789Sahrens err = zap_lockdir(os, zapobj, tx, 668*789Sahrens RW_WRITER, FALSE, &zap); 669*789Sahrens ASSERT3U(err, ==, 0); 670*789Sahrens l = zap_get_leaf_byblk(zap, blkid, tx, RW_READER); 671*789Sahrens goto again; 672*789Sahrens } 673*789Sahrens 674*789Sahrens zap_put_leaf(l); 675*789Sahrens zap_grow_ptrtbl(zap, tx); 676*789Sahrens } else { 677*789Sahrens zap_put_leaf(l); 678*789Sahrens } 679*789Sahrens } 680*789Sahrens 681*789Sahrens 682*789Sahrens static int 683*789Sahrens fzap_checksize(uint64_t integer_size, uint64_t num_integers) 684*789Sahrens { 685*789Sahrens /* Only integer sizes supported by C */ 686*789Sahrens switch (integer_size) { 687*789Sahrens case 1: 688*789Sahrens case 2: 689*789Sahrens case 4: 690*789Sahrens case 8: 691*789Sahrens break; 692*789Sahrens default: 693*789Sahrens return (EINVAL); 694*789Sahrens } 695*789Sahrens 696*789Sahrens /* Make sure we won't overflow */ 697*789Sahrens if (integer_size * num_integers < num_integers) 698*789Sahrens return (EINVAL); 699*789Sahrens if (integer_size * num_integers > DMU_MAX_ACCESS) 700*789Sahrens return (EINVAL); 701*789Sahrens 702*789Sahrens return (0); 703*789Sahrens } 704*789Sahrens 705*789Sahrens /* 706*789Sahrens * Routines for maniplulating attributes. 707*789Sahrens */ 708*789Sahrens int 709*789Sahrens fzap_lookup(zap_t *zap, const char *name, 710*789Sahrens uint64_t integer_size, uint64_t num_integers, void *buf) 711*789Sahrens { 712*789Sahrens zap_leaf_t *l; 713*789Sahrens int err; 714*789Sahrens uint64_t hash; 715*789Sahrens zap_entry_handle_t zeh; 716*789Sahrens 717*789Sahrens err = fzap_checksize(integer_size, num_integers); 718*789Sahrens if (err != 0) 719*789Sahrens return (err); 720*789Sahrens 721*789Sahrens hash = zap_hash(zap, name); 722*789Sahrens l = zap_deref_leaf(zap, hash, NULL, RW_READER); 723*789Sahrens err = zap_leaf_lookup(l, name, hash, &zeh); 724*789Sahrens if (err != 0) 725*789Sahrens goto out; 726*789Sahrens err = zap_entry_read(&zeh, integer_size, num_integers, buf); 727*789Sahrens out: 728*789Sahrens zap_put_leaf(l); 729*789Sahrens return (err); 730*789Sahrens } 731*789Sahrens 732*789Sahrens int 733*789Sahrens fzap_add_cd(zap_t *zap, const char *name, 734*789Sahrens uint64_t integer_size, uint64_t num_integers, 735*789Sahrens const void *val, uint32_t cd, dmu_tx_t *tx, zap_leaf_t **lp) 736*789Sahrens { 737*789Sahrens zap_leaf_t *l; 738*789Sahrens uint64_t hash; 739*789Sahrens int err; 740*789Sahrens zap_entry_handle_t zeh; 741*789Sahrens 742*789Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 743*789Sahrens ASSERT(!zap->zap_ismicro); 744*789Sahrens ASSERT(fzap_checksize(integer_size, num_integers) == 0); 745*789Sahrens 746*789Sahrens hash = zap_hash(zap, name); 747*789Sahrens l = zap_deref_leaf(zap, hash, tx, RW_WRITER); 748*789Sahrens retry: 749*789Sahrens err = zap_leaf_lookup(l, name, hash, &zeh); 750*789Sahrens if (err == 0) { 751*789Sahrens err = EEXIST; 752*789Sahrens goto out; 753*789Sahrens } 754*789Sahrens ASSERT(err == ENOENT); 755*789Sahrens 756*789Sahrens /* XXX If this leaf is chained, split it if we can. */ 757*789Sahrens err = zap_entry_create(l, name, hash, cd, 758*789Sahrens integer_size, num_integers, val, &zeh); 759*789Sahrens 760*789Sahrens if (err == 0) { 761*789Sahrens zap_increment_num_entries(zap, 1, tx); 762*789Sahrens } else if (err == EAGAIN) { 763*789Sahrens l = zap_expand_leaf(zap, l, hash, tx); 764*789Sahrens goto retry; 765*789Sahrens } 766*789Sahrens 767*789Sahrens out: 768*789Sahrens if (lp) 769*789Sahrens *lp = l; 770*789Sahrens else 771*789Sahrens zap_put_leaf(l); 772*789Sahrens return (err); 773*789Sahrens } 774*789Sahrens 775*789Sahrens int 776*789Sahrens fzap_add(zap_t *zap, const char *name, 777*789Sahrens uint64_t integer_size, uint64_t num_integers, 778*789Sahrens const void *val, dmu_tx_t *tx) 779*789Sahrens { 780*789Sahrens int err; 781*789Sahrens zap_leaf_t *l; 782*789Sahrens 783*789Sahrens err = fzap_checksize(integer_size, num_integers); 784*789Sahrens if (err != 0) 785*789Sahrens return (err); 786*789Sahrens 787*789Sahrens err = fzap_add_cd(zap, name, integer_size, num_integers, 788*789Sahrens val, ZAP_MAXCD, tx, &l); 789*789Sahrens 790*789Sahrens zap_put_leaf_maybe_grow_ptrtbl(zap, l, tx); 791*789Sahrens return (err); 792*789Sahrens } 793*789Sahrens 794*789Sahrens int 795*789Sahrens fzap_update(zap_t *zap, const char *name, 796*789Sahrens int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) 797*789Sahrens { 798*789Sahrens zap_leaf_t *l; 799*789Sahrens uint64_t hash; 800*789Sahrens int err, create; 801*789Sahrens zap_entry_handle_t zeh; 802*789Sahrens 803*789Sahrens ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 804*789Sahrens err = fzap_checksize(integer_size, num_integers); 805*789Sahrens if (err != 0) 806*789Sahrens return (err); 807*789Sahrens 808*789Sahrens hash = zap_hash(zap, name); 809*789Sahrens l = zap_deref_leaf(zap, hash, tx, RW_WRITER); 810*789Sahrens retry: 811*789Sahrens err = zap_leaf_lookup(l, name, hash, &zeh); 812*789Sahrens create = (err == ENOENT); 813*789Sahrens ASSERT(err == 0 || err == ENOENT); 814*789Sahrens 815*789Sahrens /* XXX If this leaf is chained, split it if we can. */ 816*789Sahrens 817*789Sahrens if (create) { 818*789Sahrens err = zap_entry_create(l, name, hash, ZAP_MAXCD, 819*789Sahrens integer_size, num_integers, val, &zeh); 820*789Sahrens if (err == 0) 821*789Sahrens zap_increment_num_entries(zap, 1, tx); 822*789Sahrens } else { 823*789Sahrens err = zap_entry_update(&zeh, integer_size, num_integers, val); 824*789Sahrens } 825*789Sahrens 826*789Sahrens if (err == EAGAIN) { 827*789Sahrens l = zap_expand_leaf(zap, l, hash, tx); 828*789Sahrens goto retry; 829*789Sahrens } 830*789Sahrens 831*789Sahrens zap_put_leaf_maybe_grow_ptrtbl(zap, l, tx); 832*789Sahrens return (err); 833*789Sahrens } 834*789Sahrens 835*789Sahrens int 836*789Sahrens fzap_length(zap_t *zap, const char *name, 837*789Sahrens uint64_t *integer_size, uint64_t *num_integers) 838*789Sahrens { 839*789Sahrens zap_leaf_t *l; 840*789Sahrens int err; 841*789Sahrens uint64_t hash; 842*789Sahrens zap_entry_handle_t zeh; 843*789Sahrens 844*789Sahrens hash = zap_hash(zap, name); 845*789Sahrens l = zap_deref_leaf(zap, hash, NULL, RW_READER); 846*789Sahrens err = zap_leaf_lookup(l, name, hash, &zeh); 847*789Sahrens if (err != 0) 848*789Sahrens goto out; 849*789Sahrens 850*789Sahrens if (integer_size) 851*789Sahrens *integer_size = zeh.zeh_integer_size; 852*789Sahrens if (num_integers) 853*789Sahrens *num_integers = zeh.zeh_num_integers; 854*789Sahrens out: 855*789Sahrens zap_put_leaf(l); 856*789Sahrens return (err); 857*789Sahrens } 858*789Sahrens 859*789Sahrens int 860*789Sahrens fzap_remove(zap_t *zap, const char *name, dmu_tx_t *tx) 861*789Sahrens { 862*789Sahrens zap_leaf_t *l; 863*789Sahrens uint64_t hash; 864*789Sahrens int err; 865*789Sahrens zap_entry_handle_t zeh; 866*789Sahrens 867*789Sahrens hash = zap_hash(zap, name); 868*789Sahrens l = zap_deref_leaf(zap, hash, tx, RW_WRITER); 869*789Sahrens err = zap_leaf_lookup(l, name, hash, &zeh); 870*789Sahrens if (err == 0) { 871*789Sahrens zap_entry_remove(&zeh); 872*789Sahrens zap_increment_num_entries(zap, -1, tx); 873*789Sahrens } 874*789Sahrens zap_put_leaf(l); 875*789Sahrens dprintf("fzap_remove: ds=%p obj=%llu name=%s err=%d\n", 876*789Sahrens zap->zap_objset, zap->zap_object, name, err); 877*789Sahrens return (err); 878*789Sahrens } 879*789Sahrens 880*789Sahrens int 881*789Sahrens zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, char *name) 882*789Sahrens { 883*789Sahrens zap_cursor_t zc; 884*789Sahrens zap_attribute_t *za; 885*789Sahrens int err; 886*789Sahrens 887*789Sahrens za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); 888*789Sahrens for (zap_cursor_init(&zc, os, zapobj); 889*789Sahrens (err = zap_cursor_retrieve(&zc, za)) == 0; 890*789Sahrens zap_cursor_advance(&zc)) { 891*789Sahrens if (za->za_first_integer == value) { 892*789Sahrens (void) strcpy(name, za->za_name); 893*789Sahrens break; 894*789Sahrens } 895*789Sahrens } 896*789Sahrens kmem_free(za, sizeof (zap_attribute_t)); 897*789Sahrens return (err); 898*789Sahrens } 899*789Sahrens 900*789Sahrens 901*789Sahrens /* 902*789Sahrens * Routines for iterating over the attributes. 903*789Sahrens */ 904*789Sahrens 905*789Sahrens int 906*789Sahrens fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za) 907*789Sahrens { 908*789Sahrens int err = ENOENT; 909*789Sahrens zap_entry_handle_t zeh; 910*789Sahrens zap_leaf_t *l; 911*789Sahrens 912*789Sahrens /* retrieve the next entry at or after zc_hash/zc_cd */ 913*789Sahrens /* if no entry, return ENOENT */ 914*789Sahrens 915*789Sahrens again: 916*789Sahrens l = zap_deref_leaf(zap, zc->zc_hash, NULL, RW_READER); 917*789Sahrens err = zap_leaf_lookup_closest(l, zc->zc_hash, zc->zc_cd, &zeh); 918*789Sahrens 919*789Sahrens if (err == ENOENT) { 920*789Sahrens uint64_t nocare = (1ULL << (64 - l->lh_prefix_len)) - 1; 921*789Sahrens zc->zc_hash = (zc->zc_hash & ~nocare) + nocare + 1; 922*789Sahrens zc->zc_cd = 0; 923*789Sahrens if (l->lh_prefix_len == 0 || zc->zc_hash == 0) { 924*789Sahrens zc->zc_hash = -1ULL; 925*789Sahrens } else { 926*789Sahrens zap_put_leaf(l); 927*789Sahrens goto again; 928*789Sahrens } 929*789Sahrens } 930*789Sahrens 931*789Sahrens if (err == 0) { 932*789Sahrens zc->zc_hash = zeh.zeh_hash; 933*789Sahrens zc->zc_cd = zeh.zeh_cd; 934*789Sahrens za->za_integer_length = zeh.zeh_integer_size; 935*789Sahrens za->za_num_integers = zeh.zeh_num_integers; 936*789Sahrens if (zeh.zeh_num_integers == 0) { 937*789Sahrens za->za_first_integer = 0; 938*789Sahrens } else { 939*789Sahrens err = zap_entry_read(&zeh, 8, 1, &za->za_first_integer); 940*789Sahrens ASSERT(err == 0 || err == EOVERFLOW); 941*789Sahrens } 942*789Sahrens err = zap_entry_read_name(&zeh, 943*789Sahrens sizeof (za->za_name), za->za_name); 944*789Sahrens ASSERT(err == 0); 945*789Sahrens } 946*789Sahrens zap_put_leaf(l); 947*789Sahrens return (err); 948*789Sahrens } 949*789Sahrens 950*789Sahrens 951*789Sahrens static void 952*789Sahrens zap_stats_ptrtbl(zap_t *zap, uint64_t *tbl, int len, zap_stats_t *zs) 953*789Sahrens { 954*789Sahrens int i; 955*789Sahrens uint64_t lastblk = 0; 956*789Sahrens 957*789Sahrens /* 958*789Sahrens * NB: if a leaf has more pointers than an entire ptrtbl block 959*789Sahrens * can hold, then it'll be accounted for more than once, since 960*789Sahrens * we won't have lastblk. 961*789Sahrens */ 962*789Sahrens for (i = 0; i < len; i++) { 963*789Sahrens zap_leaf_t *l; 964*789Sahrens 965*789Sahrens if (tbl[i] == lastblk) 966*789Sahrens continue; 967*789Sahrens lastblk = tbl[i]; 968*789Sahrens 969*789Sahrens l = zap_get_leaf_byblk(zap, tbl[i], NULL, RW_READER); 970*789Sahrens 971*789Sahrens zap_stats_leaf(zap, l, zs); 972*789Sahrens zap_put_leaf(l); 973*789Sahrens } 974*789Sahrens } 975*789Sahrens 976*789Sahrens void 977*789Sahrens fzap_get_stats(zap_t *zap, zap_stats_t *zs) 978*789Sahrens { 979*789Sahrens zs->zs_ptrtbl_len = 1ULL << zap->zap_f.zap_phys->zap_ptrtbl.zt_shift; 980*789Sahrens zs->zs_blocksize = 1ULL << ZAP_BLOCK_SHIFT; 981*789Sahrens zs->zs_num_leafs = zap->zap_f.zap_phys->zap_num_leafs; 982*789Sahrens zs->zs_num_entries = zap->zap_f.zap_phys->zap_num_entries; 983*789Sahrens zs->zs_num_blocks = zap->zap_f.zap_phys->zap_freeblk; 984*789Sahrens 985*789Sahrens if (zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks == 0) { 986*789Sahrens /* the ptrtbl is entirely in the header block. */ 987*789Sahrens zap_stats_ptrtbl(zap, zap->zap_f.zap_phys->zap_leafs, 988*789Sahrens 1 << ZAP_PTRTBL_MIN_SHIFT, zs); 989*789Sahrens } else { 990*789Sahrens int b; 991*789Sahrens 992*789Sahrens dmu_prefetch(zap->zap_objset, zap->zap_object, 993*789Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_blk << ZAP_BLOCK_SHIFT, 994*789Sahrens zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks << 995*789Sahrens ZAP_BLOCK_SHIFT); 996*789Sahrens 997*789Sahrens for (b = 0; b < zap->zap_f.zap_phys->zap_ptrtbl.zt_numblks; 998*789Sahrens b++) { 999*789Sahrens dmu_buf_t *db; 1000*789Sahrens 1001*789Sahrens db = dmu_buf_hold(zap->zap_objset, zap->zap_object, 1002*789Sahrens (zap->zap_f.zap_phys->zap_ptrtbl.zt_blk + b) << 1003*789Sahrens ZAP_BLOCK_SHIFT); 1004*789Sahrens dmu_buf_read(db); 1005*789Sahrens zap_stats_ptrtbl(zap, db->db_data, 1006*789Sahrens 1<<(ZAP_BLOCK_SHIFT-3), zs); 1007*789Sahrens dmu_buf_rele(db); 1008*789Sahrens } 1009*789Sahrens } 1010*789Sahrens } 1011