Lines Matching full:zap
50 #include <sys/zap.h>
55 * If zap_iterate_prefetch is set, we will prefetch the entire ZAP object
71 * Given that the ZAP entries aren't returned in a specific order, the only
85 * Enable ZAP shrinking. When enabled, empty sibling leaf blocks will be
92 static uint64_t zap_allocate_blocks(zap_t *zap, int nblocks);
109 fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags)
111 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
112 zap->zap_ismicro = FALSE;
114 zap->zap_dbu.dbu_evict_func_sync = zap_evict_sync;
115 zap->zap_dbu.dbu_evict_func_async = NULL;
117 mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, MUTEX_DEFAULT, 0);
118 zap->zap_f.zap_block_shift = highbit64(zap->zap_dbuf->db_size) - 1;
120 zap_phys_t *zp = zap_f_phys(zap);
125 memset(zap->zap_dbuf->db_data, 0, zap->zap_dbuf->db_size);
129 zp->zap_ptrtbl.zt_shift = ZAP_EMBEDDED_PTRTBL_SHIFT(zap);
134 zp->zap_salt = zap->zap_salt;
135 zp->zap_normflags = zap->zap_normflags;
140 ZAP_EMBEDDED_PTRTBL_ENT(zap, i) = 1;
146 VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
147 1<<FZAP_BLOCK_SHIFT(zap), FTAG, &db, DMU_READ_NO_PREFETCH));
160 zap_tryupgradedir(zap_t *zap, dmu_tx_t *tx)
162 if (RW_WRITE_HELD(&zap->zap_rwlock))
164 if (rw_tryupgrade(&zap->zap_rwlock)) {
165 dmu_buf_will_dirty(zap->zap_dbuf, tx);
176 zap_table_grow(zap_t *zap, zap_table_phys_t *tbl,
181 int bs = FZAP_BLOCK_SHIFT(zap);
185 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
192 newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2);
195 dmu_prefetch_by_dnode(zap->zap_dnode, 0,
206 int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
213 VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
220 VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
236 (void) dmu_free_range(zap->zap_objset, zap->zap_object,
253 zap_table_store(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t val,
256 int bs = FZAP_BLOCK_SHIFT(zap);
258 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
268 int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
280 err = dmu_buf_hold_by_dnode(zap->zap_dnode,
300 zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
302 int bs = FZAP_BLOCK_SHIFT(zap);
304 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
310 int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
325 err = dmu_buf_hold_by_dnode(zap->zap_dnode,
349 zap_grow_ptrtbl(zap_t *zap, dmu_tx_t *tx)
357 if (zap_f_phys(zap)->zap_ptrtbl.zt_shift >= zap_hashbits(zap) - 2)
360 if (zap_f_phys(zap)->zap_ptrtbl.zt_numblks == 0) {
366 ASSERT3U(zap_f_phys(zap)->zap_ptrtbl.zt_shift, ==,
367 ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
368 ASSERT0(zap_f_phys(zap)->zap_ptrtbl.zt_blk);
370 uint64_t newblk = zap_allocate_blocks(zap, 1);
372 int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
373 newblk << FZAP_BLOCK_SHIFT(zap), FTAG, &db_new,
378 zap_ptrtbl_transfer(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0),
379 db_new->db_data, 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
382 zap_f_phys(zap)->zap_ptrtbl.zt_blk = newblk;
383 zap_f_phys(zap)->zap_ptrtbl.zt_numblks = 1;
384 zap_f_phys(zap)->zap_ptrtbl.zt_shift++;
386 ASSERT3U(1ULL << zap_f_phys(zap)->zap_ptrtbl.zt_shift, ==,
387 zap_f_phys(zap)->zap_ptrtbl.zt_numblks <<
388 (FZAP_BLOCK_SHIFT(zap)-3));
392 return (zap_table_grow(zap, &zap_f_phys(zap)->zap_ptrtbl,
398 zap_increment_num_entries(zap_t *zap, int delta, dmu_tx_t *tx)
400 dmu_buf_will_dirty(zap->zap_dbuf, tx);
401 mutex_enter(&zap->zap_f.zap_num_entries_mtx);
402 ASSERT(delta > 0 || zap_f_phys(zap)->zap_num_entries >= -delta);
403 zap_f_phys(zap)->zap_num_entries += delta;
404 mutex_exit(&zap->zap_f.zap_num_entries_mtx);
408 zap_allocate_blocks(zap_t *zap, int nblocks)
410 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
411 uint64_t newblk = zap_f_phys(zap)->zap_freeblk;
412 zap_f_phys(zap)->zap_freeblk += nblocks;
426 zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
428 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
430 uint64_t blkid = zap_allocate_blocks(zap, 1);
433 VERIFY0(dmu_buf_hold_by_dnode(zap->zap_dnode,
434 blkid << FZAP_BLOCK_SHIFT(zap), NULL, &db,
438 * Create the leaf structure and stash it on the dbuf. If zap was
460 zap_leaf_init(l, zap->zap_normflags != 0);
462 zap_f_phys(zap)->zap_num_leafs++;
468 fzap_count(zap_t *zap, uint64_t *count)
470 ASSERT(!zap->zap_ismicro);
471 mutex_enter(&zap->zap_f.zap_num_entries_mtx); /* unnecessary */
472 *count = zap_f_phys(zap)->zap_num_entries;
473 mutex_exit(&zap->zap_f.zap_num_entries_mtx);
535 zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt,
540 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
552 int bs = FZAP_BLOCK_SHIFT(zap);
553 int err = dmu_buf_hold_by_dnode(zap->zap_dnode,
558 ASSERT3U(db->db_object, ==, zap->zap_object);
585 zap_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t *valp)
587 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
589 if (zap_f_phys(zap)->zap_ptrtbl.zt_numblks == 0) {
591 (1ULL << zap_f_phys(zap)->zap_ptrtbl.zt_shift));
592 *valp = ZAP_EMBEDDED_PTRTBL_ENT(zap, idx);
595 return (zap_table_load(zap, &zap_f_phys(zap)->zap_ptrtbl,
601 zap_set_idx_to_blk(zap_t *zap, uint64_t idx, uint64_t blk, dmu_tx_t *tx)
604 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
606 if (zap_f_phys(zap)->zap_ptrtbl.zt_blk == 0) {
607 ZAP_EMBEDDED_PTRTBL_ENT(zap, idx) = blk;
610 return (zap_table_store(zap, &zap_f_phys(zap)->zap_ptrtbl,
616 zap_set_idx_range_to_blk(zap_t *zap, uint64_t idx, uint64_t nptrs, uint64_t blk,
619 int bs = FZAP_BLOCK_SHIFT(zap);
624 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
631 err = zap_idx_to_blk(zap, idx + i, &blk);
638 err = zap_set_idx_to_blk(zap, idx + i, blk, tx);
650 * Each leaf has a single range of entries (block pointers) in the ZAP ptrtbl.
657 check_sibling_ptrtbl_range(zap_t *zap, uint64_t prefix, uint64_t prefix_len)
659 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
662 uint64_t idx = ZAP_HASH_IDX(h, zap_f_phys(zap)->zap_ptrtbl.zt_shift);
663 uint64_t pref_diff = zap_f_phys(zap)->zap_ptrtbl.zt_shift - prefix_len;
668 ASSERT3U(idx+nptrs, <=, (1UL << zap_f_phys(zap)->zap_ptrtbl.zt_shift));
670 if (zap_idx_to_blk(zap, idx, &first) != 0)
673 if (zap_idx_to_blk(zap, idx + nptrs - 1, &last) != 0)
682 zap_deref_leaf(zap_t *zap, uint64_t h, dmu_tx_t *tx, krw_t lt, zap_leaf_t **lp)
686 ASSERT(zap->zap_dbuf == NULL ||
687 zap_f_phys(zap) == zap->zap_dbuf->db_data);
689 /* Reality check for corrupt zap objects (leaf or header). */
690 if ((zap_f_phys(zap)->zap_block_type != ZBT_LEAF &&
691 zap_f_phys(zap)->zap_block_type != ZBT_HEADER) ||
692 zap_f_phys(zap)->zap_magic != ZAP_MAGIC) {
696 uint64_t idx = ZAP_HASH_IDX(h, zap_f_phys(zap)->zap_ptrtbl.zt_shift);
697 int err = zap_idx_to_blk(zap, idx, &blk);
700 err = zap_get_leaf_byblk(zap, blk, tx, lt, lp);
712 zap_t *zap = zn->zn_zap;
717 ASSERT3U(old_prefix_len, <=, zap_f_phys(zap)->zap_ptrtbl.zt_shift);
718 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
723 if (zap_tryupgradedir(zap, tx) == 0 ||
724 old_prefix_len == zap_f_phys(zap)->zap_ptrtbl.zt_shift) {
726 objset_t *os = zap->zap_objset;
727 uint64_t object = zap->zap_object;
731 zap_unlockdir(zap, tag);
734 zap = zn->zn_zap;
737 ASSERT(!zap->zap_ismicro);
740 zap_f_phys(zap)->zap_ptrtbl.zt_shift) {
741 err = zap_grow_ptrtbl(zap, tx);
746 err = zap_deref_leaf(zap, hash, tx, RW_WRITER, &l);
756 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
757 ASSERT3U(old_prefix_len, <, zap_f_phys(zap)->zap_ptrtbl.zt_shift);
761 int prefix_diff = zap_f_phys(zap)->zap_ptrtbl.zt_shift -
769 err = zap_idx_to_blk(zap, sibling + i, &blk);
775 zap_leaf_t *nl = zap_create_leaf(zap, tx);
776 zap_leaf_split(l, nl, zap->zap_normflags != 0);
780 err = zap_set_idx_to_blk(zap, sibling + i, nl->l_blkid, tx);
802 zap_t *zap = zn->zn_zap;
803 int shift = zap_f_phys(zap)->zap_ptrtbl.zt_shift;
809 if (leaffull || zap_f_phys(zap)->zap_ptrtbl.zt_nextblk) {
814 if (zap_tryupgradedir(zap, tx) == 0) {
815 objset_t *os = zap->zap_objset;
816 uint64_t zapobj = zap->zap_object;
818 zap_unlockdir(zap, tag);
821 zap = zn->zn_zap;
827 if (zap_f_phys(zap)->zap_ptrtbl.zt_shift == shift)
828 (void) zap_grow_ptrtbl(zap, tx);
837 /* Only allow directory zap to have longname */
919 zap_t *zap = zn->zn_zap;
921 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
922 ASSERT(!zap->zap_ismicro);
925 err = zap_deref_leaf(zap, zn->zn_hash, tx, RW_WRITER, &l);
941 zap_increment_num_entries(zap, 1, tx);
944 zap = zn->zn_zap; /* zap_expand_leaf() may change zap */
981 zap_t *zap = zn->zn_zap;
983 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
988 err = zap_deref_leaf(zap, zn->zn_hash, tx, RW_WRITER, &l);
1000 zap_increment_num_entries(zap, 1, tx);
1007 zap = zn->zn_zap; /* zap_expand_leaf() may change zap */
1072 zap_t *zap = zn->zn_zap;
1075 zap_f_phys(zap)->zap_ptrtbl.zt_shift);
1076 if (zap_idx_to_blk(zap, idx, &blk) != 0)
1078 int bs = FZAP_BLOCK_SHIFT(zap);
1079 dmu_prefetch_by_dnode(zap->zap_dnode, 0, blk << bs, 1 << bs,
1304 fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za)
1315 * iterate over the entire ZAP object. If there are multiple leaf
1322 zc->zc_prefetch && zap_f_phys(zap)->zap_freeblk > 2) {
1323 dmu_prefetch_by_dnode(zap->zap_dnode, 0, 0,
1324 zap_f_phys(zap)->zap_freeblk << FZAP_BLOCK_SHIFT(zap),
1345 err = zap_deref_leaf(zap, zc->zc_hash, NULL, RW_READER,
1386 err = zap_entry_read_name(zap, &zeh,
1392 NULL, za->za_name, zap);
1399 zap_stats_ptrtbl(zap_t *zap, uint64_t *tbl, int len, zap_stats_t *zs)
1415 int err = zap_get_leaf_byblk(zap, tbl[i], NULL, RW_READER, &l);
1417 zap_leaf_stats(zap, l, zs);
1424 fzap_get_stats(zap_t *zap, zap_stats_t *zs)
1426 int bs = FZAP_BLOCK_SHIFT(zap);
1432 zs->zs_num_leafs = zap_f_phys(zap)->zap_num_leafs;
1433 zs->zs_num_entries = zap_f_phys(zap)->zap_num_entries;
1434 zs->zs_num_blocks = zap_f_phys(zap)->zap_freeblk;
1435 zs->zs_block_type = zap_f_phys(zap)->zap_block_type;
1436 zs->zs_magic = zap_f_phys(zap)->zap_magic;
1437 zs->zs_salt = zap_f_phys(zap)->zap_salt;
1442 zs->zs_ptrtbl_len = 1ULL << zap_f_phys(zap)->zap_ptrtbl.zt_shift;
1443 zs->zs_ptrtbl_nextblk = zap_f_phys(zap)->zap_ptrtbl.zt_nextblk;
1445 zap_f_phys(zap)->zap_ptrtbl.zt_blks_copied;
1446 zs->zs_ptrtbl_zt_blk = zap_f_phys(zap)->zap_ptrtbl.zt_blk;
1447 zs->zs_ptrtbl_zt_numblks = zap_f_phys(zap)->zap_ptrtbl.zt_numblks;
1448 zs->zs_ptrtbl_zt_shift = zap_f_phys(zap)->zap_ptrtbl.zt_shift;
1450 if (zap_f_phys(zap)->zap_ptrtbl.zt_numblks == 0) {
1452 zap_stats_ptrtbl(zap, &ZAP_EMBEDDED_PTRTBL_ENT(zap, 0),
1453 1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap), zs);
1455 dmu_prefetch_by_dnode(zap->zap_dnode, 0,
1456 zap_f_phys(zap)->zap_ptrtbl.zt_blk << bs,
1457 zap_f_phys(zap)->zap_ptrtbl.zt_numblks << bs,
1460 for (int b = 0; b < zap_f_phys(zap)->zap_ptrtbl.zt_numblks;
1465 err = dmu_buf_hold_by_dnode(zap->zap_dnode,
1466 (zap_f_phys(zap)->zap_ptrtbl.zt_blk + b) << bs,
1469 zap_stats_ptrtbl(zap, db->db_data,
1481 zap_trunc(zap_t *zap)
1486 ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
1488 if (zap_f_phys(zap)->zap_ptrtbl.zt_blk > 0) {
1490 nentries = (1 << zap_f_phys(zap)->zap_ptrtbl.zt_shift);
1491 lastblk = zap_f_phys(zap)->zap_ptrtbl.zt_blk +
1492 zap_f_phys(zap)->zap_ptrtbl.zt_numblks - 1;
1495 nentries = (1 << ZAP_EMBEDDED_PTRTBL_SHIFT(zap));
1501 if (zap_idx_to_blk(zap, idx, &blk) != 0)
1507 ASSERT3U(lastblk, <, zap_f_phys(zap)->zap_freeblk);
1509 zap_f_phys(zap)->zap_freeblk = lastblk + 1;
1513 * ZAP shrinking algorithm.
1515 * We shrink ZAP recursively removing empty leaves. We can remove an empty leaf
1540 zap_t *zap = zn->zn_zap;
1541 int64_t zt_shift = zap_f_phys(zap)->zap_ptrtbl.zt_shift;
1549 ASSERT3U(prefix_len, <=, zap_f_phys(zap)->zap_ptrtbl.zt_shift);
1550 ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
1571 if (check_sibling_ptrtbl_range(zap, sl_prefix, prefix_len) == 0)
1585 if ((err = zap_deref_leaf(zap, sl_hash, tx, RW_READER,
1604 * we need to lock ZAP ptrtbl as WRITER.
1606 if (!writer && (writer = zap_tryupgradedir(zap, tx)) == 0) {
1619 rw_exit(&zap->zap_rwlock);
1620 rw_enter(&zap->zap_rwlock, RW_WRITER);
1621 dmu_buf_will_dirty(zap->zap_dbuf, tx);
1623 zt_shift = zap_f_phys(zap)->zap_ptrtbl.zt_shift;
1635 if ((err = zap_deref_leaf(zap, (slbit ? sl_hash : hash),
1649 if ((err = zap_deref_leaf(zap, (slbit ? hash : sl_hash), tx,
1671 if ((err = zap_set_idx_range_to_blk(zap, idx, nptrs, l->l_blkid,
1680 int bs = FZAP_BLOCK_SHIFT(zap);
1681 if (sl_blkid == zap_f_phys(zap)->zap_freeblk - 1)
1684 (void) dmu_free_range(zap->zap_objset, zap->zap_object,
1688 zap_f_phys(zap)->zap_num_leafs--;
1701 zap_trunc(zap);
1710 "When iterating ZAP object, prefetch it");
1713 "Enable ZAP shrinking");