1*eda14cbcSMatt Macy /* 2*eda14cbcSMatt Macy * CDDL HEADER START 3*eda14cbcSMatt Macy * 4*eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5*eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6*eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7*eda14cbcSMatt Macy * 8*eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*eda14cbcSMatt Macy * or http://www.opensolaris.org/os/licensing. 10*eda14cbcSMatt Macy * See the License for the specific language governing permissions 11*eda14cbcSMatt Macy * and limitations under the License. 12*eda14cbcSMatt Macy * 13*eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14*eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16*eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17*eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18*eda14cbcSMatt Macy * 19*eda14cbcSMatt Macy * CDDL HEADER END 20*eda14cbcSMatt Macy */ 21*eda14cbcSMatt Macy 22*eda14cbcSMatt Macy /* 23*eda14cbcSMatt Macy * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24*eda14cbcSMatt Macy * Copyright (c) 2011, 2018 by Delphix. All rights reserved. 25*eda14cbcSMatt Macy * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. 26*eda14cbcSMatt Macy * Copyright 2017 Nexenta Systems, Inc. 27*eda14cbcSMatt Macy */ 28*eda14cbcSMatt Macy 29*eda14cbcSMatt Macy #include <sys/zio.h> 30*eda14cbcSMatt Macy #include <sys/spa.h> 31*eda14cbcSMatt Macy #include <sys/dmu.h> 32*eda14cbcSMatt Macy #include <sys/zfs_context.h> 33*eda14cbcSMatt Macy #include <sys/zap.h> 34*eda14cbcSMatt Macy #include <sys/zap_impl.h> 35*eda14cbcSMatt Macy #include <sys/zap_leaf.h> 36*eda14cbcSMatt Macy #include <sys/avl.h> 37*eda14cbcSMatt Macy #include <sys/arc.h> 38*eda14cbcSMatt Macy #include <sys/dmu_objset.h> 39*eda14cbcSMatt Macy 40*eda14cbcSMatt Macy #ifdef _KERNEL 41*eda14cbcSMatt Macy #include <sys/sunddi.h> 42*eda14cbcSMatt Macy #endif 43*eda14cbcSMatt Macy 44*eda14cbcSMatt Macy extern inline mzap_phys_t *zap_m_phys(zap_t *zap); 45*eda14cbcSMatt Macy 46*eda14cbcSMatt Macy static int mzap_upgrade(zap_t **zapp, 47*eda14cbcSMatt Macy void *tag, dmu_tx_t *tx, zap_flags_t flags); 48*eda14cbcSMatt Macy 49*eda14cbcSMatt Macy uint64_t 50*eda14cbcSMatt Macy zap_getflags(zap_t *zap) 51*eda14cbcSMatt Macy { 52*eda14cbcSMatt Macy if (zap->zap_ismicro) 53*eda14cbcSMatt Macy return (0); 54*eda14cbcSMatt Macy return (zap_f_phys(zap)->zap_flags); 55*eda14cbcSMatt Macy } 56*eda14cbcSMatt Macy 57*eda14cbcSMatt Macy int 58*eda14cbcSMatt Macy zap_hashbits(zap_t *zap) 59*eda14cbcSMatt Macy { 60*eda14cbcSMatt Macy if (zap_getflags(zap) & ZAP_FLAG_HASH64) 61*eda14cbcSMatt Macy return (48); 62*eda14cbcSMatt Macy else 63*eda14cbcSMatt Macy return (28); 64*eda14cbcSMatt Macy } 65*eda14cbcSMatt Macy 66*eda14cbcSMatt Macy uint32_t 67*eda14cbcSMatt Macy zap_maxcd(zap_t *zap) 68*eda14cbcSMatt Macy { 69*eda14cbcSMatt Macy if (zap_getflags(zap) & ZAP_FLAG_HASH64) 70*eda14cbcSMatt Macy return ((1<<16)-1); 71*eda14cbcSMatt Macy else 72*eda14cbcSMatt Macy return (-1U); 73*eda14cbcSMatt Macy } 74*eda14cbcSMatt Macy 75*eda14cbcSMatt Macy static uint64_t 76*eda14cbcSMatt Macy zap_hash(zap_name_t *zn) 77*eda14cbcSMatt Macy { 78*eda14cbcSMatt Macy zap_t *zap = zn->zn_zap; 79*eda14cbcSMatt Macy uint64_t h = 0; 80*eda14cbcSMatt Macy 81*eda14cbcSMatt Macy if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) { 82*eda14cbcSMatt Macy ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY); 83*eda14cbcSMatt Macy h = *(uint64_t *)zn->zn_key_orig; 84*eda14cbcSMatt Macy } else { 85*eda14cbcSMatt Macy h = zap->zap_salt; 86*eda14cbcSMatt Macy ASSERT(h != 0); 87*eda14cbcSMatt Macy ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY); 88*eda14cbcSMatt Macy 89*eda14cbcSMatt Macy if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) { 90*eda14cbcSMatt Macy const uint64_t *wp = zn->zn_key_norm; 91*eda14cbcSMatt Macy 92*eda14cbcSMatt Macy ASSERT(zn->zn_key_intlen == 8); 93*eda14cbcSMatt Macy for (int i = 0; i < zn->zn_key_norm_numints; 94*eda14cbcSMatt Macy wp++, i++) { 95*eda14cbcSMatt Macy uint64_t word = *wp; 96*eda14cbcSMatt Macy 97*eda14cbcSMatt Macy for (int j = 0; j < zn->zn_key_intlen; j++) { 98*eda14cbcSMatt Macy h = (h >> 8) ^ 99*eda14cbcSMatt Macy zfs_crc64_table[(h ^ word) & 0xFF]; 100*eda14cbcSMatt Macy word >>= NBBY; 101*eda14cbcSMatt Macy } 102*eda14cbcSMatt Macy } 103*eda14cbcSMatt Macy } else { 104*eda14cbcSMatt Macy const uint8_t *cp = zn->zn_key_norm; 105*eda14cbcSMatt Macy 106*eda14cbcSMatt Macy /* 107*eda14cbcSMatt Macy * We previously stored the terminating null on 108*eda14cbcSMatt Macy * disk, but didn't hash it, so we need to 109*eda14cbcSMatt Macy * continue to not hash it. (The 110*eda14cbcSMatt Macy * zn_key_*_numints includes the terminating 111*eda14cbcSMatt Macy * null for non-binary keys.) 112*eda14cbcSMatt Macy */ 113*eda14cbcSMatt Macy int len = zn->zn_key_norm_numints - 1; 114*eda14cbcSMatt Macy 115*eda14cbcSMatt Macy ASSERT(zn->zn_key_intlen == 1); 116*eda14cbcSMatt Macy for (int i = 0; i < len; cp++, i++) { 117*eda14cbcSMatt Macy h = (h >> 8) ^ 118*eda14cbcSMatt Macy zfs_crc64_table[(h ^ *cp) & 0xFF]; 119*eda14cbcSMatt Macy } 120*eda14cbcSMatt Macy } 121*eda14cbcSMatt Macy } 122*eda14cbcSMatt Macy /* 123*eda14cbcSMatt Macy * Don't use all 64 bits, since we need some in the cookie for 124*eda14cbcSMatt Macy * the collision differentiator. We MUST use the high bits, 125*eda14cbcSMatt Macy * since those are the ones that we first pay attention to when 126*eda14cbcSMatt Macy * choosing the bucket. 127*eda14cbcSMatt Macy */ 128*eda14cbcSMatt Macy h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1); 129*eda14cbcSMatt Macy 130*eda14cbcSMatt Macy return (h); 131*eda14cbcSMatt Macy } 132*eda14cbcSMatt Macy 133*eda14cbcSMatt Macy static int 134*eda14cbcSMatt Macy zap_normalize(zap_t *zap, const char *name, char *namenorm, int normflags) 135*eda14cbcSMatt Macy { 136*eda14cbcSMatt Macy ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY)); 137*eda14cbcSMatt Macy 138*eda14cbcSMatt Macy size_t inlen = strlen(name) + 1; 139*eda14cbcSMatt Macy size_t outlen = ZAP_MAXNAMELEN; 140*eda14cbcSMatt Macy 141*eda14cbcSMatt Macy int err = 0; 142*eda14cbcSMatt Macy (void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen, 143*eda14cbcSMatt Macy normflags | U8_TEXTPREP_IGNORE_NULL | U8_TEXTPREP_IGNORE_INVALID, 144*eda14cbcSMatt Macy U8_UNICODE_LATEST, &err); 145*eda14cbcSMatt Macy 146*eda14cbcSMatt Macy return (err); 147*eda14cbcSMatt Macy } 148*eda14cbcSMatt Macy 149*eda14cbcSMatt Macy boolean_t 150*eda14cbcSMatt Macy zap_match(zap_name_t *zn, const char *matchname) 151*eda14cbcSMatt Macy { 152*eda14cbcSMatt Macy ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY)); 153*eda14cbcSMatt Macy 154*eda14cbcSMatt Macy if (zn->zn_matchtype & MT_NORMALIZE) { 155*eda14cbcSMatt Macy char norm[ZAP_MAXNAMELEN]; 156*eda14cbcSMatt Macy 157*eda14cbcSMatt Macy if (zap_normalize(zn->zn_zap, matchname, norm, 158*eda14cbcSMatt Macy zn->zn_normflags) != 0) 159*eda14cbcSMatt Macy return (B_FALSE); 160*eda14cbcSMatt Macy 161*eda14cbcSMatt Macy return (strcmp(zn->zn_key_norm, norm) == 0); 162*eda14cbcSMatt Macy } else { 163*eda14cbcSMatt Macy return (strcmp(zn->zn_key_orig, matchname) == 0); 164*eda14cbcSMatt Macy } 165*eda14cbcSMatt Macy } 166*eda14cbcSMatt Macy 167*eda14cbcSMatt Macy void 168*eda14cbcSMatt Macy zap_name_free(zap_name_t *zn) 169*eda14cbcSMatt Macy { 170*eda14cbcSMatt Macy kmem_free(zn, sizeof (zap_name_t)); 171*eda14cbcSMatt Macy } 172*eda14cbcSMatt Macy 173*eda14cbcSMatt Macy zap_name_t * 174*eda14cbcSMatt Macy zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt) 175*eda14cbcSMatt Macy { 176*eda14cbcSMatt Macy zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); 177*eda14cbcSMatt Macy 178*eda14cbcSMatt Macy zn->zn_zap = zap; 179*eda14cbcSMatt Macy zn->zn_key_intlen = sizeof (*key); 180*eda14cbcSMatt Macy zn->zn_key_orig = key; 181*eda14cbcSMatt Macy zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1; 182*eda14cbcSMatt Macy zn->zn_matchtype = mt; 183*eda14cbcSMatt Macy zn->zn_normflags = zap->zap_normflags; 184*eda14cbcSMatt Macy 185*eda14cbcSMatt Macy /* 186*eda14cbcSMatt Macy * If we're dealing with a case sensitive lookup on a mixed or 187*eda14cbcSMatt Macy * insensitive fs, remove U8_TEXTPREP_TOUPPER or the lookup 188*eda14cbcSMatt Macy * will fold case to all caps overriding the lookup request. 189*eda14cbcSMatt Macy */ 190*eda14cbcSMatt Macy if (mt & MT_MATCH_CASE) 191*eda14cbcSMatt Macy zn->zn_normflags &= ~U8_TEXTPREP_TOUPPER; 192*eda14cbcSMatt Macy 193*eda14cbcSMatt Macy if (zap->zap_normflags) { 194*eda14cbcSMatt Macy /* 195*eda14cbcSMatt Macy * We *must* use zap_normflags because this normalization is 196*eda14cbcSMatt Macy * what the hash is computed from. 197*eda14cbcSMatt Macy */ 198*eda14cbcSMatt Macy if (zap_normalize(zap, key, zn->zn_normbuf, 199*eda14cbcSMatt Macy zap->zap_normflags) != 0) { 200*eda14cbcSMatt Macy zap_name_free(zn); 201*eda14cbcSMatt Macy return (NULL); 202*eda14cbcSMatt Macy } 203*eda14cbcSMatt Macy zn->zn_key_norm = zn->zn_normbuf; 204*eda14cbcSMatt Macy zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; 205*eda14cbcSMatt Macy } else { 206*eda14cbcSMatt Macy if (mt != 0) { 207*eda14cbcSMatt Macy zap_name_free(zn); 208*eda14cbcSMatt Macy return (NULL); 209*eda14cbcSMatt Macy } 210*eda14cbcSMatt Macy zn->zn_key_norm = zn->zn_key_orig; 211*eda14cbcSMatt Macy zn->zn_key_norm_numints = zn->zn_key_orig_numints; 212*eda14cbcSMatt Macy } 213*eda14cbcSMatt Macy 214*eda14cbcSMatt Macy zn->zn_hash = zap_hash(zn); 215*eda14cbcSMatt Macy 216*eda14cbcSMatt Macy if (zap->zap_normflags != zn->zn_normflags) { 217*eda14cbcSMatt Macy /* 218*eda14cbcSMatt Macy * We *must* use zn_normflags because this normalization is 219*eda14cbcSMatt Macy * what the matching is based on. (Not the hash!) 220*eda14cbcSMatt Macy */ 221*eda14cbcSMatt Macy if (zap_normalize(zap, key, zn->zn_normbuf, 222*eda14cbcSMatt Macy zn->zn_normflags) != 0) { 223*eda14cbcSMatt Macy zap_name_free(zn); 224*eda14cbcSMatt Macy return (NULL); 225*eda14cbcSMatt Macy } 226*eda14cbcSMatt Macy zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1; 227*eda14cbcSMatt Macy } 228*eda14cbcSMatt Macy 229*eda14cbcSMatt Macy return (zn); 230*eda14cbcSMatt Macy } 231*eda14cbcSMatt Macy 232*eda14cbcSMatt Macy static zap_name_t * 233*eda14cbcSMatt Macy zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints) 234*eda14cbcSMatt Macy { 235*eda14cbcSMatt Macy zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP); 236*eda14cbcSMatt Macy 237*eda14cbcSMatt Macy ASSERT(zap->zap_normflags == 0); 238*eda14cbcSMatt Macy zn->zn_zap = zap; 239*eda14cbcSMatt Macy zn->zn_key_intlen = sizeof (*key); 240*eda14cbcSMatt Macy zn->zn_key_orig = zn->zn_key_norm = key; 241*eda14cbcSMatt Macy zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints; 242*eda14cbcSMatt Macy zn->zn_matchtype = 0; 243*eda14cbcSMatt Macy 244*eda14cbcSMatt Macy zn->zn_hash = zap_hash(zn); 245*eda14cbcSMatt Macy return (zn); 246*eda14cbcSMatt Macy } 247*eda14cbcSMatt Macy 248*eda14cbcSMatt Macy static void 249*eda14cbcSMatt Macy mzap_byteswap(mzap_phys_t *buf, size_t size) 250*eda14cbcSMatt Macy { 251*eda14cbcSMatt Macy buf->mz_block_type = BSWAP_64(buf->mz_block_type); 252*eda14cbcSMatt Macy buf->mz_salt = BSWAP_64(buf->mz_salt); 253*eda14cbcSMatt Macy buf->mz_normflags = BSWAP_64(buf->mz_normflags); 254*eda14cbcSMatt Macy int max = (size / MZAP_ENT_LEN) - 1; 255*eda14cbcSMatt Macy for (int i = 0; i < max; i++) { 256*eda14cbcSMatt Macy buf->mz_chunk[i].mze_value = 257*eda14cbcSMatt Macy BSWAP_64(buf->mz_chunk[i].mze_value); 258*eda14cbcSMatt Macy buf->mz_chunk[i].mze_cd = 259*eda14cbcSMatt Macy BSWAP_32(buf->mz_chunk[i].mze_cd); 260*eda14cbcSMatt Macy } 261*eda14cbcSMatt Macy } 262*eda14cbcSMatt Macy 263*eda14cbcSMatt Macy void 264*eda14cbcSMatt Macy zap_byteswap(void *buf, size_t size) 265*eda14cbcSMatt Macy { 266*eda14cbcSMatt Macy uint64_t block_type = *(uint64_t *)buf; 267*eda14cbcSMatt Macy 268*eda14cbcSMatt Macy if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) { 269*eda14cbcSMatt Macy /* ASSERT(magic == ZAP_LEAF_MAGIC); */ 270*eda14cbcSMatt Macy mzap_byteswap(buf, size); 271*eda14cbcSMatt Macy } else { 272*eda14cbcSMatt Macy fzap_byteswap(buf, size); 273*eda14cbcSMatt Macy } 274*eda14cbcSMatt Macy } 275*eda14cbcSMatt Macy 276*eda14cbcSMatt Macy static int 277*eda14cbcSMatt Macy mze_compare(const void *arg1, const void *arg2) 278*eda14cbcSMatt Macy { 279*eda14cbcSMatt Macy const mzap_ent_t *mze1 = arg1; 280*eda14cbcSMatt Macy const mzap_ent_t *mze2 = arg2; 281*eda14cbcSMatt Macy 282*eda14cbcSMatt Macy int cmp = TREE_CMP(mze1->mze_hash, mze2->mze_hash); 283*eda14cbcSMatt Macy if (likely(cmp)) 284*eda14cbcSMatt Macy return (cmp); 285*eda14cbcSMatt Macy 286*eda14cbcSMatt Macy return (TREE_CMP(mze1->mze_cd, mze2->mze_cd)); 287*eda14cbcSMatt Macy } 288*eda14cbcSMatt Macy 289*eda14cbcSMatt Macy static void 290*eda14cbcSMatt Macy mze_insert(zap_t *zap, int chunkid, uint64_t hash) 291*eda14cbcSMatt Macy { 292*eda14cbcSMatt Macy ASSERT(zap->zap_ismicro); 293*eda14cbcSMatt Macy ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 294*eda14cbcSMatt Macy 295*eda14cbcSMatt Macy mzap_ent_t *mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP); 296*eda14cbcSMatt Macy mze->mze_chunkid = chunkid; 297*eda14cbcSMatt Macy mze->mze_hash = hash; 298*eda14cbcSMatt Macy mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd; 299*eda14cbcSMatt Macy ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0); 300*eda14cbcSMatt Macy avl_add(&zap->zap_m.zap_avl, mze); 301*eda14cbcSMatt Macy } 302*eda14cbcSMatt Macy 303*eda14cbcSMatt Macy static mzap_ent_t * 304*eda14cbcSMatt Macy mze_find(zap_name_t *zn) 305*eda14cbcSMatt Macy { 306*eda14cbcSMatt Macy mzap_ent_t mze_tofind; 307*eda14cbcSMatt Macy mzap_ent_t *mze; 308*eda14cbcSMatt Macy avl_index_t idx; 309*eda14cbcSMatt Macy avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl; 310*eda14cbcSMatt Macy 311*eda14cbcSMatt Macy ASSERT(zn->zn_zap->zap_ismicro); 312*eda14cbcSMatt Macy ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock)); 313*eda14cbcSMatt Macy 314*eda14cbcSMatt Macy mze_tofind.mze_hash = zn->zn_hash; 315*eda14cbcSMatt Macy mze_tofind.mze_cd = 0; 316*eda14cbcSMatt Macy 317*eda14cbcSMatt Macy mze = avl_find(avl, &mze_tofind, &idx); 318*eda14cbcSMatt Macy if (mze == NULL) 319*eda14cbcSMatt Macy mze = avl_nearest(avl, idx, AVL_AFTER); 320*eda14cbcSMatt Macy for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) { 321*eda14cbcSMatt Macy ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd); 322*eda14cbcSMatt Macy if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name)) 323*eda14cbcSMatt Macy return (mze); 324*eda14cbcSMatt Macy } 325*eda14cbcSMatt Macy 326*eda14cbcSMatt Macy return (NULL); 327*eda14cbcSMatt Macy } 328*eda14cbcSMatt Macy 329*eda14cbcSMatt Macy static uint32_t 330*eda14cbcSMatt Macy mze_find_unused_cd(zap_t *zap, uint64_t hash) 331*eda14cbcSMatt Macy { 332*eda14cbcSMatt Macy mzap_ent_t mze_tofind; 333*eda14cbcSMatt Macy avl_index_t idx; 334*eda14cbcSMatt Macy avl_tree_t *avl = &zap->zap_m.zap_avl; 335*eda14cbcSMatt Macy 336*eda14cbcSMatt Macy ASSERT(zap->zap_ismicro); 337*eda14cbcSMatt Macy ASSERT(RW_LOCK_HELD(&zap->zap_rwlock)); 338*eda14cbcSMatt Macy 339*eda14cbcSMatt Macy mze_tofind.mze_hash = hash; 340*eda14cbcSMatt Macy mze_tofind.mze_cd = 0; 341*eda14cbcSMatt Macy 342*eda14cbcSMatt Macy uint32_t cd = 0; 343*eda14cbcSMatt Macy for (mzap_ent_t *mze = avl_find(avl, &mze_tofind, &idx); 344*eda14cbcSMatt Macy mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { 345*eda14cbcSMatt Macy if (mze->mze_cd != cd) 346*eda14cbcSMatt Macy break; 347*eda14cbcSMatt Macy cd++; 348*eda14cbcSMatt Macy } 349*eda14cbcSMatt Macy 350*eda14cbcSMatt Macy return (cd); 351*eda14cbcSMatt Macy } 352*eda14cbcSMatt Macy 353*eda14cbcSMatt Macy /* 354*eda14cbcSMatt Macy * Each mzap entry requires at max : 4 chunks 355*eda14cbcSMatt Macy * 3 chunks for names + 1 chunk for value. 356*eda14cbcSMatt Macy */ 357*eda14cbcSMatt Macy #define MZAP_ENT_CHUNKS (1 + ZAP_LEAF_ARRAY_NCHUNKS(MZAP_NAME_LEN) + \ 358*eda14cbcSMatt Macy ZAP_LEAF_ARRAY_NCHUNKS(sizeof (uint64_t))) 359*eda14cbcSMatt Macy 360*eda14cbcSMatt Macy /* 361*eda14cbcSMatt Macy * Check if the current entry keeps the colliding entries under the fatzap leaf 362*eda14cbcSMatt Macy * size. 363*eda14cbcSMatt Macy */ 364*eda14cbcSMatt Macy static boolean_t 365*eda14cbcSMatt Macy mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash) 366*eda14cbcSMatt Macy { 367*eda14cbcSMatt Macy zap_t *zap = zn->zn_zap; 368*eda14cbcSMatt Macy mzap_ent_t mze_tofind; 369*eda14cbcSMatt Macy mzap_ent_t *mze; 370*eda14cbcSMatt Macy avl_index_t idx; 371*eda14cbcSMatt Macy avl_tree_t *avl = &zap->zap_m.zap_avl; 372*eda14cbcSMatt Macy uint32_t mzap_ents = 0; 373*eda14cbcSMatt Macy 374*eda14cbcSMatt Macy mze_tofind.mze_hash = hash; 375*eda14cbcSMatt Macy mze_tofind.mze_cd = 0; 376*eda14cbcSMatt Macy 377*eda14cbcSMatt Macy for (mze = avl_find(avl, &mze_tofind, &idx); 378*eda14cbcSMatt Macy mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) { 379*eda14cbcSMatt Macy mzap_ents++; 380*eda14cbcSMatt Macy } 381*eda14cbcSMatt Macy 382*eda14cbcSMatt Macy /* Include the new entry being added */ 383*eda14cbcSMatt Macy mzap_ents++; 384*eda14cbcSMatt Macy 385*eda14cbcSMatt Macy return (ZAP_LEAF_NUMCHUNKS_DEF > (mzap_ents * MZAP_ENT_CHUNKS)); 386*eda14cbcSMatt Macy } 387*eda14cbcSMatt Macy 388*eda14cbcSMatt Macy static void 389*eda14cbcSMatt Macy mze_remove(zap_t *zap, mzap_ent_t *mze) 390*eda14cbcSMatt Macy { 391*eda14cbcSMatt Macy ASSERT(zap->zap_ismicro); 392*eda14cbcSMatt Macy ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 393*eda14cbcSMatt Macy 394*eda14cbcSMatt Macy avl_remove(&zap->zap_m.zap_avl, mze); 395*eda14cbcSMatt Macy kmem_free(mze, sizeof (mzap_ent_t)); 396*eda14cbcSMatt Macy } 397*eda14cbcSMatt Macy 398*eda14cbcSMatt Macy static void 399*eda14cbcSMatt Macy mze_destroy(zap_t *zap) 400*eda14cbcSMatt Macy { 401*eda14cbcSMatt Macy mzap_ent_t *mze; 402*eda14cbcSMatt Macy void *avlcookie = NULL; 403*eda14cbcSMatt Macy 404*eda14cbcSMatt Macy while ((mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie))) 405*eda14cbcSMatt Macy kmem_free(mze, sizeof (mzap_ent_t)); 406*eda14cbcSMatt Macy avl_destroy(&zap->zap_m.zap_avl); 407*eda14cbcSMatt Macy } 408*eda14cbcSMatt Macy 409*eda14cbcSMatt Macy static zap_t * 410*eda14cbcSMatt Macy mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db) 411*eda14cbcSMatt Macy { 412*eda14cbcSMatt Macy zap_t *winner; 413*eda14cbcSMatt Macy uint64_t *zap_hdr = (uint64_t *)db->db_data; 414*eda14cbcSMatt Macy uint64_t zap_block_type = zap_hdr[0]; 415*eda14cbcSMatt Macy uint64_t zap_magic = zap_hdr[1]; 416*eda14cbcSMatt Macy 417*eda14cbcSMatt Macy ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t)); 418*eda14cbcSMatt Macy 419*eda14cbcSMatt Macy zap_t *zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP); 420*eda14cbcSMatt Macy rw_init(&zap->zap_rwlock, NULL, RW_DEFAULT, NULL); 421*eda14cbcSMatt Macy rw_enter(&zap->zap_rwlock, RW_WRITER); 422*eda14cbcSMatt Macy zap->zap_objset = os; 423*eda14cbcSMatt Macy zap->zap_object = obj; 424*eda14cbcSMatt Macy zap->zap_dbuf = db; 425*eda14cbcSMatt Macy 426*eda14cbcSMatt Macy if (zap_block_type != ZBT_MICRO) { 427*eda14cbcSMatt Macy mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, MUTEX_DEFAULT, 428*eda14cbcSMatt Macy 0); 429*eda14cbcSMatt Macy zap->zap_f.zap_block_shift = highbit64(db->db_size) - 1; 430*eda14cbcSMatt Macy if (zap_block_type != ZBT_HEADER || zap_magic != ZAP_MAGIC) { 431*eda14cbcSMatt Macy winner = NULL; /* No actual winner here... */ 432*eda14cbcSMatt Macy goto handle_winner; 433*eda14cbcSMatt Macy } 434*eda14cbcSMatt Macy } else { 435*eda14cbcSMatt Macy zap->zap_ismicro = TRUE; 436*eda14cbcSMatt Macy } 437*eda14cbcSMatt Macy 438*eda14cbcSMatt Macy /* 439*eda14cbcSMatt Macy * Make sure that zap_ismicro is set before we let others see 440*eda14cbcSMatt Macy * it, because zap_lockdir() checks zap_ismicro without the lock 441*eda14cbcSMatt Macy * held. 442*eda14cbcSMatt Macy */ 443*eda14cbcSMatt Macy dmu_buf_init_user(&zap->zap_dbu, zap_evict_sync, NULL, &zap->zap_dbuf); 444*eda14cbcSMatt Macy winner = dmu_buf_set_user(db, &zap->zap_dbu); 445*eda14cbcSMatt Macy 446*eda14cbcSMatt Macy if (winner != NULL) 447*eda14cbcSMatt Macy goto handle_winner; 448*eda14cbcSMatt Macy 449*eda14cbcSMatt Macy if (zap->zap_ismicro) { 450*eda14cbcSMatt Macy zap->zap_salt = zap_m_phys(zap)->mz_salt; 451*eda14cbcSMatt Macy zap->zap_normflags = zap_m_phys(zap)->mz_normflags; 452*eda14cbcSMatt Macy zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1; 453*eda14cbcSMatt Macy avl_create(&zap->zap_m.zap_avl, mze_compare, 454*eda14cbcSMatt Macy sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node)); 455*eda14cbcSMatt Macy 456*eda14cbcSMatt Macy for (int i = 0; i < zap->zap_m.zap_num_chunks; i++) { 457*eda14cbcSMatt Macy mzap_ent_phys_t *mze = 458*eda14cbcSMatt Macy &zap_m_phys(zap)->mz_chunk[i]; 459*eda14cbcSMatt Macy if (mze->mze_name[0]) { 460*eda14cbcSMatt Macy zap_name_t *zn; 461*eda14cbcSMatt Macy 462*eda14cbcSMatt Macy zap->zap_m.zap_num_entries++; 463*eda14cbcSMatt Macy zn = zap_name_alloc(zap, mze->mze_name, 0); 464*eda14cbcSMatt Macy mze_insert(zap, i, zn->zn_hash); 465*eda14cbcSMatt Macy zap_name_free(zn); 466*eda14cbcSMatt Macy } 467*eda14cbcSMatt Macy } 468*eda14cbcSMatt Macy } else { 469*eda14cbcSMatt Macy zap->zap_salt = zap_f_phys(zap)->zap_salt; 470*eda14cbcSMatt Macy zap->zap_normflags = zap_f_phys(zap)->zap_normflags; 471*eda14cbcSMatt Macy 472*eda14cbcSMatt Macy ASSERT3U(sizeof (struct zap_leaf_header), ==, 473*eda14cbcSMatt Macy 2*ZAP_LEAF_CHUNKSIZE); 474*eda14cbcSMatt Macy 475*eda14cbcSMatt Macy /* 476*eda14cbcSMatt Macy * The embedded pointer table should not overlap the 477*eda14cbcSMatt Macy * other members. 478*eda14cbcSMatt Macy */ 479*eda14cbcSMatt Macy ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >, 480*eda14cbcSMatt Macy &zap_f_phys(zap)->zap_salt); 481*eda14cbcSMatt Macy 482*eda14cbcSMatt Macy /* 483*eda14cbcSMatt Macy * The embedded pointer table should end at the end of 484*eda14cbcSMatt Macy * the block 485*eda14cbcSMatt Macy */ 486*eda14cbcSMatt Macy ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap, 487*eda14cbcSMatt Macy 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) - 488*eda14cbcSMatt Macy (uintptr_t)zap_f_phys(zap), ==, 489*eda14cbcSMatt Macy zap->zap_dbuf->db_size); 490*eda14cbcSMatt Macy } 491*eda14cbcSMatt Macy rw_exit(&zap->zap_rwlock); 492*eda14cbcSMatt Macy return (zap); 493*eda14cbcSMatt Macy 494*eda14cbcSMatt Macy handle_winner: 495*eda14cbcSMatt Macy rw_exit(&zap->zap_rwlock); 496*eda14cbcSMatt Macy rw_destroy(&zap->zap_rwlock); 497*eda14cbcSMatt Macy if (!zap->zap_ismicro) 498*eda14cbcSMatt Macy mutex_destroy(&zap->zap_f.zap_num_entries_mtx); 499*eda14cbcSMatt Macy kmem_free(zap, sizeof (zap_t)); 500*eda14cbcSMatt Macy return (winner); 501*eda14cbcSMatt Macy } 502*eda14cbcSMatt Macy 503*eda14cbcSMatt Macy /* 504*eda14cbcSMatt Macy * This routine "consumes" the caller's hold on the dbuf, which must 505*eda14cbcSMatt Macy * have the specified tag. 506*eda14cbcSMatt Macy */ 507*eda14cbcSMatt Macy static int 508*eda14cbcSMatt Macy zap_lockdir_impl(dmu_buf_t *db, void *tag, dmu_tx_t *tx, 509*eda14cbcSMatt Macy krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp) 510*eda14cbcSMatt Macy { 511*eda14cbcSMatt Macy ASSERT0(db->db_offset); 512*eda14cbcSMatt Macy objset_t *os = dmu_buf_get_objset(db); 513*eda14cbcSMatt Macy uint64_t obj = db->db_object; 514*eda14cbcSMatt Macy dmu_object_info_t doi; 515*eda14cbcSMatt Macy 516*eda14cbcSMatt Macy *zapp = NULL; 517*eda14cbcSMatt Macy 518*eda14cbcSMatt Macy dmu_object_info_from_db(db, &doi); 519*eda14cbcSMatt Macy if (DMU_OT_BYTESWAP(doi.doi_type) != DMU_BSWAP_ZAP) 520*eda14cbcSMatt Macy return (SET_ERROR(EINVAL)); 521*eda14cbcSMatt Macy 522*eda14cbcSMatt Macy zap_t *zap = dmu_buf_get_user(db); 523*eda14cbcSMatt Macy if (zap == NULL) { 524*eda14cbcSMatt Macy zap = mzap_open(os, obj, db); 525*eda14cbcSMatt Macy if (zap == NULL) { 526*eda14cbcSMatt Macy /* 527*eda14cbcSMatt Macy * mzap_open() didn't like what it saw on-disk. 528*eda14cbcSMatt Macy * Check for corruption! 529*eda14cbcSMatt Macy */ 530*eda14cbcSMatt Macy return (SET_ERROR(EIO)); 531*eda14cbcSMatt Macy } 532*eda14cbcSMatt Macy } 533*eda14cbcSMatt Macy 534*eda14cbcSMatt Macy /* 535*eda14cbcSMatt Macy * We're checking zap_ismicro without the lock held, in order to 536*eda14cbcSMatt Macy * tell what type of lock we want. Once we have some sort of 537*eda14cbcSMatt Macy * lock, see if it really is the right type. In practice this 538*eda14cbcSMatt Macy * can only be different if it was upgraded from micro to fat, 539*eda14cbcSMatt Macy * and micro wanted WRITER but fat only needs READER. 540*eda14cbcSMatt Macy */ 541*eda14cbcSMatt Macy krw_t lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti; 542*eda14cbcSMatt Macy rw_enter(&zap->zap_rwlock, lt); 543*eda14cbcSMatt Macy if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) { 544*eda14cbcSMatt Macy /* it was upgraded, now we only need reader */ 545*eda14cbcSMatt Macy ASSERT(lt == RW_WRITER); 546*eda14cbcSMatt Macy ASSERT(RW_READER == 547*eda14cbcSMatt Macy ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)); 548*eda14cbcSMatt Macy rw_downgrade(&zap->zap_rwlock); 549*eda14cbcSMatt Macy lt = RW_READER; 550*eda14cbcSMatt Macy } 551*eda14cbcSMatt Macy 552*eda14cbcSMatt Macy zap->zap_objset = os; 553*eda14cbcSMatt Macy 554*eda14cbcSMatt Macy if (lt == RW_WRITER) 555*eda14cbcSMatt Macy dmu_buf_will_dirty(db, tx); 556*eda14cbcSMatt Macy 557*eda14cbcSMatt Macy ASSERT3P(zap->zap_dbuf, ==, db); 558*eda14cbcSMatt Macy 559*eda14cbcSMatt Macy ASSERT(!zap->zap_ismicro || 560*eda14cbcSMatt Macy zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks); 561*eda14cbcSMatt Macy if (zap->zap_ismicro && tx && adding && 562*eda14cbcSMatt Macy zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) { 563*eda14cbcSMatt Macy uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE; 564*eda14cbcSMatt Macy if (newsz > MZAP_MAX_BLKSZ) { 565*eda14cbcSMatt Macy dprintf("upgrading obj %llu: num_entries=%u\n", 566*eda14cbcSMatt Macy obj, zap->zap_m.zap_num_entries); 567*eda14cbcSMatt Macy *zapp = zap; 568*eda14cbcSMatt Macy int err = mzap_upgrade(zapp, tag, tx, 0); 569*eda14cbcSMatt Macy if (err != 0) 570*eda14cbcSMatt Macy rw_exit(&zap->zap_rwlock); 571*eda14cbcSMatt Macy return (err); 572*eda14cbcSMatt Macy } 573*eda14cbcSMatt Macy VERIFY0(dmu_object_set_blocksize(os, obj, newsz, 0, tx)); 574*eda14cbcSMatt Macy zap->zap_m.zap_num_chunks = 575*eda14cbcSMatt Macy db->db_size / MZAP_ENT_LEN - 1; 576*eda14cbcSMatt Macy } 577*eda14cbcSMatt Macy 578*eda14cbcSMatt Macy *zapp = zap; 579*eda14cbcSMatt Macy return (0); 580*eda14cbcSMatt Macy } 581*eda14cbcSMatt Macy 582*eda14cbcSMatt Macy static int 583*eda14cbcSMatt Macy zap_lockdir_by_dnode(dnode_t *dn, dmu_tx_t *tx, 584*eda14cbcSMatt Macy krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp) 585*eda14cbcSMatt Macy { 586*eda14cbcSMatt Macy dmu_buf_t *db; 587*eda14cbcSMatt Macy 588*eda14cbcSMatt Macy int err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH); 589*eda14cbcSMatt Macy if (err != 0) { 590*eda14cbcSMatt Macy return (err); 591*eda14cbcSMatt Macy } 592*eda14cbcSMatt Macy #ifdef ZFS_DEBUG 593*eda14cbcSMatt Macy { 594*eda14cbcSMatt Macy dmu_object_info_t doi; 595*eda14cbcSMatt Macy dmu_object_info_from_db(db, &doi); 596*eda14cbcSMatt Macy ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); 597*eda14cbcSMatt Macy } 598*eda14cbcSMatt Macy #endif 599*eda14cbcSMatt Macy 600*eda14cbcSMatt Macy err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp); 601*eda14cbcSMatt Macy if (err != 0) { 602*eda14cbcSMatt Macy dmu_buf_rele(db, tag); 603*eda14cbcSMatt Macy } 604*eda14cbcSMatt Macy return (err); 605*eda14cbcSMatt Macy } 606*eda14cbcSMatt Macy 607*eda14cbcSMatt Macy int 608*eda14cbcSMatt Macy zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx, 609*eda14cbcSMatt Macy krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp) 610*eda14cbcSMatt Macy { 611*eda14cbcSMatt Macy dmu_buf_t *db; 612*eda14cbcSMatt Macy 613*eda14cbcSMatt Macy int err = dmu_buf_hold(os, obj, 0, tag, &db, DMU_READ_NO_PREFETCH); 614*eda14cbcSMatt Macy if (err != 0) 615*eda14cbcSMatt Macy return (err); 616*eda14cbcSMatt Macy #ifdef ZFS_DEBUG 617*eda14cbcSMatt Macy { 618*eda14cbcSMatt Macy dmu_object_info_t doi; 619*eda14cbcSMatt Macy dmu_object_info_from_db(db, &doi); 620*eda14cbcSMatt Macy ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP); 621*eda14cbcSMatt Macy } 622*eda14cbcSMatt Macy #endif 623*eda14cbcSMatt Macy err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp); 624*eda14cbcSMatt Macy if (err != 0) 625*eda14cbcSMatt Macy dmu_buf_rele(db, tag); 626*eda14cbcSMatt Macy return (err); 627*eda14cbcSMatt Macy } 628*eda14cbcSMatt Macy 629*eda14cbcSMatt Macy void 630*eda14cbcSMatt Macy zap_unlockdir(zap_t *zap, void *tag) 631*eda14cbcSMatt Macy { 632*eda14cbcSMatt Macy rw_exit(&zap->zap_rwlock); 633*eda14cbcSMatt Macy dmu_buf_rele(zap->zap_dbuf, tag); 634*eda14cbcSMatt Macy } 635*eda14cbcSMatt Macy 636*eda14cbcSMatt Macy static int 637*eda14cbcSMatt Macy mzap_upgrade(zap_t **zapp, void *tag, dmu_tx_t *tx, zap_flags_t flags) 638*eda14cbcSMatt Macy { 639*eda14cbcSMatt Macy int err = 0; 640*eda14cbcSMatt Macy zap_t *zap = *zapp; 641*eda14cbcSMatt Macy 642*eda14cbcSMatt Macy ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 643*eda14cbcSMatt Macy 644*eda14cbcSMatt Macy int sz = zap->zap_dbuf->db_size; 645*eda14cbcSMatt Macy mzap_phys_t *mzp = vmem_alloc(sz, KM_SLEEP); 646*eda14cbcSMatt Macy bcopy(zap->zap_dbuf->db_data, mzp, sz); 647*eda14cbcSMatt Macy int nchunks = zap->zap_m.zap_num_chunks; 648*eda14cbcSMatt Macy 649*eda14cbcSMatt Macy if (!flags) { 650*eda14cbcSMatt Macy err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object, 651*eda14cbcSMatt Macy 1ULL << fzap_default_block_shift, 0, tx); 652*eda14cbcSMatt Macy if (err != 0) { 653*eda14cbcSMatt Macy vmem_free(mzp, sz); 654*eda14cbcSMatt Macy return (err); 655*eda14cbcSMatt Macy } 656*eda14cbcSMatt Macy } 657*eda14cbcSMatt Macy 658*eda14cbcSMatt Macy dprintf("upgrading obj=%llu with %u chunks\n", 659*eda14cbcSMatt Macy zap->zap_object, nchunks); 660*eda14cbcSMatt Macy /* XXX destroy the avl later, so we can use the stored hash value */ 661*eda14cbcSMatt Macy mze_destroy(zap); 662*eda14cbcSMatt Macy 663*eda14cbcSMatt Macy fzap_upgrade(zap, tx, flags); 664*eda14cbcSMatt Macy 665*eda14cbcSMatt Macy for (int i = 0; i < nchunks; i++) { 666*eda14cbcSMatt Macy mzap_ent_phys_t *mze = &mzp->mz_chunk[i]; 667*eda14cbcSMatt Macy if (mze->mze_name[0] == 0) 668*eda14cbcSMatt Macy continue; 669*eda14cbcSMatt Macy dprintf("adding %s=%llu\n", 670*eda14cbcSMatt Macy mze->mze_name, mze->mze_value); 671*eda14cbcSMatt Macy zap_name_t *zn = zap_name_alloc(zap, mze->mze_name, 0); 672*eda14cbcSMatt Macy /* If we fail here, we would end up losing entries */ 673*eda14cbcSMatt Macy VERIFY0(fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, 674*eda14cbcSMatt Macy tag, tx)); 675*eda14cbcSMatt Macy zap = zn->zn_zap; /* fzap_add_cd() may change zap */ 676*eda14cbcSMatt Macy zap_name_free(zn); 677*eda14cbcSMatt Macy } 678*eda14cbcSMatt Macy vmem_free(mzp, sz); 679*eda14cbcSMatt Macy *zapp = zap; 680*eda14cbcSMatt Macy return (0); 681*eda14cbcSMatt Macy } 682*eda14cbcSMatt Macy 683*eda14cbcSMatt Macy /* 684*eda14cbcSMatt Macy * The "normflags" determine the behavior of the matchtype_t which is 685*eda14cbcSMatt Macy * passed to zap_lookup_norm(). Names which have the same normalized 686*eda14cbcSMatt Macy * version will be stored with the same hash value, and therefore we can 687*eda14cbcSMatt Macy * perform normalization-insensitive lookups. We can be Unicode form- 688*eda14cbcSMatt Macy * insensitive and/or case-insensitive. The following flags are valid for 689*eda14cbcSMatt Macy * "normflags": 690*eda14cbcSMatt Macy * 691*eda14cbcSMatt Macy * U8_TEXTPREP_NFC 692*eda14cbcSMatt Macy * U8_TEXTPREP_NFD 693*eda14cbcSMatt Macy * U8_TEXTPREP_NFKC 694*eda14cbcSMatt Macy * U8_TEXTPREP_NFKD 695*eda14cbcSMatt Macy * U8_TEXTPREP_TOUPPER 696*eda14cbcSMatt Macy * 697*eda14cbcSMatt Macy * The *_NF* (Normalization Form) flags are mutually exclusive; at most one 698*eda14cbcSMatt Macy * of them may be supplied. 699*eda14cbcSMatt Macy */ 700*eda14cbcSMatt Macy void 701*eda14cbcSMatt Macy mzap_create_impl(dnode_t *dn, int normflags, zap_flags_t flags, dmu_tx_t *tx) 702*eda14cbcSMatt Macy { 703*eda14cbcSMatt Macy dmu_buf_t *db; 704*eda14cbcSMatt Macy 705*eda14cbcSMatt Macy VERIFY0(dmu_buf_hold_by_dnode(dn, 0, FTAG, &db, DMU_READ_NO_PREFETCH)); 706*eda14cbcSMatt Macy 707*eda14cbcSMatt Macy dmu_buf_will_dirty(db, tx); 708*eda14cbcSMatt Macy mzap_phys_t *zp = db->db_data; 709*eda14cbcSMatt Macy zp->mz_block_type = ZBT_MICRO; 710*eda14cbcSMatt Macy zp->mz_salt = 711*eda14cbcSMatt Macy ((uintptr_t)db ^ (uintptr_t)tx ^ (dn->dn_object << 1)) | 1ULL; 712*eda14cbcSMatt Macy zp->mz_normflags = normflags; 713*eda14cbcSMatt Macy 714*eda14cbcSMatt Macy if (flags != 0) { 715*eda14cbcSMatt Macy zap_t *zap; 716*eda14cbcSMatt Macy /* Only fat zap supports flags; upgrade immediately. */ 717*eda14cbcSMatt Macy VERIFY0(zap_lockdir_impl(db, FTAG, tx, RW_WRITER, 718*eda14cbcSMatt Macy B_FALSE, B_FALSE, &zap)); 719*eda14cbcSMatt Macy VERIFY0(mzap_upgrade(&zap, FTAG, tx, flags)); 720*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 721*eda14cbcSMatt Macy } else { 722*eda14cbcSMatt Macy dmu_buf_rele(db, FTAG); 723*eda14cbcSMatt Macy } 724*eda14cbcSMatt Macy } 725*eda14cbcSMatt Macy 726*eda14cbcSMatt Macy static uint64_t 727*eda14cbcSMatt Macy zap_create_impl(objset_t *os, int normflags, zap_flags_t flags, 728*eda14cbcSMatt Macy dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, 729*eda14cbcSMatt Macy dmu_object_type_t bonustype, int bonuslen, int dnodesize, 730*eda14cbcSMatt Macy dnode_t **allocated_dnode, void *tag, dmu_tx_t *tx) 731*eda14cbcSMatt Macy { 732*eda14cbcSMatt Macy uint64_t obj; 733*eda14cbcSMatt Macy 734*eda14cbcSMatt Macy ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP); 735*eda14cbcSMatt Macy 736*eda14cbcSMatt Macy if (allocated_dnode == NULL) { 737*eda14cbcSMatt Macy dnode_t *dn; 738*eda14cbcSMatt Macy obj = dmu_object_alloc_hold(os, ot, 1ULL << leaf_blockshift, 739*eda14cbcSMatt Macy indirect_blockshift, bonustype, bonuslen, dnodesize, 740*eda14cbcSMatt Macy &dn, FTAG, tx); 741*eda14cbcSMatt Macy mzap_create_impl(dn, normflags, flags, tx); 742*eda14cbcSMatt Macy dnode_rele(dn, FTAG); 743*eda14cbcSMatt Macy } else { 744*eda14cbcSMatt Macy obj = dmu_object_alloc_hold(os, ot, 1ULL << leaf_blockshift, 745*eda14cbcSMatt Macy indirect_blockshift, bonustype, bonuslen, dnodesize, 746*eda14cbcSMatt Macy allocated_dnode, tag, tx); 747*eda14cbcSMatt Macy mzap_create_impl(*allocated_dnode, normflags, flags, tx); 748*eda14cbcSMatt Macy } 749*eda14cbcSMatt Macy 750*eda14cbcSMatt Macy return (obj); 751*eda14cbcSMatt Macy } 752*eda14cbcSMatt Macy 753*eda14cbcSMatt Macy int 754*eda14cbcSMatt Macy zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot, 755*eda14cbcSMatt Macy dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 756*eda14cbcSMatt Macy { 757*eda14cbcSMatt Macy return (zap_create_claim_dnsize(os, obj, ot, bonustype, bonuslen, 758*eda14cbcSMatt Macy 0, tx)); 759*eda14cbcSMatt Macy } 760*eda14cbcSMatt Macy 761*eda14cbcSMatt Macy int 762*eda14cbcSMatt Macy zap_create_claim_dnsize(objset_t *os, uint64_t obj, dmu_object_type_t ot, 763*eda14cbcSMatt Macy dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) 764*eda14cbcSMatt Macy { 765*eda14cbcSMatt Macy return (zap_create_claim_norm_dnsize(os, obj, 766*eda14cbcSMatt Macy 0, ot, bonustype, bonuslen, dnodesize, tx)); 767*eda14cbcSMatt Macy } 768*eda14cbcSMatt Macy 769*eda14cbcSMatt Macy int 770*eda14cbcSMatt Macy zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags, 771*eda14cbcSMatt Macy dmu_object_type_t ot, 772*eda14cbcSMatt Macy dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 773*eda14cbcSMatt Macy { 774*eda14cbcSMatt Macy return (zap_create_claim_norm_dnsize(os, obj, normflags, ot, bonustype, 775*eda14cbcSMatt Macy bonuslen, 0, tx)); 776*eda14cbcSMatt Macy } 777*eda14cbcSMatt Macy 778*eda14cbcSMatt Macy int 779*eda14cbcSMatt Macy zap_create_claim_norm_dnsize(objset_t *os, uint64_t obj, int normflags, 780*eda14cbcSMatt Macy dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen, 781*eda14cbcSMatt Macy int dnodesize, dmu_tx_t *tx) 782*eda14cbcSMatt Macy { 783*eda14cbcSMatt Macy dnode_t *dn; 784*eda14cbcSMatt Macy int error; 785*eda14cbcSMatt Macy 786*eda14cbcSMatt Macy ASSERT3U(DMU_OT_BYTESWAP(ot), ==, DMU_BSWAP_ZAP); 787*eda14cbcSMatt Macy error = dmu_object_claim_dnsize(os, obj, ot, 0, bonustype, bonuslen, 788*eda14cbcSMatt Macy dnodesize, tx); 789*eda14cbcSMatt Macy if (error != 0) 790*eda14cbcSMatt Macy return (error); 791*eda14cbcSMatt Macy 792*eda14cbcSMatt Macy error = dnode_hold(os, obj, FTAG, &dn); 793*eda14cbcSMatt Macy if (error != 0) 794*eda14cbcSMatt Macy return (error); 795*eda14cbcSMatt Macy 796*eda14cbcSMatt Macy mzap_create_impl(dn, normflags, 0, tx); 797*eda14cbcSMatt Macy 798*eda14cbcSMatt Macy dnode_rele(dn, FTAG); 799*eda14cbcSMatt Macy 800*eda14cbcSMatt Macy return (0); 801*eda14cbcSMatt Macy } 802*eda14cbcSMatt Macy 803*eda14cbcSMatt Macy uint64_t 804*eda14cbcSMatt Macy zap_create(objset_t *os, dmu_object_type_t ot, 805*eda14cbcSMatt Macy dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 806*eda14cbcSMatt Macy { 807*eda14cbcSMatt Macy return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx)); 808*eda14cbcSMatt Macy } 809*eda14cbcSMatt Macy 810*eda14cbcSMatt Macy uint64_t 811*eda14cbcSMatt Macy zap_create_dnsize(objset_t *os, dmu_object_type_t ot, 812*eda14cbcSMatt Macy dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) 813*eda14cbcSMatt Macy { 814*eda14cbcSMatt Macy return (zap_create_norm_dnsize(os, 0, ot, bonustype, bonuslen, 815*eda14cbcSMatt Macy dnodesize, tx)); 816*eda14cbcSMatt Macy } 817*eda14cbcSMatt Macy 818*eda14cbcSMatt Macy uint64_t 819*eda14cbcSMatt Macy zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot, 820*eda14cbcSMatt Macy dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 821*eda14cbcSMatt Macy { 822*eda14cbcSMatt Macy return (zap_create_norm_dnsize(os, normflags, ot, bonustype, bonuslen, 823*eda14cbcSMatt Macy 0, tx)); 824*eda14cbcSMatt Macy } 825*eda14cbcSMatt Macy 826*eda14cbcSMatt Macy uint64_t 827*eda14cbcSMatt Macy zap_create_norm_dnsize(objset_t *os, int normflags, dmu_object_type_t ot, 828*eda14cbcSMatt Macy dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) 829*eda14cbcSMatt Macy { 830*eda14cbcSMatt Macy return (zap_create_impl(os, normflags, 0, ot, 0, 0, 831*eda14cbcSMatt Macy bonustype, bonuslen, dnodesize, NULL, NULL, tx)); 832*eda14cbcSMatt Macy } 833*eda14cbcSMatt Macy 834*eda14cbcSMatt Macy uint64_t 835*eda14cbcSMatt Macy zap_create_flags(objset_t *os, int normflags, zap_flags_t flags, 836*eda14cbcSMatt Macy dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, 837*eda14cbcSMatt Macy dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) 838*eda14cbcSMatt Macy { 839*eda14cbcSMatt Macy return (zap_create_flags_dnsize(os, normflags, flags, ot, 840*eda14cbcSMatt Macy leaf_blockshift, indirect_blockshift, bonustype, bonuslen, 0, tx)); 841*eda14cbcSMatt Macy } 842*eda14cbcSMatt Macy 843*eda14cbcSMatt Macy uint64_t 844*eda14cbcSMatt Macy zap_create_flags_dnsize(objset_t *os, int normflags, zap_flags_t flags, 845*eda14cbcSMatt Macy dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, 846*eda14cbcSMatt Macy dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) 847*eda14cbcSMatt Macy { 848*eda14cbcSMatt Macy return (zap_create_impl(os, normflags, flags, ot, leaf_blockshift, 849*eda14cbcSMatt Macy indirect_blockshift, bonustype, bonuslen, dnodesize, NULL, NULL, 850*eda14cbcSMatt Macy tx)); 851*eda14cbcSMatt Macy } 852*eda14cbcSMatt Macy 853*eda14cbcSMatt Macy /* 854*eda14cbcSMatt Macy * Create a zap object and return a pointer to the newly allocated dnode via 855*eda14cbcSMatt Macy * the allocated_dnode argument. The returned dnode will be held and the 856*eda14cbcSMatt Macy * caller is responsible for releasing the hold by calling dnode_rele(). 857*eda14cbcSMatt Macy */ 858*eda14cbcSMatt Macy uint64_t 859*eda14cbcSMatt Macy zap_create_hold(objset_t *os, int normflags, zap_flags_t flags, 860*eda14cbcSMatt Macy dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift, 861*eda14cbcSMatt Macy dmu_object_type_t bonustype, int bonuslen, int dnodesize, 862*eda14cbcSMatt Macy dnode_t **allocated_dnode, void *tag, dmu_tx_t *tx) 863*eda14cbcSMatt Macy { 864*eda14cbcSMatt Macy return (zap_create_impl(os, normflags, flags, ot, leaf_blockshift, 865*eda14cbcSMatt Macy indirect_blockshift, bonustype, bonuslen, dnodesize, 866*eda14cbcSMatt Macy allocated_dnode, tag, tx)); 867*eda14cbcSMatt Macy } 868*eda14cbcSMatt Macy 869*eda14cbcSMatt Macy int 870*eda14cbcSMatt Macy zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx) 871*eda14cbcSMatt Macy { 872*eda14cbcSMatt Macy /* 873*eda14cbcSMatt Macy * dmu_object_free will free the object number and free the 874*eda14cbcSMatt Macy * data. Freeing the data will cause our pageout function to be 875*eda14cbcSMatt Macy * called, which will destroy our data (zap_leaf_t's and zap_t). 876*eda14cbcSMatt Macy */ 877*eda14cbcSMatt Macy 878*eda14cbcSMatt Macy return (dmu_object_free(os, zapobj, tx)); 879*eda14cbcSMatt Macy } 880*eda14cbcSMatt Macy 881*eda14cbcSMatt Macy void 882*eda14cbcSMatt Macy zap_evict_sync(void *dbu) 883*eda14cbcSMatt Macy { 884*eda14cbcSMatt Macy zap_t *zap = dbu; 885*eda14cbcSMatt Macy 886*eda14cbcSMatt Macy rw_destroy(&zap->zap_rwlock); 887*eda14cbcSMatt Macy 888*eda14cbcSMatt Macy if (zap->zap_ismicro) 889*eda14cbcSMatt Macy mze_destroy(zap); 890*eda14cbcSMatt Macy else 891*eda14cbcSMatt Macy mutex_destroy(&zap->zap_f.zap_num_entries_mtx); 892*eda14cbcSMatt Macy 893*eda14cbcSMatt Macy kmem_free(zap, sizeof (zap_t)); 894*eda14cbcSMatt Macy } 895*eda14cbcSMatt Macy 896*eda14cbcSMatt Macy int 897*eda14cbcSMatt Macy zap_count(objset_t *os, uint64_t zapobj, uint64_t *count) 898*eda14cbcSMatt Macy { 899*eda14cbcSMatt Macy zap_t *zap; 900*eda14cbcSMatt Macy 901*eda14cbcSMatt Macy int err = 902*eda14cbcSMatt Macy zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); 903*eda14cbcSMatt Macy if (err != 0) 904*eda14cbcSMatt Macy return (err); 905*eda14cbcSMatt Macy if (!zap->zap_ismicro) { 906*eda14cbcSMatt Macy err = fzap_count(zap, count); 907*eda14cbcSMatt Macy } else { 908*eda14cbcSMatt Macy *count = zap->zap_m.zap_num_entries; 909*eda14cbcSMatt Macy } 910*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 911*eda14cbcSMatt Macy return (err); 912*eda14cbcSMatt Macy } 913*eda14cbcSMatt Macy 914*eda14cbcSMatt Macy /* 915*eda14cbcSMatt Macy * zn may be NULL; if not specified, it will be computed if needed. 916*eda14cbcSMatt Macy * See also the comment above zap_entry_normalization_conflict(). 917*eda14cbcSMatt Macy */ 918*eda14cbcSMatt Macy static boolean_t 919*eda14cbcSMatt Macy mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze) 920*eda14cbcSMatt Macy { 921*eda14cbcSMatt Macy int direction = AVL_BEFORE; 922*eda14cbcSMatt Macy boolean_t allocdzn = B_FALSE; 923*eda14cbcSMatt Macy 924*eda14cbcSMatt Macy if (zap->zap_normflags == 0) 925*eda14cbcSMatt Macy return (B_FALSE); 926*eda14cbcSMatt Macy 927*eda14cbcSMatt Macy again: 928*eda14cbcSMatt Macy for (mzap_ent_t *other = avl_walk(&zap->zap_m.zap_avl, mze, direction); 929*eda14cbcSMatt Macy other && other->mze_hash == mze->mze_hash; 930*eda14cbcSMatt Macy other = avl_walk(&zap->zap_m.zap_avl, other, direction)) { 931*eda14cbcSMatt Macy 932*eda14cbcSMatt Macy if (zn == NULL) { 933*eda14cbcSMatt Macy zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name, 934*eda14cbcSMatt Macy MT_NORMALIZE); 935*eda14cbcSMatt Macy allocdzn = B_TRUE; 936*eda14cbcSMatt Macy } 937*eda14cbcSMatt Macy if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) { 938*eda14cbcSMatt Macy if (allocdzn) 939*eda14cbcSMatt Macy zap_name_free(zn); 940*eda14cbcSMatt Macy return (B_TRUE); 941*eda14cbcSMatt Macy } 942*eda14cbcSMatt Macy } 943*eda14cbcSMatt Macy 944*eda14cbcSMatt Macy if (direction == AVL_BEFORE) { 945*eda14cbcSMatt Macy direction = AVL_AFTER; 946*eda14cbcSMatt Macy goto again; 947*eda14cbcSMatt Macy } 948*eda14cbcSMatt Macy 949*eda14cbcSMatt Macy if (allocdzn) 950*eda14cbcSMatt Macy zap_name_free(zn); 951*eda14cbcSMatt Macy return (B_FALSE); 952*eda14cbcSMatt Macy } 953*eda14cbcSMatt Macy 954*eda14cbcSMatt Macy /* 955*eda14cbcSMatt Macy * Routines for manipulating attributes. 956*eda14cbcSMatt Macy */ 957*eda14cbcSMatt Macy 958*eda14cbcSMatt Macy int 959*eda14cbcSMatt Macy zap_lookup(objset_t *os, uint64_t zapobj, const char *name, 960*eda14cbcSMatt Macy uint64_t integer_size, uint64_t num_integers, void *buf) 961*eda14cbcSMatt Macy { 962*eda14cbcSMatt Macy return (zap_lookup_norm(os, zapobj, name, integer_size, 963*eda14cbcSMatt Macy num_integers, buf, 0, NULL, 0, NULL)); 964*eda14cbcSMatt Macy } 965*eda14cbcSMatt Macy 966*eda14cbcSMatt Macy static int 967*eda14cbcSMatt Macy zap_lookup_impl(zap_t *zap, const char *name, 968*eda14cbcSMatt Macy uint64_t integer_size, uint64_t num_integers, void *buf, 969*eda14cbcSMatt Macy matchtype_t mt, char *realname, int rn_len, 970*eda14cbcSMatt Macy boolean_t *ncp) 971*eda14cbcSMatt Macy { 972*eda14cbcSMatt Macy int err = 0; 973*eda14cbcSMatt Macy 974*eda14cbcSMatt Macy zap_name_t *zn = zap_name_alloc(zap, name, mt); 975*eda14cbcSMatt Macy if (zn == NULL) 976*eda14cbcSMatt Macy return (SET_ERROR(ENOTSUP)); 977*eda14cbcSMatt Macy 978*eda14cbcSMatt Macy if (!zap->zap_ismicro) { 979*eda14cbcSMatt Macy err = fzap_lookup(zn, integer_size, num_integers, buf, 980*eda14cbcSMatt Macy realname, rn_len, ncp); 981*eda14cbcSMatt Macy } else { 982*eda14cbcSMatt Macy mzap_ent_t *mze = mze_find(zn); 983*eda14cbcSMatt Macy if (mze == NULL) { 984*eda14cbcSMatt Macy err = SET_ERROR(ENOENT); 985*eda14cbcSMatt Macy } else { 986*eda14cbcSMatt Macy if (num_integers < 1) { 987*eda14cbcSMatt Macy err = SET_ERROR(EOVERFLOW); 988*eda14cbcSMatt Macy } else if (integer_size != 8) { 989*eda14cbcSMatt Macy err = SET_ERROR(EINVAL); 990*eda14cbcSMatt Macy } else { 991*eda14cbcSMatt Macy *(uint64_t *)buf = 992*eda14cbcSMatt Macy MZE_PHYS(zap, mze)->mze_value; 993*eda14cbcSMatt Macy (void) strlcpy(realname, 994*eda14cbcSMatt Macy MZE_PHYS(zap, mze)->mze_name, rn_len); 995*eda14cbcSMatt Macy if (ncp) { 996*eda14cbcSMatt Macy *ncp = mzap_normalization_conflict(zap, 997*eda14cbcSMatt Macy zn, mze); 998*eda14cbcSMatt Macy } 999*eda14cbcSMatt Macy } 1000*eda14cbcSMatt Macy } 1001*eda14cbcSMatt Macy } 1002*eda14cbcSMatt Macy zap_name_free(zn); 1003*eda14cbcSMatt Macy return (err); 1004*eda14cbcSMatt Macy } 1005*eda14cbcSMatt Macy 1006*eda14cbcSMatt Macy int 1007*eda14cbcSMatt Macy zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name, 1008*eda14cbcSMatt Macy uint64_t integer_size, uint64_t num_integers, void *buf, 1009*eda14cbcSMatt Macy matchtype_t mt, char *realname, int rn_len, 1010*eda14cbcSMatt Macy boolean_t *ncp) 1011*eda14cbcSMatt Macy { 1012*eda14cbcSMatt Macy zap_t *zap; 1013*eda14cbcSMatt Macy 1014*eda14cbcSMatt Macy int err = 1015*eda14cbcSMatt Macy zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); 1016*eda14cbcSMatt Macy if (err != 0) 1017*eda14cbcSMatt Macy return (err); 1018*eda14cbcSMatt Macy err = zap_lookup_impl(zap, name, integer_size, 1019*eda14cbcSMatt Macy num_integers, buf, mt, realname, rn_len, ncp); 1020*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1021*eda14cbcSMatt Macy return (err); 1022*eda14cbcSMatt Macy } 1023*eda14cbcSMatt Macy 1024*eda14cbcSMatt Macy int 1025*eda14cbcSMatt Macy zap_prefetch(objset_t *os, uint64_t zapobj, const char *name) 1026*eda14cbcSMatt Macy { 1027*eda14cbcSMatt Macy zap_t *zap; 1028*eda14cbcSMatt Macy int err; 1029*eda14cbcSMatt Macy zap_name_t *zn; 1030*eda14cbcSMatt Macy 1031*eda14cbcSMatt Macy err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); 1032*eda14cbcSMatt Macy if (err) 1033*eda14cbcSMatt Macy return (err); 1034*eda14cbcSMatt Macy zn = zap_name_alloc(zap, name, 0); 1035*eda14cbcSMatt Macy if (zn == NULL) { 1036*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1037*eda14cbcSMatt Macy return (SET_ERROR(ENOTSUP)); 1038*eda14cbcSMatt Macy } 1039*eda14cbcSMatt Macy 1040*eda14cbcSMatt Macy fzap_prefetch(zn); 1041*eda14cbcSMatt Macy zap_name_free(zn); 1042*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1043*eda14cbcSMatt Macy return (err); 1044*eda14cbcSMatt Macy } 1045*eda14cbcSMatt Macy 1046*eda14cbcSMatt Macy int 1047*eda14cbcSMatt Macy zap_lookup_by_dnode(dnode_t *dn, const char *name, 1048*eda14cbcSMatt Macy uint64_t integer_size, uint64_t num_integers, void *buf) 1049*eda14cbcSMatt Macy { 1050*eda14cbcSMatt Macy return (zap_lookup_norm_by_dnode(dn, name, integer_size, 1051*eda14cbcSMatt Macy num_integers, buf, 0, NULL, 0, NULL)); 1052*eda14cbcSMatt Macy } 1053*eda14cbcSMatt Macy 1054*eda14cbcSMatt Macy int 1055*eda14cbcSMatt Macy zap_lookup_norm_by_dnode(dnode_t *dn, const char *name, 1056*eda14cbcSMatt Macy uint64_t integer_size, uint64_t num_integers, void *buf, 1057*eda14cbcSMatt Macy matchtype_t mt, char *realname, int rn_len, 1058*eda14cbcSMatt Macy boolean_t *ncp) 1059*eda14cbcSMatt Macy { 1060*eda14cbcSMatt Macy zap_t *zap; 1061*eda14cbcSMatt Macy 1062*eda14cbcSMatt Macy int err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE, 1063*eda14cbcSMatt Macy FTAG, &zap); 1064*eda14cbcSMatt Macy if (err != 0) 1065*eda14cbcSMatt Macy return (err); 1066*eda14cbcSMatt Macy err = zap_lookup_impl(zap, name, integer_size, 1067*eda14cbcSMatt Macy num_integers, buf, mt, realname, rn_len, ncp); 1068*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1069*eda14cbcSMatt Macy return (err); 1070*eda14cbcSMatt Macy } 1071*eda14cbcSMatt Macy 1072*eda14cbcSMatt Macy int 1073*eda14cbcSMatt Macy zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 1074*eda14cbcSMatt Macy int key_numints) 1075*eda14cbcSMatt Macy { 1076*eda14cbcSMatt Macy zap_t *zap; 1077*eda14cbcSMatt Macy 1078*eda14cbcSMatt Macy int err = 1079*eda14cbcSMatt Macy zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); 1080*eda14cbcSMatt Macy if (err != 0) 1081*eda14cbcSMatt Macy return (err); 1082*eda14cbcSMatt Macy zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints); 1083*eda14cbcSMatt Macy if (zn == NULL) { 1084*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1085*eda14cbcSMatt Macy return (SET_ERROR(ENOTSUP)); 1086*eda14cbcSMatt Macy } 1087*eda14cbcSMatt Macy 1088*eda14cbcSMatt Macy fzap_prefetch(zn); 1089*eda14cbcSMatt Macy zap_name_free(zn); 1090*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1091*eda14cbcSMatt Macy return (err); 1092*eda14cbcSMatt Macy } 1093*eda14cbcSMatt Macy 1094*eda14cbcSMatt Macy int 1095*eda14cbcSMatt Macy zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 1096*eda14cbcSMatt Macy int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf) 1097*eda14cbcSMatt Macy { 1098*eda14cbcSMatt Macy zap_t *zap; 1099*eda14cbcSMatt Macy 1100*eda14cbcSMatt Macy int err = 1101*eda14cbcSMatt Macy zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); 1102*eda14cbcSMatt Macy if (err != 0) 1103*eda14cbcSMatt Macy return (err); 1104*eda14cbcSMatt Macy zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints); 1105*eda14cbcSMatt Macy if (zn == NULL) { 1106*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1107*eda14cbcSMatt Macy return (SET_ERROR(ENOTSUP)); 1108*eda14cbcSMatt Macy } 1109*eda14cbcSMatt Macy 1110*eda14cbcSMatt Macy err = fzap_lookup(zn, integer_size, num_integers, buf, 1111*eda14cbcSMatt Macy NULL, 0, NULL); 1112*eda14cbcSMatt Macy zap_name_free(zn); 1113*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1114*eda14cbcSMatt Macy return (err); 1115*eda14cbcSMatt Macy } 1116*eda14cbcSMatt Macy 1117*eda14cbcSMatt Macy int 1118*eda14cbcSMatt Macy zap_contains(objset_t *os, uint64_t zapobj, const char *name) 1119*eda14cbcSMatt Macy { 1120*eda14cbcSMatt Macy int err = zap_lookup_norm(os, zapobj, name, 0, 1121*eda14cbcSMatt Macy 0, NULL, 0, NULL, 0, NULL); 1122*eda14cbcSMatt Macy if (err == EOVERFLOW || err == EINVAL) 1123*eda14cbcSMatt Macy err = 0; /* found, but skipped reading the value */ 1124*eda14cbcSMatt Macy return (err); 1125*eda14cbcSMatt Macy } 1126*eda14cbcSMatt Macy 1127*eda14cbcSMatt Macy int 1128*eda14cbcSMatt Macy zap_length(objset_t *os, uint64_t zapobj, const char *name, 1129*eda14cbcSMatt Macy uint64_t *integer_size, uint64_t *num_integers) 1130*eda14cbcSMatt Macy { 1131*eda14cbcSMatt Macy zap_t *zap; 1132*eda14cbcSMatt Macy 1133*eda14cbcSMatt Macy int err = 1134*eda14cbcSMatt Macy zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); 1135*eda14cbcSMatt Macy if (err != 0) 1136*eda14cbcSMatt Macy return (err); 1137*eda14cbcSMatt Macy zap_name_t *zn = zap_name_alloc(zap, name, 0); 1138*eda14cbcSMatt Macy if (zn == NULL) { 1139*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1140*eda14cbcSMatt Macy return (SET_ERROR(ENOTSUP)); 1141*eda14cbcSMatt Macy } 1142*eda14cbcSMatt Macy if (!zap->zap_ismicro) { 1143*eda14cbcSMatt Macy err = fzap_length(zn, integer_size, num_integers); 1144*eda14cbcSMatt Macy } else { 1145*eda14cbcSMatt Macy mzap_ent_t *mze = mze_find(zn); 1146*eda14cbcSMatt Macy if (mze == NULL) { 1147*eda14cbcSMatt Macy err = SET_ERROR(ENOENT); 1148*eda14cbcSMatt Macy } else { 1149*eda14cbcSMatt Macy if (integer_size) 1150*eda14cbcSMatt Macy *integer_size = 8; 1151*eda14cbcSMatt Macy if (num_integers) 1152*eda14cbcSMatt Macy *num_integers = 1; 1153*eda14cbcSMatt Macy } 1154*eda14cbcSMatt Macy } 1155*eda14cbcSMatt Macy zap_name_free(zn); 1156*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1157*eda14cbcSMatt Macy return (err); 1158*eda14cbcSMatt Macy } 1159*eda14cbcSMatt Macy 1160*eda14cbcSMatt Macy int 1161*eda14cbcSMatt Macy zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 1162*eda14cbcSMatt Macy int key_numints, uint64_t *integer_size, uint64_t *num_integers) 1163*eda14cbcSMatt Macy { 1164*eda14cbcSMatt Macy zap_t *zap; 1165*eda14cbcSMatt Macy 1166*eda14cbcSMatt Macy int err = 1167*eda14cbcSMatt Macy zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); 1168*eda14cbcSMatt Macy if (err != 0) 1169*eda14cbcSMatt Macy return (err); 1170*eda14cbcSMatt Macy zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints); 1171*eda14cbcSMatt Macy if (zn == NULL) { 1172*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1173*eda14cbcSMatt Macy return (SET_ERROR(ENOTSUP)); 1174*eda14cbcSMatt Macy } 1175*eda14cbcSMatt Macy err = fzap_length(zn, integer_size, num_integers); 1176*eda14cbcSMatt Macy zap_name_free(zn); 1177*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1178*eda14cbcSMatt Macy return (err); 1179*eda14cbcSMatt Macy } 1180*eda14cbcSMatt Macy 1181*eda14cbcSMatt Macy static void 1182*eda14cbcSMatt Macy mzap_addent(zap_name_t *zn, uint64_t value) 1183*eda14cbcSMatt Macy { 1184*eda14cbcSMatt Macy zap_t *zap = zn->zn_zap; 1185*eda14cbcSMatt Macy int start = zap->zap_m.zap_alloc_next; 1186*eda14cbcSMatt Macy 1187*eda14cbcSMatt Macy ASSERT(RW_WRITE_HELD(&zap->zap_rwlock)); 1188*eda14cbcSMatt Macy 1189*eda14cbcSMatt Macy #ifdef ZFS_DEBUG 1190*eda14cbcSMatt Macy for (int i = 0; i < zap->zap_m.zap_num_chunks; i++) { 1191*eda14cbcSMatt Macy mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i]; 1192*eda14cbcSMatt Macy ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0); 1193*eda14cbcSMatt Macy } 1194*eda14cbcSMatt Macy #endif 1195*eda14cbcSMatt Macy 1196*eda14cbcSMatt Macy uint32_t cd = mze_find_unused_cd(zap, zn->zn_hash); 1197*eda14cbcSMatt Macy /* given the limited size of the microzap, this can't happen */ 1198*eda14cbcSMatt Macy ASSERT(cd < zap_maxcd(zap)); 1199*eda14cbcSMatt Macy 1200*eda14cbcSMatt Macy again: 1201*eda14cbcSMatt Macy for (int i = start; i < zap->zap_m.zap_num_chunks; i++) { 1202*eda14cbcSMatt Macy mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i]; 1203*eda14cbcSMatt Macy if (mze->mze_name[0] == 0) { 1204*eda14cbcSMatt Macy mze->mze_value = value; 1205*eda14cbcSMatt Macy mze->mze_cd = cd; 1206*eda14cbcSMatt Macy (void) strlcpy(mze->mze_name, zn->zn_key_orig, 1207*eda14cbcSMatt Macy sizeof (mze->mze_name)); 1208*eda14cbcSMatt Macy zap->zap_m.zap_num_entries++; 1209*eda14cbcSMatt Macy zap->zap_m.zap_alloc_next = i+1; 1210*eda14cbcSMatt Macy if (zap->zap_m.zap_alloc_next == 1211*eda14cbcSMatt Macy zap->zap_m.zap_num_chunks) 1212*eda14cbcSMatt Macy zap->zap_m.zap_alloc_next = 0; 1213*eda14cbcSMatt Macy mze_insert(zap, i, zn->zn_hash); 1214*eda14cbcSMatt Macy return; 1215*eda14cbcSMatt Macy } 1216*eda14cbcSMatt Macy } 1217*eda14cbcSMatt Macy if (start != 0) { 1218*eda14cbcSMatt Macy start = 0; 1219*eda14cbcSMatt Macy goto again; 1220*eda14cbcSMatt Macy } 1221*eda14cbcSMatt Macy cmn_err(CE_PANIC, "out of entries!"); 1222*eda14cbcSMatt Macy } 1223*eda14cbcSMatt Macy 1224*eda14cbcSMatt Macy static int 1225*eda14cbcSMatt Macy zap_add_impl(zap_t *zap, const char *key, 1226*eda14cbcSMatt Macy int integer_size, uint64_t num_integers, 1227*eda14cbcSMatt Macy const void *val, dmu_tx_t *tx, void *tag) 1228*eda14cbcSMatt Macy { 1229*eda14cbcSMatt Macy const uint64_t *intval = val; 1230*eda14cbcSMatt Macy int err = 0; 1231*eda14cbcSMatt Macy 1232*eda14cbcSMatt Macy zap_name_t *zn = zap_name_alloc(zap, key, 0); 1233*eda14cbcSMatt Macy if (zn == NULL) { 1234*eda14cbcSMatt Macy zap_unlockdir(zap, tag); 1235*eda14cbcSMatt Macy return (SET_ERROR(ENOTSUP)); 1236*eda14cbcSMatt Macy } 1237*eda14cbcSMatt Macy if (!zap->zap_ismicro) { 1238*eda14cbcSMatt Macy err = fzap_add(zn, integer_size, num_integers, val, tag, tx); 1239*eda14cbcSMatt Macy zap = zn->zn_zap; /* fzap_add() may change zap */ 1240*eda14cbcSMatt Macy } else if (integer_size != 8 || num_integers != 1 || 1241*eda14cbcSMatt Macy strlen(key) >= MZAP_NAME_LEN || 1242*eda14cbcSMatt Macy !mze_canfit_fzap_leaf(zn, zn->zn_hash)) { 1243*eda14cbcSMatt Macy err = mzap_upgrade(&zn->zn_zap, tag, tx, 0); 1244*eda14cbcSMatt Macy if (err == 0) { 1245*eda14cbcSMatt Macy err = fzap_add(zn, integer_size, num_integers, val, 1246*eda14cbcSMatt Macy tag, tx); 1247*eda14cbcSMatt Macy } 1248*eda14cbcSMatt Macy zap = zn->zn_zap; /* fzap_add() may change zap */ 1249*eda14cbcSMatt Macy } else { 1250*eda14cbcSMatt Macy if (mze_find(zn) != NULL) { 1251*eda14cbcSMatt Macy err = SET_ERROR(EEXIST); 1252*eda14cbcSMatt Macy } else { 1253*eda14cbcSMatt Macy mzap_addent(zn, *intval); 1254*eda14cbcSMatt Macy } 1255*eda14cbcSMatt Macy } 1256*eda14cbcSMatt Macy ASSERT(zap == zn->zn_zap); 1257*eda14cbcSMatt Macy zap_name_free(zn); 1258*eda14cbcSMatt Macy if (zap != NULL) /* may be NULL if fzap_add() failed */ 1259*eda14cbcSMatt Macy zap_unlockdir(zap, tag); 1260*eda14cbcSMatt Macy return (err); 1261*eda14cbcSMatt Macy } 1262*eda14cbcSMatt Macy 1263*eda14cbcSMatt Macy int 1264*eda14cbcSMatt Macy zap_add(objset_t *os, uint64_t zapobj, const char *key, 1265*eda14cbcSMatt Macy int integer_size, uint64_t num_integers, 1266*eda14cbcSMatt Macy const void *val, dmu_tx_t *tx) 1267*eda14cbcSMatt Macy { 1268*eda14cbcSMatt Macy zap_t *zap; 1269*eda14cbcSMatt Macy int err; 1270*eda14cbcSMatt Macy 1271*eda14cbcSMatt Macy err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap); 1272*eda14cbcSMatt Macy if (err != 0) 1273*eda14cbcSMatt Macy return (err); 1274*eda14cbcSMatt Macy err = zap_add_impl(zap, key, integer_size, num_integers, val, tx, FTAG); 1275*eda14cbcSMatt Macy /* zap_add_impl() calls zap_unlockdir() */ 1276*eda14cbcSMatt Macy return (err); 1277*eda14cbcSMatt Macy } 1278*eda14cbcSMatt Macy 1279*eda14cbcSMatt Macy int 1280*eda14cbcSMatt Macy zap_add_by_dnode(dnode_t *dn, const char *key, 1281*eda14cbcSMatt Macy int integer_size, uint64_t num_integers, 1282*eda14cbcSMatt Macy const void *val, dmu_tx_t *tx) 1283*eda14cbcSMatt Macy { 1284*eda14cbcSMatt Macy zap_t *zap; 1285*eda14cbcSMatt Macy int err; 1286*eda14cbcSMatt Macy 1287*eda14cbcSMatt Macy err = zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap); 1288*eda14cbcSMatt Macy if (err != 0) 1289*eda14cbcSMatt Macy return (err); 1290*eda14cbcSMatt Macy err = zap_add_impl(zap, key, integer_size, num_integers, val, tx, FTAG); 1291*eda14cbcSMatt Macy /* zap_add_impl() calls zap_unlockdir() */ 1292*eda14cbcSMatt Macy return (err); 1293*eda14cbcSMatt Macy } 1294*eda14cbcSMatt Macy 1295*eda14cbcSMatt Macy int 1296*eda14cbcSMatt Macy zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 1297*eda14cbcSMatt Macy int key_numints, int integer_size, uint64_t num_integers, 1298*eda14cbcSMatt Macy const void *val, dmu_tx_t *tx) 1299*eda14cbcSMatt Macy { 1300*eda14cbcSMatt Macy zap_t *zap; 1301*eda14cbcSMatt Macy 1302*eda14cbcSMatt Macy int err = 1303*eda14cbcSMatt Macy zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap); 1304*eda14cbcSMatt Macy if (err != 0) 1305*eda14cbcSMatt Macy return (err); 1306*eda14cbcSMatt Macy zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints); 1307*eda14cbcSMatt Macy if (zn == NULL) { 1308*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1309*eda14cbcSMatt Macy return (SET_ERROR(ENOTSUP)); 1310*eda14cbcSMatt Macy } 1311*eda14cbcSMatt Macy err = fzap_add(zn, integer_size, num_integers, val, FTAG, tx); 1312*eda14cbcSMatt Macy zap = zn->zn_zap; /* fzap_add() may change zap */ 1313*eda14cbcSMatt Macy zap_name_free(zn); 1314*eda14cbcSMatt Macy if (zap != NULL) /* may be NULL if fzap_add() failed */ 1315*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1316*eda14cbcSMatt Macy return (err); 1317*eda14cbcSMatt Macy } 1318*eda14cbcSMatt Macy 1319*eda14cbcSMatt Macy int 1320*eda14cbcSMatt Macy zap_update(objset_t *os, uint64_t zapobj, const char *name, 1321*eda14cbcSMatt Macy int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) 1322*eda14cbcSMatt Macy { 1323*eda14cbcSMatt Macy zap_t *zap; 1324*eda14cbcSMatt Macy const uint64_t *intval = val; 1325*eda14cbcSMatt Macy 1326*eda14cbcSMatt Macy int err = 1327*eda14cbcSMatt Macy zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap); 1328*eda14cbcSMatt Macy if (err != 0) 1329*eda14cbcSMatt Macy return (err); 1330*eda14cbcSMatt Macy zap_name_t *zn = zap_name_alloc(zap, name, 0); 1331*eda14cbcSMatt Macy if (zn == NULL) { 1332*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1333*eda14cbcSMatt Macy return (SET_ERROR(ENOTSUP)); 1334*eda14cbcSMatt Macy } 1335*eda14cbcSMatt Macy if (!zap->zap_ismicro) { 1336*eda14cbcSMatt Macy err = fzap_update(zn, integer_size, num_integers, val, 1337*eda14cbcSMatt Macy FTAG, tx); 1338*eda14cbcSMatt Macy zap = zn->zn_zap; /* fzap_update() may change zap */ 1339*eda14cbcSMatt Macy } else if (integer_size != 8 || num_integers != 1 || 1340*eda14cbcSMatt Macy strlen(name) >= MZAP_NAME_LEN) { 1341*eda14cbcSMatt Macy dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n", 1342*eda14cbcSMatt Macy zapobj, integer_size, num_integers, name); 1343*eda14cbcSMatt Macy err = mzap_upgrade(&zn->zn_zap, FTAG, tx, 0); 1344*eda14cbcSMatt Macy if (err == 0) { 1345*eda14cbcSMatt Macy err = fzap_update(zn, integer_size, num_integers, 1346*eda14cbcSMatt Macy val, FTAG, tx); 1347*eda14cbcSMatt Macy } 1348*eda14cbcSMatt Macy zap = zn->zn_zap; /* fzap_update() may change zap */ 1349*eda14cbcSMatt Macy } else { 1350*eda14cbcSMatt Macy mzap_ent_t *mze = mze_find(zn); 1351*eda14cbcSMatt Macy if (mze != NULL) { 1352*eda14cbcSMatt Macy MZE_PHYS(zap, mze)->mze_value = *intval; 1353*eda14cbcSMatt Macy } else { 1354*eda14cbcSMatt Macy mzap_addent(zn, *intval); 1355*eda14cbcSMatt Macy } 1356*eda14cbcSMatt Macy } 1357*eda14cbcSMatt Macy ASSERT(zap == zn->zn_zap); 1358*eda14cbcSMatt Macy zap_name_free(zn); 1359*eda14cbcSMatt Macy if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ 1360*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1361*eda14cbcSMatt Macy return (err); 1362*eda14cbcSMatt Macy } 1363*eda14cbcSMatt Macy 1364*eda14cbcSMatt Macy int 1365*eda14cbcSMatt Macy zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 1366*eda14cbcSMatt Macy int key_numints, 1367*eda14cbcSMatt Macy int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx) 1368*eda14cbcSMatt Macy { 1369*eda14cbcSMatt Macy zap_t *zap; 1370*eda14cbcSMatt Macy 1371*eda14cbcSMatt Macy int err = 1372*eda14cbcSMatt Macy zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap); 1373*eda14cbcSMatt Macy if (err != 0) 1374*eda14cbcSMatt Macy return (err); 1375*eda14cbcSMatt Macy zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints); 1376*eda14cbcSMatt Macy if (zn == NULL) { 1377*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1378*eda14cbcSMatt Macy return (SET_ERROR(ENOTSUP)); 1379*eda14cbcSMatt Macy } 1380*eda14cbcSMatt Macy err = fzap_update(zn, integer_size, num_integers, val, FTAG, tx); 1381*eda14cbcSMatt Macy zap = zn->zn_zap; /* fzap_update() may change zap */ 1382*eda14cbcSMatt Macy zap_name_free(zn); 1383*eda14cbcSMatt Macy if (zap != NULL) /* may be NULL if fzap_upgrade() failed */ 1384*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1385*eda14cbcSMatt Macy return (err); 1386*eda14cbcSMatt Macy } 1387*eda14cbcSMatt Macy 1388*eda14cbcSMatt Macy int 1389*eda14cbcSMatt Macy zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx) 1390*eda14cbcSMatt Macy { 1391*eda14cbcSMatt Macy return (zap_remove_norm(os, zapobj, name, 0, tx)); 1392*eda14cbcSMatt Macy } 1393*eda14cbcSMatt Macy 1394*eda14cbcSMatt Macy static int 1395*eda14cbcSMatt Macy zap_remove_impl(zap_t *zap, const char *name, 1396*eda14cbcSMatt Macy matchtype_t mt, dmu_tx_t *tx) 1397*eda14cbcSMatt Macy { 1398*eda14cbcSMatt Macy int err = 0; 1399*eda14cbcSMatt Macy 1400*eda14cbcSMatt Macy zap_name_t *zn = zap_name_alloc(zap, name, mt); 1401*eda14cbcSMatt Macy if (zn == NULL) 1402*eda14cbcSMatt Macy return (SET_ERROR(ENOTSUP)); 1403*eda14cbcSMatt Macy if (!zap->zap_ismicro) { 1404*eda14cbcSMatt Macy err = fzap_remove(zn, tx); 1405*eda14cbcSMatt Macy } else { 1406*eda14cbcSMatt Macy mzap_ent_t *mze = mze_find(zn); 1407*eda14cbcSMatt Macy if (mze == NULL) { 1408*eda14cbcSMatt Macy err = SET_ERROR(ENOENT); 1409*eda14cbcSMatt Macy } else { 1410*eda14cbcSMatt Macy zap->zap_m.zap_num_entries--; 1411*eda14cbcSMatt Macy bzero(&zap_m_phys(zap)->mz_chunk[mze->mze_chunkid], 1412*eda14cbcSMatt Macy sizeof (mzap_ent_phys_t)); 1413*eda14cbcSMatt Macy mze_remove(zap, mze); 1414*eda14cbcSMatt Macy } 1415*eda14cbcSMatt Macy } 1416*eda14cbcSMatt Macy zap_name_free(zn); 1417*eda14cbcSMatt Macy return (err); 1418*eda14cbcSMatt Macy } 1419*eda14cbcSMatt Macy 1420*eda14cbcSMatt Macy int 1421*eda14cbcSMatt Macy zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name, 1422*eda14cbcSMatt Macy matchtype_t mt, dmu_tx_t *tx) 1423*eda14cbcSMatt Macy { 1424*eda14cbcSMatt Macy zap_t *zap; 1425*eda14cbcSMatt Macy int err; 1426*eda14cbcSMatt Macy 1427*eda14cbcSMatt Macy err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap); 1428*eda14cbcSMatt Macy if (err) 1429*eda14cbcSMatt Macy return (err); 1430*eda14cbcSMatt Macy err = zap_remove_impl(zap, name, mt, tx); 1431*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1432*eda14cbcSMatt Macy return (err); 1433*eda14cbcSMatt Macy } 1434*eda14cbcSMatt Macy 1435*eda14cbcSMatt Macy int 1436*eda14cbcSMatt Macy zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx) 1437*eda14cbcSMatt Macy { 1438*eda14cbcSMatt Macy zap_t *zap; 1439*eda14cbcSMatt Macy int err; 1440*eda14cbcSMatt Macy 1441*eda14cbcSMatt Macy err = zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap); 1442*eda14cbcSMatt Macy if (err) 1443*eda14cbcSMatt Macy return (err); 1444*eda14cbcSMatt Macy err = zap_remove_impl(zap, name, 0, tx); 1445*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1446*eda14cbcSMatt Macy return (err); 1447*eda14cbcSMatt Macy } 1448*eda14cbcSMatt Macy 1449*eda14cbcSMatt Macy int 1450*eda14cbcSMatt Macy zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key, 1451*eda14cbcSMatt Macy int key_numints, dmu_tx_t *tx) 1452*eda14cbcSMatt Macy { 1453*eda14cbcSMatt Macy zap_t *zap; 1454*eda14cbcSMatt Macy 1455*eda14cbcSMatt Macy int err = 1456*eda14cbcSMatt Macy zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap); 1457*eda14cbcSMatt Macy if (err != 0) 1458*eda14cbcSMatt Macy return (err); 1459*eda14cbcSMatt Macy zap_name_t *zn = zap_name_alloc_uint64(zap, key, key_numints); 1460*eda14cbcSMatt Macy if (zn == NULL) { 1461*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1462*eda14cbcSMatt Macy return (SET_ERROR(ENOTSUP)); 1463*eda14cbcSMatt Macy } 1464*eda14cbcSMatt Macy err = fzap_remove(zn, tx); 1465*eda14cbcSMatt Macy zap_name_free(zn); 1466*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1467*eda14cbcSMatt Macy return (err); 1468*eda14cbcSMatt Macy } 1469*eda14cbcSMatt Macy 1470*eda14cbcSMatt Macy /* 1471*eda14cbcSMatt Macy * Routines for iterating over the attributes. 1472*eda14cbcSMatt Macy */ 1473*eda14cbcSMatt Macy 1474*eda14cbcSMatt Macy static void 1475*eda14cbcSMatt Macy zap_cursor_init_impl(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, 1476*eda14cbcSMatt Macy uint64_t serialized, boolean_t prefetch) 1477*eda14cbcSMatt Macy { 1478*eda14cbcSMatt Macy zc->zc_objset = os; 1479*eda14cbcSMatt Macy zc->zc_zap = NULL; 1480*eda14cbcSMatt Macy zc->zc_leaf = NULL; 1481*eda14cbcSMatt Macy zc->zc_zapobj = zapobj; 1482*eda14cbcSMatt Macy zc->zc_serialized = serialized; 1483*eda14cbcSMatt Macy zc->zc_hash = 0; 1484*eda14cbcSMatt Macy zc->zc_cd = 0; 1485*eda14cbcSMatt Macy zc->zc_prefetch = prefetch; 1486*eda14cbcSMatt Macy } 1487*eda14cbcSMatt Macy void 1488*eda14cbcSMatt Macy zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj, 1489*eda14cbcSMatt Macy uint64_t serialized) 1490*eda14cbcSMatt Macy { 1491*eda14cbcSMatt Macy zap_cursor_init_impl(zc, os, zapobj, serialized, B_TRUE); 1492*eda14cbcSMatt Macy } 1493*eda14cbcSMatt Macy 1494*eda14cbcSMatt Macy /* 1495*eda14cbcSMatt Macy * Initialize a cursor at the beginning of the ZAP object. The entire 1496*eda14cbcSMatt Macy * ZAP object will be prefetched. 1497*eda14cbcSMatt Macy */ 1498*eda14cbcSMatt Macy void 1499*eda14cbcSMatt Macy zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) 1500*eda14cbcSMatt Macy { 1501*eda14cbcSMatt Macy zap_cursor_init_impl(zc, os, zapobj, 0, B_TRUE); 1502*eda14cbcSMatt Macy } 1503*eda14cbcSMatt Macy 1504*eda14cbcSMatt Macy /* 1505*eda14cbcSMatt Macy * Initialize a cursor at the beginning, but request that we not prefetch 1506*eda14cbcSMatt Macy * the entire ZAP object. 1507*eda14cbcSMatt Macy */ 1508*eda14cbcSMatt Macy void 1509*eda14cbcSMatt Macy zap_cursor_init_noprefetch(zap_cursor_t *zc, objset_t *os, uint64_t zapobj) 1510*eda14cbcSMatt Macy { 1511*eda14cbcSMatt Macy zap_cursor_init_impl(zc, os, zapobj, 0, B_FALSE); 1512*eda14cbcSMatt Macy } 1513*eda14cbcSMatt Macy 1514*eda14cbcSMatt Macy void 1515*eda14cbcSMatt Macy zap_cursor_fini(zap_cursor_t *zc) 1516*eda14cbcSMatt Macy { 1517*eda14cbcSMatt Macy if (zc->zc_zap) { 1518*eda14cbcSMatt Macy rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); 1519*eda14cbcSMatt Macy zap_unlockdir(zc->zc_zap, NULL); 1520*eda14cbcSMatt Macy zc->zc_zap = NULL; 1521*eda14cbcSMatt Macy } 1522*eda14cbcSMatt Macy if (zc->zc_leaf) { 1523*eda14cbcSMatt Macy rw_enter(&zc->zc_leaf->l_rwlock, RW_READER); 1524*eda14cbcSMatt Macy zap_put_leaf(zc->zc_leaf); 1525*eda14cbcSMatt Macy zc->zc_leaf = NULL; 1526*eda14cbcSMatt Macy } 1527*eda14cbcSMatt Macy zc->zc_objset = NULL; 1528*eda14cbcSMatt Macy } 1529*eda14cbcSMatt Macy 1530*eda14cbcSMatt Macy uint64_t 1531*eda14cbcSMatt Macy zap_cursor_serialize(zap_cursor_t *zc) 1532*eda14cbcSMatt Macy { 1533*eda14cbcSMatt Macy if (zc->zc_hash == -1ULL) 1534*eda14cbcSMatt Macy return (-1ULL); 1535*eda14cbcSMatt Macy if (zc->zc_zap == NULL) 1536*eda14cbcSMatt Macy return (zc->zc_serialized); 1537*eda14cbcSMatt Macy ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0); 1538*eda14cbcSMatt Macy ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap)); 1539*eda14cbcSMatt Macy 1540*eda14cbcSMatt Macy /* 1541*eda14cbcSMatt Macy * We want to keep the high 32 bits of the cursor zero if we can, so 1542*eda14cbcSMatt Macy * that 32-bit programs can access this. So usually use a small 1543*eda14cbcSMatt Macy * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits 1544*eda14cbcSMatt Macy * of the cursor. 1545*eda14cbcSMatt Macy * 1546*eda14cbcSMatt Macy * [ collision differentiator | zap_hashbits()-bit hash value ] 1547*eda14cbcSMatt Macy */ 1548*eda14cbcSMatt Macy return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) | 1549*eda14cbcSMatt Macy ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap))); 1550*eda14cbcSMatt Macy } 1551*eda14cbcSMatt Macy 1552*eda14cbcSMatt Macy int 1553*eda14cbcSMatt Macy zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za) 1554*eda14cbcSMatt Macy { 1555*eda14cbcSMatt Macy int err; 1556*eda14cbcSMatt Macy 1557*eda14cbcSMatt Macy if (zc->zc_hash == -1ULL) 1558*eda14cbcSMatt Macy return (SET_ERROR(ENOENT)); 1559*eda14cbcSMatt Macy 1560*eda14cbcSMatt Macy if (zc->zc_zap == NULL) { 1561*eda14cbcSMatt Macy int hb; 1562*eda14cbcSMatt Macy err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL, 1563*eda14cbcSMatt Macy RW_READER, TRUE, FALSE, NULL, &zc->zc_zap); 1564*eda14cbcSMatt Macy if (err != 0) 1565*eda14cbcSMatt Macy return (err); 1566*eda14cbcSMatt Macy 1567*eda14cbcSMatt Macy /* 1568*eda14cbcSMatt Macy * To support zap_cursor_init_serialized, advance, retrieve, 1569*eda14cbcSMatt Macy * we must add to the existing zc_cd, which may already 1570*eda14cbcSMatt Macy * be 1 due to the zap_cursor_advance. 1571*eda14cbcSMatt Macy */ 1572*eda14cbcSMatt Macy ASSERT(zc->zc_hash == 0); 1573*eda14cbcSMatt Macy hb = zap_hashbits(zc->zc_zap); 1574*eda14cbcSMatt Macy zc->zc_hash = zc->zc_serialized << (64 - hb); 1575*eda14cbcSMatt Macy zc->zc_cd += zc->zc_serialized >> hb; 1576*eda14cbcSMatt Macy if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */ 1577*eda14cbcSMatt Macy zc->zc_cd = 0; 1578*eda14cbcSMatt Macy } else { 1579*eda14cbcSMatt Macy rw_enter(&zc->zc_zap->zap_rwlock, RW_READER); 1580*eda14cbcSMatt Macy } 1581*eda14cbcSMatt Macy if (!zc->zc_zap->zap_ismicro) { 1582*eda14cbcSMatt Macy err = fzap_cursor_retrieve(zc->zc_zap, zc, za); 1583*eda14cbcSMatt Macy } else { 1584*eda14cbcSMatt Macy avl_index_t idx; 1585*eda14cbcSMatt Macy mzap_ent_t mze_tofind; 1586*eda14cbcSMatt Macy 1587*eda14cbcSMatt Macy mze_tofind.mze_hash = zc->zc_hash; 1588*eda14cbcSMatt Macy mze_tofind.mze_cd = zc->zc_cd; 1589*eda14cbcSMatt Macy 1590*eda14cbcSMatt Macy mzap_ent_t *mze = 1591*eda14cbcSMatt Macy avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx); 1592*eda14cbcSMatt Macy if (mze == NULL) { 1593*eda14cbcSMatt Macy mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl, 1594*eda14cbcSMatt Macy idx, AVL_AFTER); 1595*eda14cbcSMatt Macy } 1596*eda14cbcSMatt Macy if (mze) { 1597*eda14cbcSMatt Macy mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze); 1598*eda14cbcSMatt Macy ASSERT3U(mze->mze_cd, ==, mzep->mze_cd); 1599*eda14cbcSMatt Macy za->za_normalization_conflict = 1600*eda14cbcSMatt Macy mzap_normalization_conflict(zc->zc_zap, NULL, mze); 1601*eda14cbcSMatt Macy za->za_integer_length = 8; 1602*eda14cbcSMatt Macy za->za_num_integers = 1; 1603*eda14cbcSMatt Macy za->za_first_integer = mzep->mze_value; 1604*eda14cbcSMatt Macy (void) strlcpy(za->za_name, mzep->mze_name, 1605*eda14cbcSMatt Macy sizeof (za->za_name)); 1606*eda14cbcSMatt Macy zc->zc_hash = mze->mze_hash; 1607*eda14cbcSMatt Macy zc->zc_cd = mze->mze_cd; 1608*eda14cbcSMatt Macy err = 0; 1609*eda14cbcSMatt Macy } else { 1610*eda14cbcSMatt Macy zc->zc_hash = -1ULL; 1611*eda14cbcSMatt Macy err = SET_ERROR(ENOENT); 1612*eda14cbcSMatt Macy } 1613*eda14cbcSMatt Macy } 1614*eda14cbcSMatt Macy rw_exit(&zc->zc_zap->zap_rwlock); 1615*eda14cbcSMatt Macy return (err); 1616*eda14cbcSMatt Macy } 1617*eda14cbcSMatt Macy 1618*eda14cbcSMatt Macy void 1619*eda14cbcSMatt Macy zap_cursor_advance(zap_cursor_t *zc) 1620*eda14cbcSMatt Macy { 1621*eda14cbcSMatt Macy if (zc->zc_hash == -1ULL) 1622*eda14cbcSMatt Macy return; 1623*eda14cbcSMatt Macy zc->zc_cd++; 1624*eda14cbcSMatt Macy } 1625*eda14cbcSMatt Macy 1626*eda14cbcSMatt Macy int 1627*eda14cbcSMatt Macy zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs) 1628*eda14cbcSMatt Macy { 1629*eda14cbcSMatt Macy zap_t *zap; 1630*eda14cbcSMatt Macy 1631*eda14cbcSMatt Macy int err = 1632*eda14cbcSMatt Macy zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap); 1633*eda14cbcSMatt Macy if (err != 0) 1634*eda14cbcSMatt Macy return (err); 1635*eda14cbcSMatt Macy 1636*eda14cbcSMatt Macy bzero(zs, sizeof (zap_stats_t)); 1637*eda14cbcSMatt Macy 1638*eda14cbcSMatt Macy if (zap->zap_ismicro) { 1639*eda14cbcSMatt Macy zs->zs_blocksize = zap->zap_dbuf->db_size; 1640*eda14cbcSMatt Macy zs->zs_num_entries = zap->zap_m.zap_num_entries; 1641*eda14cbcSMatt Macy zs->zs_num_blocks = 1; 1642*eda14cbcSMatt Macy } else { 1643*eda14cbcSMatt Macy fzap_get_stats(zap, zs); 1644*eda14cbcSMatt Macy } 1645*eda14cbcSMatt Macy zap_unlockdir(zap, FTAG); 1646*eda14cbcSMatt Macy return (0); 1647*eda14cbcSMatt Macy } 1648*eda14cbcSMatt Macy 1649*eda14cbcSMatt Macy #if defined(_KERNEL) 1650*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_create); 1651*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_create_dnsize); 1652*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_create_norm); 1653*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_create_norm_dnsize); 1654*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_create_flags); 1655*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_create_flags_dnsize); 1656*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_create_claim); 1657*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_create_claim_norm); 1658*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_create_claim_norm_dnsize); 1659*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_create_hold); 1660*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_destroy); 1661*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_lookup); 1662*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_lookup_by_dnode); 1663*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_lookup_norm); 1664*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_lookup_uint64); 1665*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_contains); 1666*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_prefetch); 1667*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_prefetch_uint64); 1668*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_add); 1669*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_add_by_dnode); 1670*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_add_uint64); 1671*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_update); 1672*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_update_uint64); 1673*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_length); 1674*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_length_uint64); 1675*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_remove); 1676*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_remove_by_dnode); 1677*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_remove_norm); 1678*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_remove_uint64); 1679*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_count); 1680*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_value_search); 1681*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_join); 1682*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_join_increment); 1683*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_add_int); 1684*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_remove_int); 1685*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_lookup_int); 1686*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_increment_int); 1687*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_add_int_key); 1688*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_lookup_int_key); 1689*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_increment); 1690*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_cursor_init); 1691*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_cursor_fini); 1692*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_cursor_retrieve); 1693*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_cursor_advance); 1694*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_cursor_serialize); 1695*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_cursor_init_serialized); 1696*eda14cbcSMatt Macy EXPORT_SYMBOL(zap_get_stats); 1697*eda14cbcSMatt Macy #endif 1698