1c1cb2cd8Shaad /*
2c1cb2cd8Shaad * CDDL HEADER START
3c1cb2cd8Shaad *
4c1cb2cd8Shaad * The contents of this file are subject to the terms of the
5c1cb2cd8Shaad * Common Development and Distribution License (the "License").
6c1cb2cd8Shaad * You may not use this file except in compliance with the License.
7c1cb2cd8Shaad *
8c1cb2cd8Shaad * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9c1cb2cd8Shaad * or http://www.opensolaris.org/os/licensing.
10c1cb2cd8Shaad * See the License for the specific language governing permissions
11c1cb2cd8Shaad * and limitations under the License.
12c1cb2cd8Shaad *
13c1cb2cd8Shaad * When distributing Covered Code, include this CDDL HEADER in each
14c1cb2cd8Shaad * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15c1cb2cd8Shaad * If applicable, add the following below this CDDL HEADER, with the
16c1cb2cd8Shaad * fields enclosed by brackets "[]" replaced with your own identifying
17c1cb2cd8Shaad * information: Portions Copyright [yyyy] [name of copyright owner]
18c1cb2cd8Shaad *
19c1cb2cd8Shaad * CDDL HEADER END
20c1cb2cd8Shaad */
21c1cb2cd8Shaad /*
22*ba2539a9Schs * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23*ba2539a9Schs * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
24*ba2539a9Schs * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
25*ba2539a9Schs * Copyright (c) 2014 Integros [integros.com]
26c1cb2cd8Shaad */
27c1cb2cd8Shaad
28a252d550Shaad #include <sys/zio.h>
29c1cb2cd8Shaad #include <sys/spa.h>
30c1cb2cd8Shaad #include <sys/dmu.h>
31c1cb2cd8Shaad #include <sys/zfs_context.h>
32c1cb2cd8Shaad #include <sys/zap.h>
33c1cb2cd8Shaad #include <sys/refcount.h>
34c1cb2cd8Shaad #include <sys/zap_impl.h>
35c1cb2cd8Shaad #include <sys/zap_leaf.h>
36c1cb2cd8Shaad #include <sys/avl.h>
37*ba2539a9Schs #include <sys/arc.h>
38*ba2539a9Schs #include <sys/dmu_objset.h>
39c1cb2cd8Shaad
40c1cb2cd8Shaad #ifdef _KERNEL
41c1cb2cd8Shaad #include <sys/sunddi.h>
42c1cb2cd8Shaad #endif
43c1cb2cd8Shaad
44*ba2539a9Schs extern inline mzap_phys_t *zap_m_phys(zap_t *zap);
45*ba2539a9Schs
46*ba2539a9Schs static int mzap_upgrade(zap_t **zapp,
47*ba2539a9Schs void *tag, dmu_tx_t *tx, zap_flags_t flags);
48c1cb2cd8Shaad
49a252d550Shaad uint64_t
zap_getflags(zap_t * zap)50a252d550Shaad zap_getflags(zap_t *zap)
51c1cb2cd8Shaad {
52a252d550Shaad if (zap->zap_ismicro)
53a252d550Shaad return (0);
54*ba2539a9Schs return (zap_f_phys(zap)->zap_flags);
55c1cb2cd8Shaad }
56c1cb2cd8Shaad
57a252d550Shaad int
zap_hashbits(zap_t * zap)58a252d550Shaad zap_hashbits(zap_t *zap)
59a252d550Shaad {
60a252d550Shaad if (zap_getflags(zap) & ZAP_FLAG_HASH64)
61a252d550Shaad return (48);
62a252d550Shaad else
63a252d550Shaad return (28);
64a252d550Shaad }
65a252d550Shaad
66a252d550Shaad uint32_t
zap_maxcd(zap_t * zap)67a252d550Shaad zap_maxcd(zap_t *zap)
68a252d550Shaad {
69a252d550Shaad if (zap_getflags(zap) & ZAP_FLAG_HASH64)
70a252d550Shaad return ((1<<16)-1);
71a252d550Shaad else
72a252d550Shaad return (-1U);
73a252d550Shaad }
74a252d550Shaad
75a252d550Shaad static uint64_t
zap_hash(zap_name_t * zn)76a252d550Shaad zap_hash(zap_name_t *zn)
77a252d550Shaad {
78a252d550Shaad zap_t *zap = zn->zn_zap;
79a252d550Shaad uint64_t h = 0;
80a252d550Shaad
81a252d550Shaad if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) {
82a252d550Shaad ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY);
83a252d550Shaad h = *(uint64_t *)zn->zn_key_orig;
84a252d550Shaad } else {
85a252d550Shaad h = zap->zap_salt;
86a252d550Shaad ASSERT(h != 0);
87a252d550Shaad ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
88a252d550Shaad
89a252d550Shaad if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) {
90a252d550Shaad int i;
91a252d550Shaad const uint64_t *wp = zn->zn_key_norm;
92a252d550Shaad
93a252d550Shaad ASSERT(zn->zn_key_intlen == 8);
94a252d550Shaad for (i = 0; i < zn->zn_key_norm_numints; wp++, i++) {
95a252d550Shaad int j;
96a252d550Shaad uint64_t word = *wp;
97a252d550Shaad
98a252d550Shaad for (j = 0; j < zn->zn_key_intlen; j++) {
99a252d550Shaad h = (h >> 8) ^
100a252d550Shaad zfs_crc64_table[(h ^ word) & 0xFF];
101a252d550Shaad word >>= NBBY;
102a252d550Shaad }
103a252d550Shaad }
104a252d550Shaad } else {
105a252d550Shaad int i, len;
106a252d550Shaad const uint8_t *cp = zn->zn_key_norm;
107a252d550Shaad
108c1cb2cd8Shaad /*
109a252d550Shaad * We previously stored the terminating null on
110a252d550Shaad * disk, but didn't hash it, so we need to
111a252d550Shaad * continue to not hash it. (The
112a252d550Shaad * zn_key_*_numints includes the terminating
113a252d550Shaad * null for non-binary keys.)
114a252d550Shaad */
115a252d550Shaad len = zn->zn_key_norm_numints - 1;
116a252d550Shaad
117a252d550Shaad ASSERT(zn->zn_key_intlen == 1);
118a252d550Shaad for (i = 0; i < len; cp++, i++) {
119a252d550Shaad h = (h >> 8) ^
120a252d550Shaad zfs_crc64_table[(h ^ *cp) & 0xFF];
121a252d550Shaad }
122a252d550Shaad }
123a252d550Shaad }
124a252d550Shaad /*
125a252d550Shaad * Don't use all 64 bits, since we need some in the cookie for
126a252d550Shaad * the collision differentiator. We MUST use the high bits,
127a252d550Shaad * since those are the ones that we first pay attention to when
128c1cb2cd8Shaad * chosing the bucket.
129c1cb2cd8Shaad */
130a252d550Shaad h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1);
131c1cb2cd8Shaad
132a252d550Shaad return (h);
133c1cb2cd8Shaad }
134c1cb2cd8Shaad
135c1cb2cd8Shaad static int
zap_normalize(zap_t * zap,const char * name,char * namenorm)136c1cb2cd8Shaad zap_normalize(zap_t *zap, const char *name, char *namenorm)
137c1cb2cd8Shaad {
138c1cb2cd8Shaad size_t inlen, outlen;
139c1cb2cd8Shaad int err;
140c1cb2cd8Shaad
141a252d550Shaad ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY));
142a252d550Shaad
143c1cb2cd8Shaad inlen = strlen(name) + 1;
144c1cb2cd8Shaad outlen = ZAP_MAXNAMELEN;
145c1cb2cd8Shaad
146c1cb2cd8Shaad err = 0;
147c1cb2cd8Shaad (void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen,
148a252d550Shaad zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL |
149a252d550Shaad U8_TEXTPREP_IGNORE_INVALID, U8_UNICODE_LATEST, &err);
150c1cb2cd8Shaad
151c1cb2cd8Shaad return (err);
152c1cb2cd8Shaad }
153c1cb2cd8Shaad
154c1cb2cd8Shaad boolean_t
zap_match(zap_name_t * zn,const char * matchname)155c1cb2cd8Shaad zap_match(zap_name_t *zn, const char *matchname)
156c1cb2cd8Shaad {
157a252d550Shaad ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY));
158a252d550Shaad
159c1cb2cd8Shaad if (zn->zn_matchtype == MT_FIRST) {
160c1cb2cd8Shaad char norm[ZAP_MAXNAMELEN];
161c1cb2cd8Shaad
162c1cb2cd8Shaad if (zap_normalize(zn->zn_zap, matchname, norm) != 0)
163c1cb2cd8Shaad return (B_FALSE);
164c1cb2cd8Shaad
165a252d550Shaad return (strcmp(zn->zn_key_norm, norm) == 0);
166c1cb2cd8Shaad } else {
167c1cb2cd8Shaad /* MT_BEST or MT_EXACT */
168a252d550Shaad return (strcmp(zn->zn_key_orig, matchname) == 0);
169c1cb2cd8Shaad }
170c1cb2cd8Shaad }
171c1cb2cd8Shaad
172c1cb2cd8Shaad void
zap_name_free(zap_name_t * zn)173c1cb2cd8Shaad zap_name_free(zap_name_t *zn)
174c1cb2cd8Shaad {
175c1cb2cd8Shaad kmem_free(zn, sizeof (zap_name_t));
176c1cb2cd8Shaad }
177c1cb2cd8Shaad
178c1cb2cd8Shaad zap_name_t *
zap_name_alloc(zap_t * zap,const char * key,matchtype_t mt)179a252d550Shaad zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt)
180c1cb2cd8Shaad {
181c1cb2cd8Shaad zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
182c1cb2cd8Shaad
183c1cb2cd8Shaad zn->zn_zap = zap;
184a252d550Shaad zn->zn_key_intlen = sizeof (*key);
185a252d550Shaad zn->zn_key_orig = key;
186a252d550Shaad zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1;
187c1cb2cd8Shaad zn->zn_matchtype = mt;
188c1cb2cd8Shaad if (zap->zap_normflags) {
189a252d550Shaad if (zap_normalize(zap, key, zn->zn_normbuf) != 0) {
190c1cb2cd8Shaad zap_name_free(zn);
191c1cb2cd8Shaad return (NULL);
192c1cb2cd8Shaad }
193a252d550Shaad zn->zn_key_norm = zn->zn_normbuf;
194a252d550Shaad zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
195c1cb2cd8Shaad } else {
196c1cb2cd8Shaad if (mt != MT_EXACT) {
197c1cb2cd8Shaad zap_name_free(zn);
198c1cb2cd8Shaad return (NULL);
199c1cb2cd8Shaad }
200a252d550Shaad zn->zn_key_norm = zn->zn_key_orig;
201a252d550Shaad zn->zn_key_norm_numints = zn->zn_key_orig_numints;
202c1cb2cd8Shaad }
203c1cb2cd8Shaad
204a252d550Shaad zn->zn_hash = zap_hash(zn);
205a252d550Shaad return (zn);
206a252d550Shaad }
207a252d550Shaad
208a252d550Shaad zap_name_t *
zap_name_alloc_uint64(zap_t * zap,const uint64_t * key,int numints)209a252d550Shaad zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints)
210a252d550Shaad {
211a252d550Shaad zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
212a252d550Shaad
213a252d550Shaad ASSERT(zap->zap_normflags == 0);
214a252d550Shaad zn->zn_zap = zap;
215a252d550Shaad zn->zn_key_intlen = sizeof (*key);
216a252d550Shaad zn->zn_key_orig = zn->zn_key_norm = key;
217a252d550Shaad zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints;
218a252d550Shaad zn->zn_matchtype = MT_EXACT;
219a252d550Shaad
220a252d550Shaad zn->zn_hash = zap_hash(zn);
221c1cb2cd8Shaad return (zn);
222c1cb2cd8Shaad }
223c1cb2cd8Shaad
224c1cb2cd8Shaad static void
mzap_byteswap(mzap_phys_t * buf,size_t size)225c1cb2cd8Shaad mzap_byteswap(mzap_phys_t *buf, size_t size)
226c1cb2cd8Shaad {
227c1cb2cd8Shaad int i, max;
228c1cb2cd8Shaad buf->mz_block_type = BSWAP_64(buf->mz_block_type);
229c1cb2cd8Shaad buf->mz_salt = BSWAP_64(buf->mz_salt);
230c1cb2cd8Shaad buf->mz_normflags = BSWAP_64(buf->mz_normflags);
231c1cb2cd8Shaad max = (size / MZAP_ENT_LEN) - 1;
232c1cb2cd8Shaad for (i = 0; i < max; i++) {
233c1cb2cd8Shaad buf->mz_chunk[i].mze_value =
234c1cb2cd8Shaad BSWAP_64(buf->mz_chunk[i].mze_value);
235c1cb2cd8Shaad buf->mz_chunk[i].mze_cd =
236c1cb2cd8Shaad BSWAP_32(buf->mz_chunk[i].mze_cd);
237c1cb2cd8Shaad }
238c1cb2cd8Shaad }
239c1cb2cd8Shaad
240c1cb2cd8Shaad void
zap_byteswap(void * buf,size_t size)241c1cb2cd8Shaad zap_byteswap(void *buf, size_t size)
242c1cb2cd8Shaad {
243c1cb2cd8Shaad uint64_t block_type;
244c1cb2cd8Shaad
245c1cb2cd8Shaad block_type = *(uint64_t *)buf;
246c1cb2cd8Shaad
247c1cb2cd8Shaad if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) {
248c1cb2cd8Shaad /* ASSERT(magic == ZAP_LEAF_MAGIC); */
249c1cb2cd8Shaad mzap_byteswap(buf, size);
250c1cb2cd8Shaad } else {
251c1cb2cd8Shaad fzap_byteswap(buf, size);
252c1cb2cd8Shaad }
253c1cb2cd8Shaad }
254c1cb2cd8Shaad
255c1cb2cd8Shaad static int
mze_compare(const void * arg1,const void * arg2)256c1cb2cd8Shaad mze_compare(const void *arg1, const void *arg2)
257c1cb2cd8Shaad {
258c1cb2cd8Shaad const mzap_ent_t *mze1 = arg1;
259c1cb2cd8Shaad const mzap_ent_t *mze2 = arg2;
260c1cb2cd8Shaad
261c1cb2cd8Shaad if (mze1->mze_hash > mze2->mze_hash)
262c1cb2cd8Shaad return (+1);
263c1cb2cd8Shaad if (mze1->mze_hash < mze2->mze_hash)
264c1cb2cd8Shaad return (-1);
265*ba2539a9Schs if (mze1->mze_cd > mze2->mze_cd)
266c1cb2cd8Shaad return (+1);
267*ba2539a9Schs if (mze1->mze_cd < mze2->mze_cd)
268c1cb2cd8Shaad return (-1);
269c1cb2cd8Shaad return (0);
270c1cb2cd8Shaad }
271c1cb2cd8Shaad
272*ba2539a9Schs static int
mze_insert(zap_t * zap,int chunkid,uint64_t hash)273*ba2539a9Schs mze_insert(zap_t *zap, int chunkid, uint64_t hash)
274c1cb2cd8Shaad {
275c1cb2cd8Shaad mzap_ent_t *mze;
276*ba2539a9Schs avl_index_t idx;
277c1cb2cd8Shaad
278c1cb2cd8Shaad ASSERT(zap->zap_ismicro);
279c1cb2cd8Shaad ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
280c1cb2cd8Shaad
281c1cb2cd8Shaad mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP);
282c1cb2cd8Shaad mze->mze_chunkid = chunkid;
283c1cb2cd8Shaad mze->mze_hash = hash;
284*ba2539a9Schs mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd;
285*ba2539a9Schs ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0);
286*ba2539a9Schs if (avl_find(&zap->zap_m.zap_avl, mze, &idx) != NULL) {
287*ba2539a9Schs kmem_free(mze, sizeof (mzap_ent_t));
288*ba2539a9Schs return (EEXIST);
289*ba2539a9Schs }
290*ba2539a9Schs avl_insert(&zap->zap_m.zap_avl, mze, idx);
291*ba2539a9Schs return (0);
292c1cb2cd8Shaad }
293c1cb2cd8Shaad
294c1cb2cd8Shaad static mzap_ent_t *
mze_find(zap_name_t * zn)295c1cb2cd8Shaad mze_find(zap_name_t *zn)
296c1cb2cd8Shaad {
297c1cb2cd8Shaad mzap_ent_t mze_tofind;
298c1cb2cd8Shaad mzap_ent_t *mze;
299c1cb2cd8Shaad avl_index_t idx;
300c1cb2cd8Shaad avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl;
301c1cb2cd8Shaad
302c1cb2cd8Shaad ASSERT(zn->zn_zap->zap_ismicro);
303c1cb2cd8Shaad ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock));
304c1cb2cd8Shaad
305c1cb2cd8Shaad mze_tofind.mze_hash = zn->zn_hash;
306*ba2539a9Schs mze_tofind.mze_cd = 0;
307c1cb2cd8Shaad
308c1cb2cd8Shaad again:
309c1cb2cd8Shaad mze = avl_find(avl, &mze_tofind, &idx);
310c1cb2cd8Shaad if (mze == NULL)
311c1cb2cd8Shaad mze = avl_nearest(avl, idx, AVL_AFTER);
312c1cb2cd8Shaad for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) {
313*ba2539a9Schs ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd);
314*ba2539a9Schs if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name))
315c1cb2cd8Shaad return (mze);
316c1cb2cd8Shaad }
317c1cb2cd8Shaad if (zn->zn_matchtype == MT_BEST) {
318c1cb2cd8Shaad zn->zn_matchtype = MT_FIRST;
319c1cb2cd8Shaad goto again;
320c1cb2cd8Shaad }
321c1cb2cd8Shaad return (NULL);
322c1cb2cd8Shaad }
323c1cb2cd8Shaad
324c1cb2cd8Shaad static uint32_t
mze_find_unused_cd(zap_t * zap,uint64_t hash)325c1cb2cd8Shaad mze_find_unused_cd(zap_t *zap, uint64_t hash)
326c1cb2cd8Shaad {
327c1cb2cd8Shaad mzap_ent_t mze_tofind;
328c1cb2cd8Shaad mzap_ent_t *mze;
329c1cb2cd8Shaad avl_index_t idx;
330c1cb2cd8Shaad avl_tree_t *avl = &zap->zap_m.zap_avl;
331c1cb2cd8Shaad uint32_t cd;
332c1cb2cd8Shaad
333c1cb2cd8Shaad ASSERT(zap->zap_ismicro);
334c1cb2cd8Shaad ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
335c1cb2cd8Shaad
336c1cb2cd8Shaad mze_tofind.mze_hash = hash;
337*ba2539a9Schs mze_tofind.mze_cd = 0;
338c1cb2cd8Shaad
339c1cb2cd8Shaad cd = 0;
340c1cb2cd8Shaad for (mze = avl_find(avl, &mze_tofind, &idx);
341c1cb2cd8Shaad mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
342*ba2539a9Schs if (mze->mze_cd != cd)
343c1cb2cd8Shaad break;
344c1cb2cd8Shaad cd++;
345c1cb2cd8Shaad }
346c1cb2cd8Shaad
347c1cb2cd8Shaad return (cd);
348c1cb2cd8Shaad }
349c1cb2cd8Shaad
350c1cb2cd8Shaad static void
mze_remove(zap_t * zap,mzap_ent_t * mze)351c1cb2cd8Shaad mze_remove(zap_t *zap, mzap_ent_t *mze)
352c1cb2cd8Shaad {
353c1cb2cd8Shaad ASSERT(zap->zap_ismicro);
354c1cb2cd8Shaad ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
355c1cb2cd8Shaad
356c1cb2cd8Shaad avl_remove(&zap->zap_m.zap_avl, mze);
357c1cb2cd8Shaad kmem_free(mze, sizeof (mzap_ent_t));
358c1cb2cd8Shaad }
359c1cb2cd8Shaad
360c1cb2cd8Shaad static void
mze_destroy(zap_t * zap)361c1cb2cd8Shaad mze_destroy(zap_t *zap)
362c1cb2cd8Shaad {
363c1cb2cd8Shaad mzap_ent_t *mze;
364c1cb2cd8Shaad void *avlcookie = NULL;
365c1cb2cd8Shaad
366c1cb2cd8Shaad while (mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie))
367c1cb2cd8Shaad kmem_free(mze, sizeof (mzap_ent_t));
368c1cb2cd8Shaad avl_destroy(&zap->zap_m.zap_avl);
369c1cb2cd8Shaad }
370c1cb2cd8Shaad
371c1cb2cd8Shaad static zap_t *
mzap_open(objset_t * os,uint64_t obj,dmu_buf_t * db)372c1cb2cd8Shaad mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
373c1cb2cd8Shaad {
374c1cb2cd8Shaad zap_t *winner;
375c1cb2cd8Shaad zap_t *zap;
376c1cb2cd8Shaad int i;
377*ba2539a9Schs uint64_t *zap_hdr = (uint64_t *)db->db_data;
378*ba2539a9Schs uint64_t zap_block_type = zap_hdr[0];
379*ba2539a9Schs uint64_t zap_magic = zap_hdr[1];
380c1cb2cd8Shaad
381c1cb2cd8Shaad ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t));
382c1cb2cd8Shaad
383c1cb2cd8Shaad zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
384c1cb2cd8Shaad rw_init(&zap->zap_rwlock, 0, 0, 0);
385c1cb2cd8Shaad rw_enter(&zap->zap_rwlock, RW_WRITER);
386c1cb2cd8Shaad zap->zap_objset = os;
387c1cb2cd8Shaad zap->zap_object = obj;
388c1cb2cd8Shaad zap->zap_dbuf = db;
389c1cb2cd8Shaad
390*ba2539a9Schs if (zap_block_type != ZBT_MICRO) {
391c1cb2cd8Shaad mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
392*ba2539a9Schs zap->zap_f.zap_block_shift = highbit64(db->db_size) - 1;
393*ba2539a9Schs if (zap_block_type != ZBT_HEADER || zap_magic != ZAP_MAGIC) {
394*ba2539a9Schs winner = NULL; /* No actual winner here... */
395*ba2539a9Schs goto handle_winner;
396*ba2539a9Schs }
397c1cb2cd8Shaad } else {
398c1cb2cd8Shaad zap->zap_ismicro = TRUE;
399c1cb2cd8Shaad }
400c1cb2cd8Shaad
401c1cb2cd8Shaad /*
402c1cb2cd8Shaad * Make sure that zap_ismicro is set before we let others see
403c1cb2cd8Shaad * it, because zap_lockdir() checks zap_ismicro without the lock
404c1cb2cd8Shaad * held.
405c1cb2cd8Shaad */
406*ba2539a9Schs dmu_buf_init_user(&zap->zap_dbu, zap_evict_sync, NULL, &zap->zap_dbuf);
407*ba2539a9Schs winner = dmu_buf_set_user(db, &zap->zap_dbu);
408c1cb2cd8Shaad
409*ba2539a9Schs if (winner != NULL)
410*ba2539a9Schs goto handle_winner;
411c1cb2cd8Shaad
412c1cb2cd8Shaad if (zap->zap_ismicro) {
413*ba2539a9Schs zap->zap_salt = zap_m_phys(zap)->mz_salt;
414*ba2539a9Schs zap->zap_normflags = zap_m_phys(zap)->mz_normflags;
415c1cb2cd8Shaad zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
416c1cb2cd8Shaad avl_create(&zap->zap_m.zap_avl, mze_compare,
417c1cb2cd8Shaad sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
418c1cb2cd8Shaad
419c1cb2cd8Shaad for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
420c1cb2cd8Shaad mzap_ent_phys_t *mze =
421*ba2539a9Schs &zap_m_phys(zap)->mz_chunk[i];
422c1cb2cd8Shaad if (mze->mze_name[0]) {
423c1cb2cd8Shaad zap_name_t *zn;
424c1cb2cd8Shaad
425c1cb2cd8Shaad zn = zap_name_alloc(zap, mze->mze_name,
426c1cb2cd8Shaad MT_EXACT);
427*ba2539a9Schs if (mze_insert(zap, i, zn->zn_hash) == 0)
428*ba2539a9Schs zap->zap_m.zap_num_entries++;
429*ba2539a9Schs else {
430*ba2539a9Schs printf("ZFS WARNING: Duplicated ZAP "
431*ba2539a9Schs "entry detected (%s).\n",
432*ba2539a9Schs mze->mze_name);
433*ba2539a9Schs }
434c1cb2cd8Shaad zap_name_free(zn);
435c1cb2cd8Shaad }
436c1cb2cd8Shaad }
437c1cb2cd8Shaad } else {
438*ba2539a9Schs zap->zap_salt = zap_f_phys(zap)->zap_salt;
439*ba2539a9Schs zap->zap_normflags = zap_f_phys(zap)->zap_normflags;
440c1cb2cd8Shaad
441c1cb2cd8Shaad ASSERT3U(sizeof (struct zap_leaf_header), ==,
442c1cb2cd8Shaad 2*ZAP_LEAF_CHUNKSIZE);
443c1cb2cd8Shaad
444c1cb2cd8Shaad /*
445c1cb2cd8Shaad * The embedded pointer table should not overlap the
446c1cb2cd8Shaad * other members.
447c1cb2cd8Shaad */
448c1cb2cd8Shaad ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >,
449*ba2539a9Schs &zap_f_phys(zap)->zap_salt);
450c1cb2cd8Shaad
451c1cb2cd8Shaad /*
452c1cb2cd8Shaad * The embedded pointer table should end at the end of
453c1cb2cd8Shaad * the block
454c1cb2cd8Shaad */
455c1cb2cd8Shaad ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap,
456c1cb2cd8Shaad 1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) -
457*ba2539a9Schs (uintptr_t)zap_f_phys(zap), ==,
458c1cb2cd8Shaad zap->zap_dbuf->db_size);
459c1cb2cd8Shaad }
460c1cb2cd8Shaad rw_exit(&zap->zap_rwlock);
461c1cb2cd8Shaad return (zap);
462*ba2539a9Schs
463*ba2539a9Schs handle_winner:
464*ba2539a9Schs rw_exit(&zap->zap_rwlock);
465*ba2539a9Schs rw_destroy(&zap->zap_rwlock);
466*ba2539a9Schs if (!zap->zap_ismicro)
467*ba2539a9Schs mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
468*ba2539a9Schs kmem_free(zap, sizeof (zap_t));
469*ba2539a9Schs return (winner);
470c1cb2cd8Shaad }
471c1cb2cd8Shaad
472*ba2539a9Schs static int
zap_lockdir_impl(dmu_buf_t * db,void * tag,dmu_tx_t * tx,krw_t lti,boolean_t fatreader,boolean_t adding,zap_t ** zapp)473*ba2539a9Schs zap_lockdir_impl(dmu_buf_t *db, void *tag, dmu_tx_t *tx,
474c1cb2cd8Shaad krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
475c1cb2cd8Shaad {
476c1cb2cd8Shaad zap_t *zap;
477c1cb2cd8Shaad krw_t lt;
478*ba2539a9Schs
479*ba2539a9Schs ASSERT0(db->db_offset);
480*ba2539a9Schs objset_t *os = dmu_buf_get_objset(db);
481*ba2539a9Schs uint64_t obj = db->db_object;
482c1cb2cd8Shaad
483c1cb2cd8Shaad *zapp = NULL;
484c1cb2cd8Shaad
485c1cb2cd8Shaad #ifdef ZFS_DEBUG
486c1cb2cd8Shaad {
487c1cb2cd8Shaad dmu_object_info_t doi;
488c1cb2cd8Shaad dmu_object_info_from_db(db, &doi);
489*ba2539a9Schs ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
490c1cb2cd8Shaad }
491c1cb2cd8Shaad #endif
492c1cb2cd8Shaad
493c1cb2cd8Shaad zap = dmu_buf_get_user(db);
494*ba2539a9Schs if (zap == NULL) {
495c1cb2cd8Shaad zap = mzap_open(os, obj, db);
496*ba2539a9Schs if (zap == NULL) {
497*ba2539a9Schs /*
498*ba2539a9Schs * mzap_open() didn't like what it saw on-disk.
499*ba2539a9Schs * Check for corruption!
500*ba2539a9Schs */
501*ba2539a9Schs return (SET_ERROR(EIO));
502*ba2539a9Schs }
503*ba2539a9Schs }
504c1cb2cd8Shaad
505c1cb2cd8Shaad /*
506c1cb2cd8Shaad * We're checking zap_ismicro without the lock held, in order to
507c1cb2cd8Shaad * tell what type of lock we want. Once we have some sort of
508c1cb2cd8Shaad * lock, see if it really is the right type. In practice this
509c1cb2cd8Shaad * can only be different if it was upgraded from micro to fat,
510c1cb2cd8Shaad * and micro wanted WRITER but fat only needs READER.
511c1cb2cd8Shaad */
512c1cb2cd8Shaad lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
513c1cb2cd8Shaad rw_enter(&zap->zap_rwlock, lt);
514c1cb2cd8Shaad if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
515c1cb2cd8Shaad /* it was upgraded, now we only need reader */
516c1cb2cd8Shaad ASSERT(lt == RW_WRITER);
517c1cb2cd8Shaad ASSERT(RW_READER ==
518c1cb2cd8Shaad (!zap->zap_ismicro && fatreader) ? RW_READER : lti);
519c1cb2cd8Shaad rw_downgrade(&zap->zap_rwlock);
520c1cb2cd8Shaad lt = RW_READER;
521c1cb2cd8Shaad }
522c1cb2cd8Shaad
523c1cb2cd8Shaad zap->zap_objset = os;
524c1cb2cd8Shaad
525c1cb2cd8Shaad if (lt == RW_WRITER)
526c1cb2cd8Shaad dmu_buf_will_dirty(db, tx);
527c1cb2cd8Shaad
528c1cb2cd8Shaad ASSERT3P(zap->zap_dbuf, ==, db);
529c1cb2cd8Shaad
530c1cb2cd8Shaad ASSERT(!zap->zap_ismicro ||
531c1cb2cd8Shaad zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks);
532c1cb2cd8Shaad if (zap->zap_ismicro && tx && adding &&
533c1cb2cd8Shaad zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) {
534c1cb2cd8Shaad uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE;
535c1cb2cd8Shaad if (newsz > MZAP_MAX_BLKSZ) {
536c1cb2cd8Shaad dprintf("upgrading obj %llu: num_entries=%u\n",
537c1cb2cd8Shaad obj, zap->zap_m.zap_num_entries);
538c1cb2cd8Shaad *zapp = zap;
539*ba2539a9Schs int err = mzap_upgrade(zapp, tag, tx, 0);
540*ba2539a9Schs if (err != 0)
541*ba2539a9Schs rw_exit(&zap->zap_rwlock);
542*ba2539a9Schs return (err);
543c1cb2cd8Shaad }
544*ba2539a9Schs VERIFY0(dmu_object_set_blocksize(os, obj, newsz, 0, tx));
545c1cb2cd8Shaad zap->zap_m.zap_num_chunks =
546c1cb2cd8Shaad db->db_size / MZAP_ENT_LEN - 1;
547c1cb2cd8Shaad }
548c1cb2cd8Shaad
549c1cb2cd8Shaad *zapp = zap;
550c1cb2cd8Shaad return (0);
551c1cb2cd8Shaad }
552c1cb2cd8Shaad
553*ba2539a9Schs static int
zap_lockdir_by_dnode(dnode_t * dn,dmu_tx_t * tx,krw_t lti,boolean_t fatreader,boolean_t adding,void * tag,zap_t ** zapp)554*ba2539a9Schs zap_lockdir_by_dnode(dnode_t *dn, dmu_tx_t *tx,
555*ba2539a9Schs krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp)
556*ba2539a9Schs {
557*ba2539a9Schs dmu_buf_t *db;
558*ba2539a9Schs int err;
559*ba2539a9Schs
560*ba2539a9Schs err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
561*ba2539a9Schs if (err != 0) {
562*ba2539a9Schs return (err);
563*ba2539a9Schs }
564*ba2539a9Schs err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp);
565*ba2539a9Schs if (err != 0) {
566*ba2539a9Schs dmu_buf_rele(db, tag);
567*ba2539a9Schs }
568*ba2539a9Schs return (err);
569*ba2539a9Schs }
570*ba2539a9Schs
571*ba2539a9Schs int
zap_lockdir(objset_t * os,uint64_t obj,dmu_tx_t * tx,krw_t lti,boolean_t fatreader,boolean_t adding,void * tag,zap_t ** zapp)572*ba2539a9Schs zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
573*ba2539a9Schs krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp)
574*ba2539a9Schs {
575*ba2539a9Schs dmu_buf_t *db;
576*ba2539a9Schs int err;
577*ba2539a9Schs
578*ba2539a9Schs err = dmu_buf_hold(os, obj, 0, tag, &db, DMU_READ_NO_PREFETCH);
579*ba2539a9Schs if (err != 0)
580*ba2539a9Schs return (err);
581*ba2539a9Schs err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp);
582*ba2539a9Schs if (err != 0)
583*ba2539a9Schs dmu_buf_rele(db, tag);
584*ba2539a9Schs return (err);
585*ba2539a9Schs }
586*ba2539a9Schs
587c1cb2cd8Shaad void
zap_unlockdir(zap_t * zap,void * tag)588*ba2539a9Schs zap_unlockdir(zap_t *zap, void *tag)
589c1cb2cd8Shaad {
590c1cb2cd8Shaad rw_exit(&zap->zap_rwlock);
591*ba2539a9Schs dmu_buf_rele(zap->zap_dbuf, tag);
592c1cb2cd8Shaad }
593c1cb2cd8Shaad
594c1cb2cd8Shaad static int
mzap_upgrade(zap_t ** zapp,void * tag,dmu_tx_t * tx,zap_flags_t flags)595*ba2539a9Schs mzap_upgrade(zap_t **zapp, void *tag, dmu_tx_t *tx, zap_flags_t flags)
596c1cb2cd8Shaad {
597c1cb2cd8Shaad mzap_phys_t *mzp;
598a252d550Shaad int i, sz, nchunks;
599a252d550Shaad int err = 0;
600c1cb2cd8Shaad zap_t *zap = *zapp;
601c1cb2cd8Shaad
602c1cb2cd8Shaad ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
603c1cb2cd8Shaad
604c1cb2cd8Shaad sz = zap->zap_dbuf->db_size;
605*ba2539a9Schs mzp = zio_buf_alloc(sz);
606c1cb2cd8Shaad bcopy(zap->zap_dbuf->db_data, mzp, sz);
607c1cb2cd8Shaad nchunks = zap->zap_m.zap_num_chunks;
608c1cb2cd8Shaad
609a252d550Shaad if (!flags) {
610c1cb2cd8Shaad err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object,
611c1cb2cd8Shaad 1ULL << fzap_default_block_shift, 0, tx);
612c1cb2cd8Shaad if (err) {
613*ba2539a9Schs zio_buf_free(mzp, sz);
614c1cb2cd8Shaad return (err);
615c1cb2cd8Shaad }
616a252d550Shaad }
617c1cb2cd8Shaad
618c1cb2cd8Shaad dprintf("upgrading obj=%llu with %u chunks\n",
619c1cb2cd8Shaad zap->zap_object, nchunks);
620c1cb2cd8Shaad /* XXX destroy the avl later, so we can use the stored hash value */
621c1cb2cd8Shaad mze_destroy(zap);
622c1cb2cd8Shaad
623a252d550Shaad fzap_upgrade(zap, tx, flags);
624c1cb2cd8Shaad
625c1cb2cd8Shaad for (i = 0; i < nchunks; i++) {
626c1cb2cd8Shaad mzap_ent_phys_t *mze = &mzp->mz_chunk[i];
627c1cb2cd8Shaad zap_name_t *zn;
628c1cb2cd8Shaad if (mze->mze_name[0] == 0)
629c1cb2cd8Shaad continue;
630c1cb2cd8Shaad dprintf("adding %s=%llu\n",
631c1cb2cd8Shaad mze->mze_name, mze->mze_value);
632c1cb2cd8Shaad zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT);
633*ba2539a9Schs err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd,
634*ba2539a9Schs tag, tx);
635c1cb2cd8Shaad zap = zn->zn_zap; /* fzap_add_cd() may change zap */
636c1cb2cd8Shaad zap_name_free(zn);
637c1cb2cd8Shaad if (err)
638c1cb2cd8Shaad break;
639c1cb2cd8Shaad }
640*ba2539a9Schs zio_buf_free(mzp, sz);
641c1cb2cd8Shaad *zapp = zap;
642c1cb2cd8Shaad return (err);
643c1cb2cd8Shaad }
644c1cb2cd8Shaad
645*ba2539a9Schs void
mzap_create_impl(objset_t * os,uint64_t obj,int normflags,zap_flags_t flags,dmu_tx_t * tx)646a252d550Shaad mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags,
647a252d550Shaad dmu_tx_t *tx)
648c1cb2cd8Shaad {
649c1cb2cd8Shaad dmu_buf_t *db;
650c1cb2cd8Shaad mzap_phys_t *zp;
651c1cb2cd8Shaad
652*ba2539a9Schs VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH));
653c1cb2cd8Shaad
654c1cb2cd8Shaad #ifdef ZFS_DEBUG
655c1cb2cd8Shaad {
656c1cb2cd8Shaad dmu_object_info_t doi;
657c1cb2cd8Shaad dmu_object_info_from_db(db, &doi);
658*ba2539a9Schs ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
659c1cb2cd8Shaad }
660c1cb2cd8Shaad #endif
661c1cb2cd8Shaad
662c1cb2cd8Shaad dmu_buf_will_dirty(db, tx);
663c1cb2cd8Shaad zp = db->db_data;
664c1cb2cd8Shaad zp->mz_block_type = ZBT_MICRO;
665c1cb2cd8Shaad zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL;
666c1cb2cd8Shaad zp->mz_normflags = normflags;
667c1cb2cd8Shaad dmu_buf_rele(db, FTAG);
668a252d550Shaad
669a252d550Shaad if (flags != 0) {
670a252d550Shaad zap_t *zap;
671a252d550Shaad /* Only fat zap supports flags; upgrade immediately. */
672a252d550Shaad VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER,
673*ba2539a9Schs B_FALSE, B_FALSE, FTAG, &zap));
674*ba2539a9Schs VERIFY3U(0, ==, mzap_upgrade(&zap, FTAG, tx, flags));
675*ba2539a9Schs zap_unlockdir(zap, FTAG);
676a252d550Shaad }
677c1cb2cd8Shaad }
678c1cb2cd8Shaad
679c1cb2cd8Shaad int
zap_create_claim(objset_t * os,uint64_t obj,dmu_object_type_t ot,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)680c1cb2cd8Shaad zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot,
681c1cb2cd8Shaad dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
682c1cb2cd8Shaad {
683c1cb2cd8Shaad return (zap_create_claim_norm(os, obj,
684c1cb2cd8Shaad 0, ot, bonustype, bonuslen, tx));
685c1cb2cd8Shaad }
686c1cb2cd8Shaad
687c1cb2cd8Shaad int
zap_create_claim_norm(objset_t * os,uint64_t obj,int normflags,dmu_object_type_t ot,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)688c1cb2cd8Shaad zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags,
689c1cb2cd8Shaad dmu_object_type_t ot,
690c1cb2cd8Shaad dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
691c1cb2cd8Shaad {
692c1cb2cd8Shaad int err;
693c1cb2cd8Shaad
694c1cb2cd8Shaad err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx);
695c1cb2cd8Shaad if (err != 0)
696c1cb2cd8Shaad return (err);
697a252d550Shaad mzap_create_impl(os, obj, normflags, 0, tx);
698c1cb2cd8Shaad return (0);
699c1cb2cd8Shaad }
700c1cb2cd8Shaad
701c1cb2cd8Shaad uint64_t
zap_create(objset_t * os,dmu_object_type_t ot,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)702c1cb2cd8Shaad zap_create(objset_t *os, dmu_object_type_t ot,
703c1cb2cd8Shaad dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
704c1cb2cd8Shaad {
705c1cb2cd8Shaad return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx));
706c1cb2cd8Shaad }
707c1cb2cd8Shaad
708c1cb2cd8Shaad uint64_t
zap_create_norm(objset_t * os,int normflags,dmu_object_type_t ot,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)709c1cb2cd8Shaad zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot,
710c1cb2cd8Shaad dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
711c1cb2cd8Shaad {
712c1cb2cd8Shaad uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx);
713c1cb2cd8Shaad
714a252d550Shaad mzap_create_impl(os, obj, normflags, 0, tx);
715a252d550Shaad return (obj);
716a252d550Shaad }
717a252d550Shaad
718a252d550Shaad uint64_t
zap_create_flags(objset_t * os,int normflags,zap_flags_t flags,dmu_object_type_t ot,int leaf_blockshift,int indirect_blockshift,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)719a252d550Shaad zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
720a252d550Shaad dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
721a252d550Shaad dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
722a252d550Shaad {
723a252d550Shaad uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx);
724a252d550Shaad
725a252d550Shaad ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT &&
726*ba2539a9Schs leaf_blockshift <= SPA_OLD_MAXBLOCKSHIFT &&
727a252d550Shaad indirect_blockshift >= SPA_MINBLOCKSHIFT &&
728*ba2539a9Schs indirect_blockshift <= SPA_OLD_MAXBLOCKSHIFT);
729a252d550Shaad
730a252d550Shaad VERIFY(dmu_object_set_blocksize(os, obj,
731a252d550Shaad 1ULL << leaf_blockshift, indirect_blockshift, tx) == 0);
732a252d550Shaad
733a252d550Shaad mzap_create_impl(os, obj, normflags, flags, tx);
734c1cb2cd8Shaad return (obj);
735c1cb2cd8Shaad }
736c1cb2cd8Shaad
737c1cb2cd8Shaad int
zap_destroy(objset_t * os,uint64_t zapobj,dmu_tx_t * tx)738c1cb2cd8Shaad zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
739c1cb2cd8Shaad {
740c1cb2cd8Shaad /*
741c1cb2cd8Shaad * dmu_object_free will free the object number and free the
742c1cb2cd8Shaad * data. Freeing the data will cause our pageout function to be
743c1cb2cd8Shaad * called, which will destroy our data (zap_leaf_t's and zap_t).
744c1cb2cd8Shaad */
745c1cb2cd8Shaad
746c1cb2cd8Shaad return (dmu_object_free(os, zapobj, tx));
747c1cb2cd8Shaad }
748c1cb2cd8Shaad
749c1cb2cd8Shaad void
zap_evict_sync(void * dbu)750*ba2539a9Schs zap_evict_sync(void *dbu)
751c1cb2cd8Shaad {
752*ba2539a9Schs zap_t *zap = dbu;
753c1cb2cd8Shaad
754c1cb2cd8Shaad rw_destroy(&zap->zap_rwlock);
755c1cb2cd8Shaad
756c1cb2cd8Shaad if (zap->zap_ismicro)
757c1cb2cd8Shaad mze_destroy(zap);
758c1cb2cd8Shaad else
759c1cb2cd8Shaad mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
760c1cb2cd8Shaad
761c1cb2cd8Shaad kmem_free(zap, sizeof (zap_t));
762c1cb2cd8Shaad }
763c1cb2cd8Shaad
764c1cb2cd8Shaad int
zap_count(objset_t * os,uint64_t zapobj,uint64_t * count)765c1cb2cd8Shaad zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
766c1cb2cd8Shaad {
767c1cb2cd8Shaad zap_t *zap;
768c1cb2cd8Shaad int err;
769c1cb2cd8Shaad
770*ba2539a9Schs err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
771c1cb2cd8Shaad if (err)
772c1cb2cd8Shaad return (err);
773c1cb2cd8Shaad if (!zap->zap_ismicro) {
774c1cb2cd8Shaad err = fzap_count(zap, count);
775c1cb2cd8Shaad } else {
776c1cb2cd8Shaad *count = zap->zap_m.zap_num_entries;
777c1cb2cd8Shaad }
778*ba2539a9Schs zap_unlockdir(zap, FTAG);
779c1cb2cd8Shaad return (err);
780c1cb2cd8Shaad }
781c1cb2cd8Shaad
782c1cb2cd8Shaad /*
783c1cb2cd8Shaad * zn may be NULL; if not specified, it will be computed if needed.
784c1cb2cd8Shaad * See also the comment above zap_entry_normalization_conflict().
785c1cb2cd8Shaad */
786c1cb2cd8Shaad static boolean_t
mzap_normalization_conflict(zap_t * zap,zap_name_t * zn,mzap_ent_t * mze)787c1cb2cd8Shaad mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze)
788c1cb2cd8Shaad {
789c1cb2cd8Shaad mzap_ent_t *other;
790c1cb2cd8Shaad int direction = AVL_BEFORE;
791c1cb2cd8Shaad boolean_t allocdzn = B_FALSE;
792c1cb2cd8Shaad
793c1cb2cd8Shaad if (zap->zap_normflags == 0)
794c1cb2cd8Shaad return (B_FALSE);
795c1cb2cd8Shaad
796c1cb2cd8Shaad again:
797c1cb2cd8Shaad for (other = avl_walk(&zap->zap_m.zap_avl, mze, direction);
798c1cb2cd8Shaad other && other->mze_hash == mze->mze_hash;
799c1cb2cd8Shaad other = avl_walk(&zap->zap_m.zap_avl, other, direction)) {
800c1cb2cd8Shaad
801c1cb2cd8Shaad if (zn == NULL) {
802*ba2539a9Schs zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name,
803c1cb2cd8Shaad MT_FIRST);
804c1cb2cd8Shaad allocdzn = B_TRUE;
805c1cb2cd8Shaad }
806*ba2539a9Schs if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) {
807c1cb2cd8Shaad if (allocdzn)
808c1cb2cd8Shaad zap_name_free(zn);
809c1cb2cd8Shaad return (B_TRUE);
810c1cb2cd8Shaad }
811c1cb2cd8Shaad }
812c1cb2cd8Shaad
813c1cb2cd8Shaad if (direction == AVL_BEFORE) {
814c1cb2cd8Shaad direction = AVL_AFTER;
815c1cb2cd8Shaad goto again;
816c1cb2cd8Shaad }
817c1cb2cd8Shaad
818c1cb2cd8Shaad if (allocdzn)
819c1cb2cd8Shaad zap_name_free(zn);
820c1cb2cd8Shaad return (B_FALSE);
821c1cb2cd8Shaad }
822c1cb2cd8Shaad
823c1cb2cd8Shaad /*
824c1cb2cd8Shaad * Routines for manipulating attributes.
825c1cb2cd8Shaad */
826c1cb2cd8Shaad
827c1cb2cd8Shaad int
zap_lookup(objset_t * os,uint64_t zapobj,const char * name,uint64_t integer_size,uint64_t num_integers,void * buf)828c1cb2cd8Shaad zap_lookup(objset_t *os, uint64_t zapobj, const char *name,
829c1cb2cd8Shaad uint64_t integer_size, uint64_t num_integers, void *buf)
830c1cb2cd8Shaad {
831c1cb2cd8Shaad return (zap_lookup_norm(os, zapobj, name, integer_size,
832c1cb2cd8Shaad num_integers, buf, MT_EXACT, NULL, 0, NULL));
833c1cb2cd8Shaad }
834c1cb2cd8Shaad
835*ba2539a9Schs static int
zap_lookup_impl(zap_t * zap,const char * name,uint64_t integer_size,uint64_t num_integers,void * buf,matchtype_t mt,char * realname,int rn_len,boolean_t * ncp)836*ba2539a9Schs zap_lookup_impl(zap_t *zap, const char *name,
837c1cb2cd8Shaad uint64_t integer_size, uint64_t num_integers, void *buf,
838c1cb2cd8Shaad matchtype_t mt, char *realname, int rn_len,
839c1cb2cd8Shaad boolean_t *ncp)
840c1cb2cd8Shaad {
841*ba2539a9Schs int err = 0;
842c1cb2cd8Shaad mzap_ent_t *mze;
843c1cb2cd8Shaad zap_name_t *zn;
844c1cb2cd8Shaad
845c1cb2cd8Shaad zn = zap_name_alloc(zap, name, mt);
846*ba2539a9Schs if (zn == NULL)
847*ba2539a9Schs return (SET_ERROR(ENOTSUP));
848c1cb2cd8Shaad
849c1cb2cd8Shaad if (!zap->zap_ismicro) {
850c1cb2cd8Shaad err = fzap_lookup(zn, integer_size, num_integers, buf,
851c1cb2cd8Shaad realname, rn_len, ncp);
852c1cb2cd8Shaad } else {
853c1cb2cd8Shaad mze = mze_find(zn);
854c1cb2cd8Shaad if (mze == NULL) {
855*ba2539a9Schs err = SET_ERROR(ENOENT);
856c1cb2cd8Shaad } else {
857c1cb2cd8Shaad if (num_integers < 1) {
858*ba2539a9Schs err = SET_ERROR(EOVERFLOW);
859c1cb2cd8Shaad } else if (integer_size != 8) {
860*ba2539a9Schs err = SET_ERROR(EINVAL);
861c1cb2cd8Shaad } else {
862*ba2539a9Schs *(uint64_t *)buf =
863*ba2539a9Schs MZE_PHYS(zap, mze)->mze_value;
8646a125a39Shaad if (realname != NULL)
865c1cb2cd8Shaad (void) strlcpy(realname,
866*ba2539a9Schs MZE_PHYS(zap, mze)->mze_name, rn_len);
867c1cb2cd8Shaad if (ncp) {
868c1cb2cd8Shaad *ncp = mzap_normalization_conflict(zap,
869c1cb2cd8Shaad zn, mze);
870c1cb2cd8Shaad }
871c1cb2cd8Shaad }
872c1cb2cd8Shaad }
873c1cb2cd8Shaad }
874c1cb2cd8Shaad zap_name_free(zn);
875*ba2539a9Schs return (err);
876*ba2539a9Schs }
877*ba2539a9Schs
878*ba2539a9Schs int
zap_lookup_norm(objset_t * os,uint64_t zapobj,const char * name,uint64_t integer_size,uint64_t num_integers,void * buf,matchtype_t mt,char * realname,int rn_len,boolean_t * ncp)879*ba2539a9Schs zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
880*ba2539a9Schs uint64_t integer_size, uint64_t num_integers, void *buf,
881*ba2539a9Schs matchtype_t mt, char *realname, int rn_len,
882*ba2539a9Schs boolean_t *ncp)
883*ba2539a9Schs {
884*ba2539a9Schs zap_t *zap;
885*ba2539a9Schs int err;
886*ba2539a9Schs
887*ba2539a9Schs err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
888*ba2539a9Schs if (err != 0)
889*ba2539a9Schs return (err);
890*ba2539a9Schs err = zap_lookup_impl(zap, name, integer_size,
891*ba2539a9Schs num_integers, buf, mt, realname, rn_len, ncp);
892*ba2539a9Schs zap_unlockdir(zap, FTAG);
893*ba2539a9Schs return (err);
894*ba2539a9Schs }
895*ba2539a9Schs
896*ba2539a9Schs int
zap_lookup_by_dnode(dnode_t * dn,const char * name,uint64_t integer_size,uint64_t num_integers,void * buf)897*ba2539a9Schs zap_lookup_by_dnode(dnode_t *dn, const char *name,
898*ba2539a9Schs uint64_t integer_size, uint64_t num_integers, void *buf)
899*ba2539a9Schs {
900*ba2539a9Schs return (zap_lookup_norm_by_dnode(dn, name, integer_size,
901*ba2539a9Schs num_integers, buf, MT_EXACT, NULL, 0, NULL));
902*ba2539a9Schs }
903*ba2539a9Schs
904*ba2539a9Schs int
zap_lookup_norm_by_dnode(dnode_t * dn,const char * name,uint64_t integer_size,uint64_t num_integers,void * buf,matchtype_t mt,char * realname,int rn_len,boolean_t * ncp)905*ba2539a9Schs zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
906*ba2539a9Schs uint64_t integer_size, uint64_t num_integers, void *buf,
907*ba2539a9Schs matchtype_t mt, char *realname, int rn_len,
908*ba2539a9Schs boolean_t *ncp)
909*ba2539a9Schs {
910*ba2539a9Schs zap_t *zap;
911*ba2539a9Schs int err;
912*ba2539a9Schs
913*ba2539a9Schs err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
914*ba2539a9Schs FTAG, &zap);
915*ba2539a9Schs if (err != 0)
916*ba2539a9Schs return (err);
917*ba2539a9Schs err = zap_lookup_impl(zap, name, integer_size,
918*ba2539a9Schs num_integers, buf, mt, realname, rn_len, ncp);
919*ba2539a9Schs zap_unlockdir(zap, FTAG);
920*ba2539a9Schs return (err);
921*ba2539a9Schs }
922*ba2539a9Schs
923*ba2539a9Schs int
zap_prefetch_uint64(objset_t * os,uint64_t zapobj,const uint64_t * key,int key_numints)924*ba2539a9Schs zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
925*ba2539a9Schs int key_numints)
926*ba2539a9Schs {
927*ba2539a9Schs zap_t *zap;
928*ba2539a9Schs int err;
929*ba2539a9Schs zap_name_t *zn;
930*ba2539a9Schs
931*ba2539a9Schs err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
932*ba2539a9Schs if (err)
933*ba2539a9Schs return (err);
934*ba2539a9Schs zn = zap_name_alloc_uint64(zap, key, key_numints);
935*ba2539a9Schs if (zn == NULL) {
936*ba2539a9Schs zap_unlockdir(zap, FTAG);
937*ba2539a9Schs return (SET_ERROR(ENOTSUP));
938*ba2539a9Schs }
939*ba2539a9Schs
940*ba2539a9Schs fzap_prefetch(zn);
941*ba2539a9Schs zap_name_free(zn);
942*ba2539a9Schs zap_unlockdir(zap, FTAG);
943c1cb2cd8Shaad return (err);
944c1cb2cd8Shaad }
945c1cb2cd8Shaad
946c1cb2cd8Shaad int
zap_lookup_uint64(objset_t * os,uint64_t zapobj,const uint64_t * key,int key_numints,uint64_t integer_size,uint64_t num_integers,void * buf)947a252d550Shaad zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
948a252d550Shaad int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf)
949a252d550Shaad {
950a252d550Shaad zap_t *zap;
951a252d550Shaad int err;
952a252d550Shaad zap_name_t *zn;
953a252d550Shaad
954*ba2539a9Schs err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
955a252d550Shaad if (err)
956a252d550Shaad return (err);
957a252d550Shaad zn = zap_name_alloc_uint64(zap, key, key_numints);
958a252d550Shaad if (zn == NULL) {
959*ba2539a9Schs zap_unlockdir(zap, FTAG);
960*ba2539a9Schs return (SET_ERROR(ENOTSUP));
961a252d550Shaad }
962a252d550Shaad
963a252d550Shaad err = fzap_lookup(zn, integer_size, num_integers, buf,
964a252d550Shaad NULL, 0, NULL);
965a252d550Shaad zap_name_free(zn);
966*ba2539a9Schs zap_unlockdir(zap, FTAG);
967a252d550Shaad return (err);
968a252d550Shaad }
969a252d550Shaad
970a252d550Shaad int
zap_contains(objset_t * os,uint64_t zapobj,const char * name)971a252d550Shaad zap_contains(objset_t *os, uint64_t zapobj, const char *name)
972a252d550Shaad {
973*ba2539a9Schs int err = zap_lookup_norm(os, zapobj, name, 0,
974*ba2539a9Schs 0, NULL, MT_EXACT, NULL, 0, NULL);
975a252d550Shaad if (err == EOVERFLOW || err == EINVAL)
976a252d550Shaad err = 0; /* found, but skipped reading the value */
977a252d550Shaad return (err);
978a252d550Shaad }
979a252d550Shaad
980a252d550Shaad int
zap_length(objset_t * os,uint64_t zapobj,const char * name,uint64_t * integer_size,uint64_t * num_integers)981c1cb2cd8Shaad zap_length(objset_t *os, uint64_t zapobj, const char *name,
982c1cb2cd8Shaad uint64_t *integer_size, uint64_t *num_integers)
983c1cb2cd8Shaad {
984c1cb2cd8Shaad zap_t *zap;
985c1cb2cd8Shaad int err;
986c1cb2cd8Shaad mzap_ent_t *mze;
987c1cb2cd8Shaad zap_name_t *zn;
988c1cb2cd8Shaad
989*ba2539a9Schs err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
990c1cb2cd8Shaad if (err)
991c1cb2cd8Shaad return (err);
992c1cb2cd8Shaad zn = zap_name_alloc(zap, name, MT_EXACT);
993c1cb2cd8Shaad if (zn == NULL) {
994*ba2539a9Schs zap_unlockdir(zap, FTAG);
995*ba2539a9Schs return (SET_ERROR(ENOTSUP));
996c1cb2cd8Shaad }
997c1cb2cd8Shaad if (!zap->zap_ismicro) {
998c1cb2cd8Shaad err = fzap_length(zn, integer_size, num_integers);
999c1cb2cd8Shaad } else {
1000c1cb2cd8Shaad mze = mze_find(zn);
1001c1cb2cd8Shaad if (mze == NULL) {
1002*ba2539a9Schs err = SET_ERROR(ENOENT);
1003c1cb2cd8Shaad } else {
1004c1cb2cd8Shaad if (integer_size)
1005c1cb2cd8Shaad *integer_size = 8;
1006c1cb2cd8Shaad if (num_integers)
1007c1cb2cd8Shaad *num_integers = 1;
1008c1cb2cd8Shaad }
1009c1cb2cd8Shaad }
1010c1cb2cd8Shaad zap_name_free(zn);
1011*ba2539a9Schs zap_unlockdir(zap, FTAG);
1012c1cb2cd8Shaad return (err);
1013c1cb2cd8Shaad }
1014c1cb2cd8Shaad
1015a252d550Shaad int
zap_length_uint64(objset_t * os,uint64_t zapobj,const uint64_t * key,int key_numints,uint64_t * integer_size,uint64_t * num_integers)1016a252d550Shaad zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
1017a252d550Shaad int key_numints, uint64_t *integer_size, uint64_t *num_integers)
1018a252d550Shaad {
1019a252d550Shaad zap_t *zap;
1020a252d550Shaad int err;
1021a252d550Shaad zap_name_t *zn;
1022a252d550Shaad
1023*ba2539a9Schs err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
1024a252d550Shaad if (err)
1025a252d550Shaad return (err);
1026a252d550Shaad zn = zap_name_alloc_uint64(zap, key, key_numints);
1027a252d550Shaad if (zn == NULL) {
1028*ba2539a9Schs zap_unlockdir(zap, FTAG);
1029*ba2539a9Schs return (SET_ERROR(ENOTSUP));
1030a252d550Shaad }
1031a252d550Shaad err = fzap_length(zn, integer_size, num_integers);
1032a252d550Shaad zap_name_free(zn);
1033*ba2539a9Schs zap_unlockdir(zap, FTAG);
1034a252d550Shaad return (err);
1035a252d550Shaad }
1036a252d550Shaad
1037c1cb2cd8Shaad static void
mzap_addent(zap_name_t * zn,uint64_t value)1038c1cb2cd8Shaad mzap_addent(zap_name_t *zn, uint64_t value)
1039c1cb2cd8Shaad {
1040c1cb2cd8Shaad int i;
1041c1cb2cd8Shaad zap_t *zap = zn->zn_zap;
1042c1cb2cd8Shaad int start = zap->zap_m.zap_alloc_next;
1043c1cb2cd8Shaad uint32_t cd;
1044c1cb2cd8Shaad
1045c1cb2cd8Shaad ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
1046c1cb2cd8Shaad
1047c1cb2cd8Shaad #ifdef ZFS_DEBUG
1048c1cb2cd8Shaad for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
1049*ba2539a9Schs mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i];
1050a252d550Shaad ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0);
1051c1cb2cd8Shaad }
1052c1cb2cd8Shaad #endif
1053c1cb2cd8Shaad
1054c1cb2cd8Shaad cd = mze_find_unused_cd(zap, zn->zn_hash);
1055c1cb2cd8Shaad /* given the limited size of the microzap, this can't happen */
1056a252d550Shaad ASSERT(cd < zap_maxcd(zap));
1057c1cb2cd8Shaad
1058c1cb2cd8Shaad again:
1059c1cb2cd8Shaad for (i = start; i < zap->zap_m.zap_num_chunks; i++) {
1060*ba2539a9Schs mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i];
1061c1cb2cd8Shaad if (mze->mze_name[0] == 0) {
1062c1cb2cd8Shaad mze->mze_value = value;
1063c1cb2cd8Shaad mze->mze_cd = cd;
1064a252d550Shaad (void) strcpy(mze->mze_name, zn->zn_key_orig);
1065c1cb2cd8Shaad zap->zap_m.zap_num_entries++;
1066c1cb2cd8Shaad zap->zap_m.zap_alloc_next = i+1;
1067c1cb2cd8Shaad if (zap->zap_m.zap_alloc_next ==
1068c1cb2cd8Shaad zap->zap_m.zap_num_chunks)
1069c1cb2cd8Shaad zap->zap_m.zap_alloc_next = 0;
1070*ba2539a9Schs VERIFY(0 == mze_insert(zap, i, zn->zn_hash));
1071c1cb2cd8Shaad return;
1072c1cb2cd8Shaad }
1073c1cb2cd8Shaad }
1074c1cb2cd8Shaad if (start != 0) {
1075c1cb2cd8Shaad start = 0;
1076c1cb2cd8Shaad goto again;
1077c1cb2cd8Shaad }
1078c1cb2cd8Shaad ASSERT(!"out of entries!");
1079c1cb2cd8Shaad }
1080c1cb2cd8Shaad
1081c1cb2cd8Shaad int
zap_add(objset_t * os,uint64_t zapobj,const char * key,int integer_size,uint64_t num_integers,const void * val,dmu_tx_t * tx)1082a252d550Shaad zap_add(objset_t *os, uint64_t zapobj, const char *key,
1083c1cb2cd8Shaad int integer_size, uint64_t num_integers,
1084c1cb2cd8Shaad const void *val, dmu_tx_t *tx)
1085c1cb2cd8Shaad {
1086c1cb2cd8Shaad zap_t *zap;
1087c1cb2cd8Shaad int err;
1088c1cb2cd8Shaad mzap_ent_t *mze;
1089c1cb2cd8Shaad const uint64_t *intval = val;
1090c1cb2cd8Shaad zap_name_t *zn;
1091c1cb2cd8Shaad
1092*ba2539a9Schs err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
1093c1cb2cd8Shaad if (err)
1094c1cb2cd8Shaad return (err);
1095a252d550Shaad zn = zap_name_alloc(zap, key, MT_EXACT);
1096c1cb2cd8Shaad if (zn == NULL) {
1097*ba2539a9Schs zap_unlockdir(zap, FTAG);
1098*ba2539a9Schs return (SET_ERROR(ENOTSUP));
1099c1cb2cd8Shaad }
1100c1cb2cd8Shaad if (!zap->zap_ismicro) {
1101*ba2539a9Schs err = fzap_add(zn, integer_size, num_integers, val, FTAG, tx);
1102c1cb2cd8Shaad zap = zn->zn_zap; /* fzap_add() may change zap */
1103c1cb2cd8Shaad } else if (integer_size != 8 || num_integers != 1 ||
1104a252d550Shaad strlen(key) >= MZAP_NAME_LEN) {
1105*ba2539a9Schs err = mzap_upgrade(&zn->zn_zap, FTAG, tx, 0);
1106*ba2539a9Schs if (err == 0) {
1107*ba2539a9Schs err = fzap_add(zn, integer_size, num_integers, val,
1108*ba2539a9Schs FTAG, tx);
1109*ba2539a9Schs }
1110c1cb2cd8Shaad zap = zn->zn_zap; /* fzap_add() may change zap */
1111c1cb2cd8Shaad } else {
1112c1cb2cd8Shaad mze = mze_find(zn);
1113c1cb2cd8Shaad if (mze != NULL) {
1114*ba2539a9Schs err = SET_ERROR(EEXIST);
1115c1cb2cd8Shaad } else {
1116c1cb2cd8Shaad mzap_addent(zn, *intval);
1117c1cb2cd8Shaad }
1118c1cb2cd8Shaad }
1119c1cb2cd8Shaad ASSERT(zap == zn->zn_zap);
1120c1cb2cd8Shaad zap_name_free(zn);
1121c1cb2cd8Shaad if (zap != NULL) /* may be NULL if fzap_add() failed */
1122*ba2539a9Schs zap_unlockdir(zap, FTAG);
1123c1cb2cd8Shaad return (err);
1124c1cb2cd8Shaad }
1125c1cb2cd8Shaad
1126c1cb2cd8Shaad int
zap_add_uint64(objset_t * os,uint64_t zapobj,const uint64_t * key,int key_numints,int integer_size,uint64_t num_integers,const void * val,dmu_tx_t * tx)1127a252d550Shaad zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
1128a252d550Shaad int key_numints, int integer_size, uint64_t num_integers,
1129a252d550Shaad const void *val, dmu_tx_t *tx)
1130a252d550Shaad {
1131a252d550Shaad zap_t *zap;
1132a252d550Shaad int err;
1133a252d550Shaad zap_name_t *zn;
1134a252d550Shaad
1135*ba2539a9Schs err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
1136a252d550Shaad if (err)
1137a252d550Shaad return (err);
1138a252d550Shaad zn = zap_name_alloc_uint64(zap, key, key_numints);
1139a252d550Shaad if (zn == NULL) {
1140*ba2539a9Schs zap_unlockdir(zap, FTAG);
1141*ba2539a9Schs return (SET_ERROR(ENOTSUP));
1142a252d550Shaad }
1143*ba2539a9Schs err = fzap_add(zn, integer_size, num_integers, val, FTAG, tx);
1144a252d550Shaad zap = zn->zn_zap; /* fzap_add() may change zap */
1145a252d550Shaad zap_name_free(zn);
1146a252d550Shaad if (zap != NULL) /* may be NULL if fzap_add() failed */
1147*ba2539a9Schs zap_unlockdir(zap, FTAG);
1148a252d550Shaad return (err);
1149a252d550Shaad }
1150a252d550Shaad
1151a252d550Shaad int
zap_update(objset_t * os,uint64_t zapobj,const char * name,int integer_size,uint64_t num_integers,const void * val,dmu_tx_t * tx)1152c1cb2cd8Shaad zap_update(objset_t *os, uint64_t zapobj, const char *name,
1153c1cb2cd8Shaad int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
1154c1cb2cd8Shaad {
1155c1cb2cd8Shaad zap_t *zap;
1156c1cb2cd8Shaad mzap_ent_t *mze;
1157*ba2539a9Schs uint64_t oldval;
1158c1cb2cd8Shaad const uint64_t *intval = val;
1159c1cb2cd8Shaad zap_name_t *zn;
1160c1cb2cd8Shaad int err;
1161c1cb2cd8Shaad
1162*ba2539a9Schs #ifdef ZFS_DEBUG
1163*ba2539a9Schs /*
1164*ba2539a9Schs * If there is an old value, it shouldn't change across the
1165*ba2539a9Schs * lockdir (eg, due to bprewrite's xlation).
1166*ba2539a9Schs */
1167*ba2539a9Schs if (integer_size == 8 && num_integers == 1)
1168*ba2539a9Schs (void) zap_lookup(os, zapobj, name, 8, 1, &oldval);
1169*ba2539a9Schs #endif
1170*ba2539a9Schs
1171*ba2539a9Schs err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
1172c1cb2cd8Shaad if (err)
1173c1cb2cd8Shaad return (err);
1174c1cb2cd8Shaad zn = zap_name_alloc(zap, name, MT_EXACT);
1175c1cb2cd8Shaad if (zn == NULL) {
1176*ba2539a9Schs zap_unlockdir(zap, FTAG);
1177*ba2539a9Schs return (SET_ERROR(ENOTSUP));
1178c1cb2cd8Shaad }
1179c1cb2cd8Shaad if (!zap->zap_ismicro) {
1180*ba2539a9Schs err = fzap_update(zn, integer_size, num_integers, val,
1181*ba2539a9Schs FTAG, tx);
1182c1cb2cd8Shaad zap = zn->zn_zap; /* fzap_update() may change zap */
1183c1cb2cd8Shaad } else if (integer_size != 8 || num_integers != 1 ||
1184c1cb2cd8Shaad strlen(name) >= MZAP_NAME_LEN) {
1185c1cb2cd8Shaad dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n",
1186c1cb2cd8Shaad zapobj, integer_size, num_integers, name);
1187*ba2539a9Schs err = mzap_upgrade(&zn->zn_zap, FTAG, tx, 0);
1188*ba2539a9Schs if (err == 0) {
1189c1cb2cd8Shaad err = fzap_update(zn, integer_size, num_integers,
1190*ba2539a9Schs val, FTAG, tx);
1191*ba2539a9Schs }
1192c1cb2cd8Shaad zap = zn->zn_zap; /* fzap_update() may change zap */
1193c1cb2cd8Shaad } else {
1194c1cb2cd8Shaad mze = mze_find(zn);
1195c1cb2cd8Shaad if (mze != NULL) {
1196*ba2539a9Schs ASSERT3U(MZE_PHYS(zap, mze)->mze_value, ==, oldval);
1197*ba2539a9Schs MZE_PHYS(zap, mze)->mze_value = *intval;
1198c1cb2cd8Shaad } else {
1199c1cb2cd8Shaad mzap_addent(zn, *intval);
1200c1cb2cd8Shaad }
1201c1cb2cd8Shaad }
1202c1cb2cd8Shaad ASSERT(zap == zn->zn_zap);
1203c1cb2cd8Shaad zap_name_free(zn);
1204c1cb2cd8Shaad if (zap != NULL) /* may be NULL if fzap_upgrade() failed */
1205*ba2539a9Schs zap_unlockdir(zap, FTAG);
1206c1cb2cd8Shaad return (err);
1207c1cb2cd8Shaad }
1208c1cb2cd8Shaad
1209c1cb2cd8Shaad int
zap_update_uint64(objset_t * os,uint64_t zapobj,const uint64_t * key,int key_numints,int integer_size,uint64_t num_integers,const void * val,dmu_tx_t * tx)1210a252d550Shaad zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
1211a252d550Shaad int key_numints,
1212a252d550Shaad int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
1213a252d550Shaad {
1214a252d550Shaad zap_t *zap;
1215a252d550Shaad zap_name_t *zn;
1216a252d550Shaad int err;
1217a252d550Shaad
1218*ba2539a9Schs err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
1219a252d550Shaad if (err)
1220a252d550Shaad return (err);
1221a252d550Shaad zn = zap_name_alloc_uint64(zap, key, key_numints);
1222a252d550Shaad if (zn == NULL) {
1223*ba2539a9Schs zap_unlockdir(zap, FTAG);
1224*ba2539a9Schs return (SET_ERROR(ENOTSUP));
1225a252d550Shaad }
1226*ba2539a9Schs err = fzap_update(zn, integer_size, num_integers, val, FTAG, tx);
1227a252d550Shaad zap = zn->zn_zap; /* fzap_update() may change zap */
1228a252d550Shaad zap_name_free(zn);
1229a252d550Shaad if (zap != NULL) /* may be NULL if fzap_upgrade() failed */
1230*ba2539a9Schs zap_unlockdir(zap, FTAG);
1231a252d550Shaad return (err);
1232a252d550Shaad }
1233a252d550Shaad
1234a252d550Shaad int
zap_remove(objset_t * os,uint64_t zapobj,const char * name,dmu_tx_t * tx)1235c1cb2cd8Shaad zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx)
1236c1cb2cd8Shaad {
1237c1cb2cd8Shaad return (zap_remove_norm(os, zapobj, name, MT_EXACT, tx));
1238c1cb2cd8Shaad }
1239c1cb2cd8Shaad
1240c1cb2cd8Shaad int
zap_remove_norm(objset_t * os,uint64_t zapobj,const char * name,matchtype_t mt,dmu_tx_t * tx)1241c1cb2cd8Shaad zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name,
1242c1cb2cd8Shaad matchtype_t mt, dmu_tx_t *tx)
1243c1cb2cd8Shaad {
1244c1cb2cd8Shaad zap_t *zap;
1245c1cb2cd8Shaad int err;
1246c1cb2cd8Shaad mzap_ent_t *mze;
1247c1cb2cd8Shaad zap_name_t *zn;
1248c1cb2cd8Shaad
1249*ba2539a9Schs err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
1250c1cb2cd8Shaad if (err)
1251c1cb2cd8Shaad return (err);
1252c1cb2cd8Shaad zn = zap_name_alloc(zap, name, mt);
1253c1cb2cd8Shaad if (zn == NULL) {
1254*ba2539a9Schs zap_unlockdir(zap, FTAG);
1255*ba2539a9Schs return (SET_ERROR(ENOTSUP));
1256c1cb2cd8Shaad }
1257c1cb2cd8Shaad if (!zap->zap_ismicro) {
1258c1cb2cd8Shaad err = fzap_remove(zn, tx);
1259c1cb2cd8Shaad } else {
1260c1cb2cd8Shaad mze = mze_find(zn);
1261c1cb2cd8Shaad if (mze == NULL) {
1262*ba2539a9Schs err = SET_ERROR(ENOENT);
1263c1cb2cd8Shaad } else {
1264c1cb2cd8Shaad zap->zap_m.zap_num_entries--;
1265*ba2539a9Schs bzero(&zap_m_phys(zap)->mz_chunk[mze->mze_chunkid],
1266c1cb2cd8Shaad sizeof (mzap_ent_phys_t));
1267c1cb2cd8Shaad mze_remove(zap, mze);
1268c1cb2cd8Shaad }
1269c1cb2cd8Shaad }
1270c1cb2cd8Shaad zap_name_free(zn);
1271*ba2539a9Schs zap_unlockdir(zap, FTAG);
1272c1cb2cd8Shaad return (err);
1273c1cb2cd8Shaad }
1274c1cb2cd8Shaad
1275a252d550Shaad int
zap_remove_uint64(objset_t * os,uint64_t zapobj,const uint64_t * key,int key_numints,dmu_tx_t * tx)1276a252d550Shaad zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
1277a252d550Shaad int key_numints, dmu_tx_t *tx)
1278a252d550Shaad {
1279a252d550Shaad zap_t *zap;
1280a252d550Shaad int err;
1281a252d550Shaad zap_name_t *zn;
1282a252d550Shaad
1283*ba2539a9Schs err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
1284a252d550Shaad if (err)
1285a252d550Shaad return (err);
1286a252d550Shaad zn = zap_name_alloc_uint64(zap, key, key_numints);
1287a252d550Shaad if (zn == NULL) {
1288*ba2539a9Schs zap_unlockdir(zap, FTAG);
1289*ba2539a9Schs return (SET_ERROR(ENOTSUP));
1290a252d550Shaad }
1291a252d550Shaad err = fzap_remove(zn, tx);
1292a252d550Shaad zap_name_free(zn);
1293*ba2539a9Schs zap_unlockdir(zap, FTAG);
1294a252d550Shaad return (err);
1295a252d550Shaad }
1296a252d550Shaad
1297c1cb2cd8Shaad /*
1298c1cb2cd8Shaad * Routines for iterating over the attributes.
1299c1cb2cd8Shaad */
1300c1cb2cd8Shaad
1301c1cb2cd8Shaad void
zap_cursor_init_serialized(zap_cursor_t * zc,objset_t * os,uint64_t zapobj,uint64_t serialized)1302c1cb2cd8Shaad zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj,
1303c1cb2cd8Shaad uint64_t serialized)
1304c1cb2cd8Shaad {
1305c1cb2cd8Shaad zc->zc_objset = os;
1306c1cb2cd8Shaad zc->zc_zap = NULL;
1307c1cb2cd8Shaad zc->zc_leaf = NULL;
1308c1cb2cd8Shaad zc->zc_zapobj = zapobj;
1309a252d550Shaad zc->zc_serialized = serialized;
1310a252d550Shaad zc->zc_hash = 0;
1311c1cb2cd8Shaad zc->zc_cd = 0;
1312c1cb2cd8Shaad }
1313c1cb2cd8Shaad
1314c1cb2cd8Shaad void
zap_cursor_init(zap_cursor_t * zc,objset_t * os,uint64_t zapobj)1315c1cb2cd8Shaad zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj)
1316c1cb2cd8Shaad {
1317c1cb2cd8Shaad zap_cursor_init_serialized(zc, os, zapobj, 0);
1318c1cb2cd8Shaad }
1319c1cb2cd8Shaad
1320c1cb2cd8Shaad void
zap_cursor_fini(zap_cursor_t * zc)1321c1cb2cd8Shaad zap_cursor_fini(zap_cursor_t *zc)
1322c1cb2cd8Shaad {
1323c1cb2cd8Shaad if (zc->zc_zap) {
1324c1cb2cd8Shaad rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
1325*ba2539a9Schs zap_unlockdir(zc->zc_zap, NULL);
1326c1cb2cd8Shaad zc->zc_zap = NULL;
1327c1cb2cd8Shaad }
1328c1cb2cd8Shaad if (zc->zc_leaf) {
1329c1cb2cd8Shaad rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
1330c1cb2cd8Shaad zap_put_leaf(zc->zc_leaf);
1331c1cb2cd8Shaad zc->zc_leaf = NULL;
1332c1cb2cd8Shaad }
1333c1cb2cd8Shaad zc->zc_objset = NULL;
1334c1cb2cd8Shaad }
1335c1cb2cd8Shaad
1336c1cb2cd8Shaad uint64_t
zap_cursor_serialize(zap_cursor_t * zc)1337c1cb2cd8Shaad zap_cursor_serialize(zap_cursor_t *zc)
1338c1cb2cd8Shaad {
1339c1cb2cd8Shaad if (zc->zc_hash == -1ULL)
1340c1cb2cd8Shaad return (-1ULL);
1341a252d550Shaad if (zc->zc_zap == NULL)
1342a252d550Shaad return (zc->zc_serialized);
1343a252d550Shaad ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0);
1344a252d550Shaad ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap));
1345a252d550Shaad
1346a252d550Shaad /*
1347a252d550Shaad * We want to keep the high 32 bits of the cursor zero if we can, so
1348a252d550Shaad * that 32-bit programs can access this. So usually use a small
1349a252d550Shaad * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits
1350a252d550Shaad * of the cursor.
1351a252d550Shaad *
1352a252d550Shaad * [ collision differentiator | zap_hashbits()-bit hash value ]
1353a252d550Shaad */
1354a252d550Shaad return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) |
1355a252d550Shaad ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap)));
1356c1cb2cd8Shaad }
1357c1cb2cd8Shaad
1358c1cb2cd8Shaad int
zap_cursor_retrieve(zap_cursor_t * zc,zap_attribute_t * za)1359c1cb2cd8Shaad zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
1360c1cb2cd8Shaad {
1361c1cb2cd8Shaad int err;
1362c1cb2cd8Shaad avl_index_t idx;
1363c1cb2cd8Shaad mzap_ent_t mze_tofind;
1364c1cb2cd8Shaad mzap_ent_t *mze;
1365c1cb2cd8Shaad
1366c1cb2cd8Shaad if (zc->zc_hash == -1ULL)
1367*ba2539a9Schs return (SET_ERROR(ENOENT));
1368c1cb2cd8Shaad
1369c1cb2cd8Shaad if (zc->zc_zap == NULL) {
1370a252d550Shaad int hb;
1371c1cb2cd8Shaad err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL,
1372*ba2539a9Schs RW_READER, TRUE, FALSE, NULL, &zc->zc_zap);
1373c1cb2cd8Shaad if (err)
1374c1cb2cd8Shaad return (err);
1375a252d550Shaad
1376a252d550Shaad /*
1377a252d550Shaad * To support zap_cursor_init_serialized, advance, retrieve,
1378a252d550Shaad * we must add to the existing zc_cd, which may already
1379a252d550Shaad * be 1 due to the zap_cursor_advance.
1380a252d550Shaad */
1381a252d550Shaad ASSERT(zc->zc_hash == 0);
1382a252d550Shaad hb = zap_hashbits(zc->zc_zap);
1383a252d550Shaad zc->zc_hash = zc->zc_serialized << (64 - hb);
1384a252d550Shaad zc->zc_cd += zc->zc_serialized >> hb;
1385a252d550Shaad if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */
1386a252d550Shaad zc->zc_cd = 0;
1387c1cb2cd8Shaad } else {
1388c1cb2cd8Shaad rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
1389c1cb2cd8Shaad }
1390c1cb2cd8Shaad if (!zc->zc_zap->zap_ismicro) {
1391c1cb2cd8Shaad err = fzap_cursor_retrieve(zc->zc_zap, zc, za);
1392c1cb2cd8Shaad } else {
1393c1cb2cd8Shaad mze_tofind.mze_hash = zc->zc_hash;
1394*ba2539a9Schs mze_tofind.mze_cd = zc->zc_cd;
1395c1cb2cd8Shaad
1396c1cb2cd8Shaad mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx);
1397c1cb2cd8Shaad if (mze == NULL) {
1398c1cb2cd8Shaad mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl,
1399c1cb2cd8Shaad idx, AVL_AFTER);
1400c1cb2cd8Shaad }
1401c1cb2cd8Shaad if (mze) {
1402*ba2539a9Schs mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze);
1403*ba2539a9Schs ASSERT3U(mze->mze_cd, ==, mzep->mze_cd);
1404c1cb2cd8Shaad za->za_normalization_conflict =
1405c1cb2cd8Shaad mzap_normalization_conflict(zc->zc_zap, NULL, mze);
1406c1cb2cd8Shaad za->za_integer_length = 8;
1407c1cb2cd8Shaad za->za_num_integers = 1;
1408*ba2539a9Schs za->za_first_integer = mzep->mze_value;
1409*ba2539a9Schs (void) strcpy(za->za_name, mzep->mze_name);
1410c1cb2cd8Shaad zc->zc_hash = mze->mze_hash;
1411*ba2539a9Schs zc->zc_cd = mze->mze_cd;
1412c1cb2cd8Shaad err = 0;
1413c1cb2cd8Shaad } else {
1414c1cb2cd8Shaad zc->zc_hash = -1ULL;
1415*ba2539a9Schs err = SET_ERROR(ENOENT);
1416c1cb2cd8Shaad }
1417c1cb2cd8Shaad }
1418c1cb2cd8Shaad rw_exit(&zc->zc_zap->zap_rwlock);
1419c1cb2cd8Shaad return (err);
1420c1cb2cd8Shaad }
1421c1cb2cd8Shaad
1422c1cb2cd8Shaad void
zap_cursor_advance(zap_cursor_t * zc)1423c1cb2cd8Shaad zap_cursor_advance(zap_cursor_t *zc)
1424c1cb2cd8Shaad {
1425c1cb2cd8Shaad if (zc->zc_hash == -1ULL)
1426c1cb2cd8Shaad return;
1427c1cb2cd8Shaad zc->zc_cd++;
1428c1cb2cd8Shaad }
1429a252d550Shaad
1430a252d550Shaad int
zap_cursor_move_to_key(zap_cursor_t * zc,const char * name,matchtype_t mt)1431a252d550Shaad zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt)
1432a252d550Shaad {
1433a252d550Shaad int err = 0;
1434a252d550Shaad mzap_ent_t *mze;
1435a252d550Shaad zap_name_t *zn;
1436a252d550Shaad
1437a252d550Shaad if (zc->zc_zap == NULL) {
1438a252d550Shaad err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL,
1439*ba2539a9Schs RW_READER, TRUE, FALSE, FTAG, &zc->zc_zap);
1440a252d550Shaad if (err)
1441a252d550Shaad return (err);
1442a252d550Shaad } else {
1443a252d550Shaad rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
1444a252d550Shaad }
1445a252d550Shaad
1446a252d550Shaad zn = zap_name_alloc(zc->zc_zap, name, mt);
1447a252d550Shaad if (zn == NULL) {
1448a252d550Shaad rw_exit(&zc->zc_zap->zap_rwlock);
1449*ba2539a9Schs return (SET_ERROR(ENOTSUP));
1450a252d550Shaad }
1451a252d550Shaad
1452a252d550Shaad if (!zc->zc_zap->zap_ismicro) {
1453a252d550Shaad err = fzap_cursor_move_to_key(zc, zn);
1454a252d550Shaad } else {
1455a252d550Shaad mze = mze_find(zn);
1456a252d550Shaad if (mze == NULL) {
1457*ba2539a9Schs err = SET_ERROR(ENOENT);
1458a252d550Shaad goto out;
1459a252d550Shaad }
1460a252d550Shaad zc->zc_hash = mze->mze_hash;
1461*ba2539a9Schs zc->zc_cd = mze->mze_cd;
1462a252d550Shaad }
1463a252d550Shaad
1464a252d550Shaad out:
1465a252d550Shaad zap_name_free(zn);
1466a252d550Shaad rw_exit(&zc->zc_zap->zap_rwlock);
1467a252d550Shaad return (err);
1468c1cb2cd8Shaad }
1469c1cb2cd8Shaad
1470c1cb2cd8Shaad int
zap_get_stats(objset_t * os,uint64_t zapobj,zap_stats_t * zs)1471c1cb2cd8Shaad zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
1472c1cb2cd8Shaad {
1473c1cb2cd8Shaad int err;
1474c1cb2cd8Shaad zap_t *zap;
1475c1cb2cd8Shaad
1476*ba2539a9Schs err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
1477c1cb2cd8Shaad if (err)
1478c1cb2cd8Shaad return (err);
1479c1cb2cd8Shaad
1480c1cb2cd8Shaad bzero(zs, sizeof (zap_stats_t));
1481c1cb2cd8Shaad
1482c1cb2cd8Shaad if (zap->zap_ismicro) {
1483c1cb2cd8Shaad zs->zs_blocksize = zap->zap_dbuf->db_size;
1484c1cb2cd8Shaad zs->zs_num_entries = zap->zap_m.zap_num_entries;
1485c1cb2cd8Shaad zs->zs_num_blocks = 1;
1486c1cb2cd8Shaad } else {
1487c1cb2cd8Shaad fzap_get_stats(zap, zs);
1488c1cb2cd8Shaad }
1489*ba2539a9Schs zap_unlockdir(zap, FTAG);
1490c1cb2cd8Shaad return (0);
1491c1cb2cd8Shaad }
1492a252d550Shaad
1493a252d550Shaad int
zap_count_write_by_dnode(dnode_t * dn,const char * name,int add,refcount_t * towrite,refcount_t * tooverwrite)1494*ba2539a9Schs zap_count_write_by_dnode(dnode_t *dn, const char *name, int add,
1495*ba2539a9Schs refcount_t *towrite, refcount_t *tooverwrite)
1496a252d550Shaad {
1497a252d550Shaad zap_t *zap;
1498a252d550Shaad int err = 0;
1499a252d550Shaad
1500a252d550Shaad /*
1501a252d550Shaad * Since, we don't have a name, we cannot figure out which blocks will
1502a252d550Shaad * be affected in this operation. So, account for the worst case :
1503a252d550Shaad * - 3 blocks overwritten: target leaf, ptrtbl block, header block
1504a252d550Shaad * - 4 new blocks written if adding:
1505a252d550Shaad * - 2 blocks for possibly split leaves,
1506a252d550Shaad * - 2 grown ptrtbl blocks
1507a252d550Shaad *
1508*ba2539a9Schs * This also accommodates the case where an add operation to a fairly
1509a252d550Shaad * large microzap results in a promotion to fatzap.
1510a252d550Shaad */
1511a252d550Shaad if (name == NULL) {
1512*ba2539a9Schs (void) refcount_add_many(towrite,
1513*ba2539a9Schs (3 + (add ? 4 : 0)) * SPA_OLD_MAXBLOCKSIZE, FTAG);
1514a252d550Shaad return (err);
1515a252d550Shaad }
1516a252d550Shaad
1517a252d550Shaad /*
1518a252d550Shaad * We lock the zap with adding == FALSE. Because, if we pass
1519a252d550Shaad * the actual value of add, it could trigger a mzap_upgrade().
1520a252d550Shaad * At present we are just evaluating the possibility of this operation
1521a252d550Shaad * and hence we do not want to trigger an upgrade.
1522a252d550Shaad */
1523*ba2539a9Schs err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
1524*ba2539a9Schs FTAG, &zap);
1525*ba2539a9Schs if (err != 0)
1526a252d550Shaad return (err);
1527a252d550Shaad
1528a252d550Shaad if (!zap->zap_ismicro) {
1529a252d550Shaad zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT);
1530a252d550Shaad if (zn) {
1531a252d550Shaad err = fzap_count_write(zn, add, towrite,
1532a252d550Shaad tooverwrite);
1533a252d550Shaad zap_name_free(zn);
1534a252d550Shaad } else {
1535a252d550Shaad /*
1536a252d550Shaad * We treat this case as similar to (name == NULL)
1537a252d550Shaad */
1538*ba2539a9Schs (void) refcount_add_many(towrite,
1539*ba2539a9Schs (3 + (add ? 4 : 0)) * SPA_OLD_MAXBLOCKSIZE, FTAG);
1540a252d550Shaad }
1541a252d550Shaad } else {
1542a252d550Shaad /*
1543a252d550Shaad * We are here if (name != NULL) and this is a micro-zap.
1544a252d550Shaad * We account for the header block depending on whether it
1545a252d550Shaad * is freeable.
1546a252d550Shaad *
1547a252d550Shaad * Incase of an add-operation it is hard to find out
1548a252d550Shaad * if this add will promote this microzap to fatzap.
1549a252d550Shaad * Hence, we consider the worst case and account for the
1550a252d550Shaad * blocks assuming this microzap would be promoted to a
1551a252d550Shaad * fatzap.
1552a252d550Shaad *
1553a252d550Shaad * 1 block overwritten : header block
1554a252d550Shaad * 4 new blocks written : 2 new split leaf, 2 grown
1555a252d550Shaad * ptrtbl blocks
1556a252d550Shaad */
1557*ba2539a9Schs if (dmu_buf_freeable(zap->zap_dbuf)) {
1558*ba2539a9Schs (void) refcount_add_many(tooverwrite,
1559*ba2539a9Schs MZAP_MAX_BLKSZ, FTAG);
1560*ba2539a9Schs } else {
1561*ba2539a9Schs (void) refcount_add_many(towrite,
1562*ba2539a9Schs MZAP_MAX_BLKSZ, FTAG);
1563*ba2539a9Schs }
1564a252d550Shaad
1565a252d550Shaad if (add) {
1566*ba2539a9Schs (void) refcount_add_many(towrite,
1567*ba2539a9Schs 4 * MZAP_MAX_BLKSZ, FTAG);
1568a252d550Shaad }
1569a252d550Shaad }
1570a252d550Shaad
1571*ba2539a9Schs zap_unlockdir(zap, FTAG);
1572a252d550Shaad return (err);
1573a252d550Shaad }
1574