xref: /netbsd-src/external/cddl/osnet/dist/uts/common/fs/zfs/zap_micro.c (revision ba2539a9805a0544ff82c0003cc02fe1eee5603d)
1c1cb2cd8Shaad /*
2c1cb2cd8Shaad  * CDDL HEADER START
3c1cb2cd8Shaad  *
4c1cb2cd8Shaad  * The contents of this file are subject to the terms of the
5c1cb2cd8Shaad  * Common Development and Distribution License (the "License").
6c1cb2cd8Shaad  * You may not use this file except in compliance with the License.
7c1cb2cd8Shaad  *
8c1cb2cd8Shaad  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9c1cb2cd8Shaad  * or http://www.opensolaris.org/os/licensing.
10c1cb2cd8Shaad  * See the License for the specific language governing permissions
11c1cb2cd8Shaad  * and limitations under the License.
12c1cb2cd8Shaad  *
13c1cb2cd8Shaad  * When distributing Covered Code, include this CDDL HEADER in each
14c1cb2cd8Shaad  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15c1cb2cd8Shaad  * If applicable, add the following below this CDDL HEADER, with the
16c1cb2cd8Shaad  * fields enclosed by brackets "[]" replaced with your own identifying
17c1cb2cd8Shaad  * information: Portions Copyright [yyyy] [name of copyright owner]
18c1cb2cd8Shaad  *
19c1cb2cd8Shaad  * CDDL HEADER END
20c1cb2cd8Shaad  */
21c1cb2cd8Shaad /*
22*ba2539a9Schs  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23*ba2539a9Schs  * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
24*ba2539a9Schs  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
25*ba2539a9Schs  * Copyright (c) 2014 Integros [integros.com]
26c1cb2cd8Shaad  */
27c1cb2cd8Shaad 
28a252d550Shaad #include <sys/zio.h>
29c1cb2cd8Shaad #include <sys/spa.h>
30c1cb2cd8Shaad #include <sys/dmu.h>
31c1cb2cd8Shaad #include <sys/zfs_context.h>
32c1cb2cd8Shaad #include <sys/zap.h>
33c1cb2cd8Shaad #include <sys/refcount.h>
34c1cb2cd8Shaad #include <sys/zap_impl.h>
35c1cb2cd8Shaad #include <sys/zap_leaf.h>
36c1cb2cd8Shaad #include <sys/avl.h>
37*ba2539a9Schs #include <sys/arc.h>
38*ba2539a9Schs #include <sys/dmu_objset.h>
39c1cb2cd8Shaad 
40c1cb2cd8Shaad #ifdef _KERNEL
41c1cb2cd8Shaad #include <sys/sunddi.h>
42c1cb2cd8Shaad #endif
43c1cb2cd8Shaad 
44*ba2539a9Schs extern inline mzap_phys_t *zap_m_phys(zap_t *zap);
45*ba2539a9Schs 
46*ba2539a9Schs static int mzap_upgrade(zap_t **zapp,
47*ba2539a9Schs     void *tag, dmu_tx_t *tx, zap_flags_t flags);
48c1cb2cd8Shaad 
49a252d550Shaad uint64_t
zap_getflags(zap_t * zap)50a252d550Shaad zap_getflags(zap_t *zap)
51c1cb2cd8Shaad {
52a252d550Shaad 	if (zap->zap_ismicro)
53a252d550Shaad 		return (0);
54*ba2539a9Schs 	return (zap_f_phys(zap)->zap_flags);
55c1cb2cd8Shaad }
56c1cb2cd8Shaad 
57a252d550Shaad int
zap_hashbits(zap_t * zap)58a252d550Shaad zap_hashbits(zap_t *zap)
59a252d550Shaad {
60a252d550Shaad 	if (zap_getflags(zap) & ZAP_FLAG_HASH64)
61a252d550Shaad 		return (48);
62a252d550Shaad 	else
63a252d550Shaad 		return (28);
64a252d550Shaad }
65a252d550Shaad 
66a252d550Shaad uint32_t
zap_maxcd(zap_t * zap)67a252d550Shaad zap_maxcd(zap_t *zap)
68a252d550Shaad {
69a252d550Shaad 	if (zap_getflags(zap) & ZAP_FLAG_HASH64)
70a252d550Shaad 		return ((1<<16)-1);
71a252d550Shaad 	else
72a252d550Shaad 		return (-1U);
73a252d550Shaad }
74a252d550Shaad 
75a252d550Shaad static uint64_t
zap_hash(zap_name_t * zn)76a252d550Shaad zap_hash(zap_name_t *zn)
77a252d550Shaad {
78a252d550Shaad 	zap_t *zap = zn->zn_zap;
79a252d550Shaad 	uint64_t h = 0;
80a252d550Shaad 
81a252d550Shaad 	if (zap_getflags(zap) & ZAP_FLAG_PRE_HASHED_KEY) {
82a252d550Shaad 		ASSERT(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY);
83a252d550Shaad 		h = *(uint64_t *)zn->zn_key_orig;
84a252d550Shaad 	} else {
85a252d550Shaad 		h = zap->zap_salt;
86a252d550Shaad 		ASSERT(h != 0);
87a252d550Shaad 		ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
88a252d550Shaad 
89a252d550Shaad 		if (zap_getflags(zap) & ZAP_FLAG_UINT64_KEY) {
90a252d550Shaad 			int i;
91a252d550Shaad 			const uint64_t *wp = zn->zn_key_norm;
92a252d550Shaad 
93a252d550Shaad 			ASSERT(zn->zn_key_intlen == 8);
94a252d550Shaad 			for (i = 0; i < zn->zn_key_norm_numints; wp++, i++) {
95a252d550Shaad 				int j;
96a252d550Shaad 				uint64_t word = *wp;
97a252d550Shaad 
98a252d550Shaad 				for (j = 0; j < zn->zn_key_intlen; j++) {
99a252d550Shaad 					h = (h >> 8) ^
100a252d550Shaad 					    zfs_crc64_table[(h ^ word) & 0xFF];
101a252d550Shaad 					word >>= NBBY;
102a252d550Shaad 				}
103a252d550Shaad 			}
104a252d550Shaad 		} else {
105a252d550Shaad 			int i, len;
106a252d550Shaad 			const uint8_t *cp = zn->zn_key_norm;
107a252d550Shaad 
108c1cb2cd8Shaad 			/*
109a252d550Shaad 			 * We previously stored the terminating null on
110a252d550Shaad 			 * disk, but didn't hash it, so we need to
111a252d550Shaad 			 * continue to not hash it.  (The
112a252d550Shaad 			 * zn_key_*_numints includes the terminating
113a252d550Shaad 			 * null for non-binary keys.)
114a252d550Shaad 			 */
115a252d550Shaad 			len = zn->zn_key_norm_numints - 1;
116a252d550Shaad 
117a252d550Shaad 			ASSERT(zn->zn_key_intlen == 1);
118a252d550Shaad 			for (i = 0; i < len; cp++, i++) {
119a252d550Shaad 				h = (h >> 8) ^
120a252d550Shaad 				    zfs_crc64_table[(h ^ *cp) & 0xFF];
121a252d550Shaad 			}
122a252d550Shaad 		}
123a252d550Shaad 	}
124a252d550Shaad 	/*
125a252d550Shaad 	 * Don't use all 64 bits, since we need some in the cookie for
126a252d550Shaad 	 * the collision differentiator.  We MUST use the high bits,
127a252d550Shaad 	 * since those are the ones that we first pay attention to when
128c1cb2cd8Shaad 	 * chosing the bucket.
129c1cb2cd8Shaad 	 */
130a252d550Shaad 	h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1);
131c1cb2cd8Shaad 
132a252d550Shaad 	return (h);
133c1cb2cd8Shaad }
134c1cb2cd8Shaad 
135c1cb2cd8Shaad static int
zap_normalize(zap_t * zap,const char * name,char * namenorm)136c1cb2cd8Shaad zap_normalize(zap_t *zap, const char *name, char *namenorm)
137c1cb2cd8Shaad {
138c1cb2cd8Shaad 	size_t inlen, outlen;
139c1cb2cd8Shaad 	int err;
140c1cb2cd8Shaad 
141a252d550Shaad 	ASSERT(!(zap_getflags(zap) & ZAP_FLAG_UINT64_KEY));
142a252d550Shaad 
143c1cb2cd8Shaad 	inlen = strlen(name) + 1;
144c1cb2cd8Shaad 	outlen = ZAP_MAXNAMELEN;
145c1cb2cd8Shaad 
146c1cb2cd8Shaad 	err = 0;
147c1cb2cd8Shaad 	(void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen,
148a252d550Shaad 	    zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL |
149a252d550Shaad 	    U8_TEXTPREP_IGNORE_INVALID, U8_UNICODE_LATEST, &err);
150c1cb2cd8Shaad 
151c1cb2cd8Shaad 	return (err);
152c1cb2cd8Shaad }
153c1cb2cd8Shaad 
154c1cb2cd8Shaad boolean_t
zap_match(zap_name_t * zn,const char * matchname)155c1cb2cd8Shaad zap_match(zap_name_t *zn, const char *matchname)
156c1cb2cd8Shaad {
157a252d550Shaad 	ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY));
158a252d550Shaad 
159c1cb2cd8Shaad 	if (zn->zn_matchtype == MT_FIRST) {
160c1cb2cd8Shaad 		char norm[ZAP_MAXNAMELEN];
161c1cb2cd8Shaad 
162c1cb2cd8Shaad 		if (zap_normalize(zn->zn_zap, matchname, norm) != 0)
163c1cb2cd8Shaad 			return (B_FALSE);
164c1cb2cd8Shaad 
165a252d550Shaad 		return (strcmp(zn->zn_key_norm, norm) == 0);
166c1cb2cd8Shaad 	} else {
167c1cb2cd8Shaad 		/* MT_BEST or MT_EXACT */
168a252d550Shaad 		return (strcmp(zn->zn_key_orig, matchname) == 0);
169c1cb2cd8Shaad 	}
170c1cb2cd8Shaad }
171c1cb2cd8Shaad 
172c1cb2cd8Shaad void
zap_name_free(zap_name_t * zn)173c1cb2cd8Shaad zap_name_free(zap_name_t *zn)
174c1cb2cd8Shaad {
175c1cb2cd8Shaad 	kmem_free(zn, sizeof (zap_name_t));
176c1cb2cd8Shaad }
177c1cb2cd8Shaad 
178c1cb2cd8Shaad zap_name_t *
zap_name_alloc(zap_t * zap,const char * key,matchtype_t mt)179a252d550Shaad zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt)
180c1cb2cd8Shaad {
181c1cb2cd8Shaad 	zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
182c1cb2cd8Shaad 
183c1cb2cd8Shaad 	zn->zn_zap = zap;
184a252d550Shaad 	zn->zn_key_intlen = sizeof (*key);
185a252d550Shaad 	zn->zn_key_orig = key;
186a252d550Shaad 	zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1;
187c1cb2cd8Shaad 	zn->zn_matchtype = mt;
188c1cb2cd8Shaad 	if (zap->zap_normflags) {
189a252d550Shaad 		if (zap_normalize(zap, key, zn->zn_normbuf) != 0) {
190c1cb2cd8Shaad 			zap_name_free(zn);
191c1cb2cd8Shaad 			return (NULL);
192c1cb2cd8Shaad 		}
193a252d550Shaad 		zn->zn_key_norm = zn->zn_normbuf;
194a252d550Shaad 		zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
195c1cb2cd8Shaad 	} else {
196c1cb2cd8Shaad 		if (mt != MT_EXACT) {
197c1cb2cd8Shaad 			zap_name_free(zn);
198c1cb2cd8Shaad 			return (NULL);
199c1cb2cd8Shaad 		}
200a252d550Shaad 		zn->zn_key_norm = zn->zn_key_orig;
201a252d550Shaad 		zn->zn_key_norm_numints = zn->zn_key_orig_numints;
202c1cb2cd8Shaad 	}
203c1cb2cd8Shaad 
204a252d550Shaad 	zn->zn_hash = zap_hash(zn);
205a252d550Shaad 	return (zn);
206a252d550Shaad }
207a252d550Shaad 
208a252d550Shaad zap_name_t *
zap_name_alloc_uint64(zap_t * zap,const uint64_t * key,int numints)209a252d550Shaad zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints)
210a252d550Shaad {
211a252d550Shaad 	zap_name_t *zn = kmem_alloc(sizeof (zap_name_t), KM_SLEEP);
212a252d550Shaad 
213a252d550Shaad 	ASSERT(zap->zap_normflags == 0);
214a252d550Shaad 	zn->zn_zap = zap;
215a252d550Shaad 	zn->zn_key_intlen = sizeof (*key);
216a252d550Shaad 	zn->zn_key_orig = zn->zn_key_norm = key;
217a252d550Shaad 	zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints;
218a252d550Shaad 	zn->zn_matchtype = MT_EXACT;
219a252d550Shaad 
220a252d550Shaad 	zn->zn_hash = zap_hash(zn);
221c1cb2cd8Shaad 	return (zn);
222c1cb2cd8Shaad }
223c1cb2cd8Shaad 
224c1cb2cd8Shaad static void
mzap_byteswap(mzap_phys_t * buf,size_t size)225c1cb2cd8Shaad mzap_byteswap(mzap_phys_t *buf, size_t size)
226c1cb2cd8Shaad {
227c1cb2cd8Shaad 	int i, max;
228c1cb2cd8Shaad 	buf->mz_block_type = BSWAP_64(buf->mz_block_type);
229c1cb2cd8Shaad 	buf->mz_salt = BSWAP_64(buf->mz_salt);
230c1cb2cd8Shaad 	buf->mz_normflags = BSWAP_64(buf->mz_normflags);
231c1cb2cd8Shaad 	max = (size / MZAP_ENT_LEN) - 1;
232c1cb2cd8Shaad 	for (i = 0; i < max; i++) {
233c1cb2cd8Shaad 		buf->mz_chunk[i].mze_value =
234c1cb2cd8Shaad 		    BSWAP_64(buf->mz_chunk[i].mze_value);
235c1cb2cd8Shaad 		buf->mz_chunk[i].mze_cd =
236c1cb2cd8Shaad 		    BSWAP_32(buf->mz_chunk[i].mze_cd);
237c1cb2cd8Shaad 	}
238c1cb2cd8Shaad }
239c1cb2cd8Shaad 
240c1cb2cd8Shaad void
zap_byteswap(void * buf,size_t size)241c1cb2cd8Shaad zap_byteswap(void *buf, size_t size)
242c1cb2cd8Shaad {
243c1cb2cd8Shaad 	uint64_t block_type;
244c1cb2cd8Shaad 
245c1cb2cd8Shaad 	block_type = *(uint64_t *)buf;
246c1cb2cd8Shaad 
247c1cb2cd8Shaad 	if (block_type == ZBT_MICRO || block_type == BSWAP_64(ZBT_MICRO)) {
248c1cb2cd8Shaad 		/* ASSERT(magic == ZAP_LEAF_MAGIC); */
249c1cb2cd8Shaad 		mzap_byteswap(buf, size);
250c1cb2cd8Shaad 	} else {
251c1cb2cd8Shaad 		fzap_byteswap(buf, size);
252c1cb2cd8Shaad 	}
253c1cb2cd8Shaad }
254c1cb2cd8Shaad 
255c1cb2cd8Shaad static int
mze_compare(const void * arg1,const void * arg2)256c1cb2cd8Shaad mze_compare(const void *arg1, const void *arg2)
257c1cb2cd8Shaad {
258c1cb2cd8Shaad 	const mzap_ent_t *mze1 = arg1;
259c1cb2cd8Shaad 	const mzap_ent_t *mze2 = arg2;
260c1cb2cd8Shaad 
261c1cb2cd8Shaad 	if (mze1->mze_hash > mze2->mze_hash)
262c1cb2cd8Shaad 		return (+1);
263c1cb2cd8Shaad 	if (mze1->mze_hash < mze2->mze_hash)
264c1cb2cd8Shaad 		return (-1);
265*ba2539a9Schs 	if (mze1->mze_cd > mze2->mze_cd)
266c1cb2cd8Shaad 		return (+1);
267*ba2539a9Schs 	if (mze1->mze_cd < mze2->mze_cd)
268c1cb2cd8Shaad 		return (-1);
269c1cb2cd8Shaad 	return (0);
270c1cb2cd8Shaad }
271c1cb2cd8Shaad 
272*ba2539a9Schs static int
mze_insert(zap_t * zap,int chunkid,uint64_t hash)273*ba2539a9Schs mze_insert(zap_t *zap, int chunkid, uint64_t hash)
274c1cb2cd8Shaad {
275c1cb2cd8Shaad 	mzap_ent_t *mze;
276*ba2539a9Schs 	avl_index_t idx;
277c1cb2cd8Shaad 
278c1cb2cd8Shaad 	ASSERT(zap->zap_ismicro);
279c1cb2cd8Shaad 	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
280c1cb2cd8Shaad 
281c1cb2cd8Shaad 	mze = kmem_alloc(sizeof (mzap_ent_t), KM_SLEEP);
282c1cb2cd8Shaad 	mze->mze_chunkid = chunkid;
283c1cb2cd8Shaad 	mze->mze_hash = hash;
284*ba2539a9Schs 	mze->mze_cd = MZE_PHYS(zap, mze)->mze_cd;
285*ba2539a9Schs 	ASSERT(MZE_PHYS(zap, mze)->mze_name[0] != 0);
286*ba2539a9Schs 	if (avl_find(&zap->zap_m.zap_avl, mze, &idx) != NULL) {
287*ba2539a9Schs 		kmem_free(mze, sizeof (mzap_ent_t));
288*ba2539a9Schs 		return (EEXIST);
289*ba2539a9Schs 	}
290*ba2539a9Schs 	avl_insert(&zap->zap_m.zap_avl, mze, idx);
291*ba2539a9Schs 	return (0);
292c1cb2cd8Shaad }
293c1cb2cd8Shaad 
294c1cb2cd8Shaad static mzap_ent_t *
mze_find(zap_name_t * zn)295c1cb2cd8Shaad mze_find(zap_name_t *zn)
296c1cb2cd8Shaad {
297c1cb2cd8Shaad 	mzap_ent_t mze_tofind;
298c1cb2cd8Shaad 	mzap_ent_t *mze;
299c1cb2cd8Shaad 	avl_index_t idx;
300c1cb2cd8Shaad 	avl_tree_t *avl = &zn->zn_zap->zap_m.zap_avl;
301c1cb2cd8Shaad 
302c1cb2cd8Shaad 	ASSERT(zn->zn_zap->zap_ismicro);
303c1cb2cd8Shaad 	ASSERT(RW_LOCK_HELD(&zn->zn_zap->zap_rwlock));
304c1cb2cd8Shaad 
305c1cb2cd8Shaad 	mze_tofind.mze_hash = zn->zn_hash;
306*ba2539a9Schs 	mze_tofind.mze_cd = 0;
307c1cb2cd8Shaad 
308c1cb2cd8Shaad again:
309c1cb2cd8Shaad 	mze = avl_find(avl, &mze_tofind, &idx);
310c1cb2cd8Shaad 	if (mze == NULL)
311c1cb2cd8Shaad 		mze = avl_nearest(avl, idx, AVL_AFTER);
312c1cb2cd8Shaad 	for (; mze && mze->mze_hash == zn->zn_hash; mze = AVL_NEXT(avl, mze)) {
313*ba2539a9Schs 		ASSERT3U(mze->mze_cd, ==, MZE_PHYS(zn->zn_zap, mze)->mze_cd);
314*ba2539a9Schs 		if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name))
315c1cb2cd8Shaad 			return (mze);
316c1cb2cd8Shaad 	}
317c1cb2cd8Shaad 	if (zn->zn_matchtype == MT_BEST) {
318c1cb2cd8Shaad 		zn->zn_matchtype = MT_FIRST;
319c1cb2cd8Shaad 		goto again;
320c1cb2cd8Shaad 	}
321c1cb2cd8Shaad 	return (NULL);
322c1cb2cd8Shaad }
323c1cb2cd8Shaad 
324c1cb2cd8Shaad static uint32_t
mze_find_unused_cd(zap_t * zap,uint64_t hash)325c1cb2cd8Shaad mze_find_unused_cd(zap_t *zap, uint64_t hash)
326c1cb2cd8Shaad {
327c1cb2cd8Shaad 	mzap_ent_t mze_tofind;
328c1cb2cd8Shaad 	mzap_ent_t *mze;
329c1cb2cd8Shaad 	avl_index_t idx;
330c1cb2cd8Shaad 	avl_tree_t *avl = &zap->zap_m.zap_avl;
331c1cb2cd8Shaad 	uint32_t cd;
332c1cb2cd8Shaad 
333c1cb2cd8Shaad 	ASSERT(zap->zap_ismicro);
334c1cb2cd8Shaad 	ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
335c1cb2cd8Shaad 
336c1cb2cd8Shaad 	mze_tofind.mze_hash = hash;
337*ba2539a9Schs 	mze_tofind.mze_cd = 0;
338c1cb2cd8Shaad 
339c1cb2cd8Shaad 	cd = 0;
340c1cb2cd8Shaad 	for (mze = avl_find(avl, &mze_tofind, &idx);
341c1cb2cd8Shaad 	    mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
342*ba2539a9Schs 		if (mze->mze_cd != cd)
343c1cb2cd8Shaad 			break;
344c1cb2cd8Shaad 		cd++;
345c1cb2cd8Shaad 	}
346c1cb2cd8Shaad 
347c1cb2cd8Shaad 	return (cd);
348c1cb2cd8Shaad }
349c1cb2cd8Shaad 
350c1cb2cd8Shaad static void
mze_remove(zap_t * zap,mzap_ent_t * mze)351c1cb2cd8Shaad mze_remove(zap_t *zap, mzap_ent_t *mze)
352c1cb2cd8Shaad {
353c1cb2cd8Shaad 	ASSERT(zap->zap_ismicro);
354c1cb2cd8Shaad 	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
355c1cb2cd8Shaad 
356c1cb2cd8Shaad 	avl_remove(&zap->zap_m.zap_avl, mze);
357c1cb2cd8Shaad 	kmem_free(mze, sizeof (mzap_ent_t));
358c1cb2cd8Shaad }
359c1cb2cd8Shaad 
360c1cb2cd8Shaad static void
mze_destroy(zap_t * zap)361c1cb2cd8Shaad mze_destroy(zap_t *zap)
362c1cb2cd8Shaad {
363c1cb2cd8Shaad 	mzap_ent_t *mze;
364c1cb2cd8Shaad 	void *avlcookie = NULL;
365c1cb2cd8Shaad 
366c1cb2cd8Shaad 	while (mze = avl_destroy_nodes(&zap->zap_m.zap_avl, &avlcookie))
367c1cb2cd8Shaad 		kmem_free(mze, sizeof (mzap_ent_t));
368c1cb2cd8Shaad 	avl_destroy(&zap->zap_m.zap_avl);
369c1cb2cd8Shaad }
370c1cb2cd8Shaad 
371c1cb2cd8Shaad static zap_t *
mzap_open(objset_t * os,uint64_t obj,dmu_buf_t * db)372c1cb2cd8Shaad mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
373c1cb2cd8Shaad {
374c1cb2cd8Shaad 	zap_t *winner;
375c1cb2cd8Shaad 	zap_t *zap;
376c1cb2cd8Shaad 	int i;
377*ba2539a9Schs 	uint64_t *zap_hdr = (uint64_t *)db->db_data;
378*ba2539a9Schs 	uint64_t zap_block_type = zap_hdr[0];
379*ba2539a9Schs 	uint64_t zap_magic = zap_hdr[1];
380c1cb2cd8Shaad 
381c1cb2cd8Shaad 	ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t));
382c1cb2cd8Shaad 
383c1cb2cd8Shaad 	zap = kmem_zalloc(sizeof (zap_t), KM_SLEEP);
384c1cb2cd8Shaad 	rw_init(&zap->zap_rwlock, 0, 0, 0);
385c1cb2cd8Shaad 	rw_enter(&zap->zap_rwlock, RW_WRITER);
386c1cb2cd8Shaad 	zap->zap_objset = os;
387c1cb2cd8Shaad 	zap->zap_object = obj;
388c1cb2cd8Shaad 	zap->zap_dbuf = db;
389c1cb2cd8Shaad 
390*ba2539a9Schs 	if (zap_block_type != ZBT_MICRO) {
391c1cb2cd8Shaad 		mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
392*ba2539a9Schs 		zap->zap_f.zap_block_shift = highbit64(db->db_size) - 1;
393*ba2539a9Schs 		if (zap_block_type != ZBT_HEADER || zap_magic != ZAP_MAGIC) {
394*ba2539a9Schs 			winner = NULL;	/* No actual winner here... */
395*ba2539a9Schs 			goto handle_winner;
396*ba2539a9Schs 		}
397c1cb2cd8Shaad 	} else {
398c1cb2cd8Shaad 		zap->zap_ismicro = TRUE;
399c1cb2cd8Shaad 	}
400c1cb2cd8Shaad 
401c1cb2cd8Shaad 	/*
402c1cb2cd8Shaad 	 * Make sure that zap_ismicro is set before we let others see
403c1cb2cd8Shaad 	 * it, because zap_lockdir() checks zap_ismicro without the lock
404c1cb2cd8Shaad 	 * held.
405c1cb2cd8Shaad 	 */
406*ba2539a9Schs 	dmu_buf_init_user(&zap->zap_dbu, zap_evict_sync, NULL, &zap->zap_dbuf);
407*ba2539a9Schs 	winner = dmu_buf_set_user(db, &zap->zap_dbu);
408c1cb2cd8Shaad 
409*ba2539a9Schs 	if (winner != NULL)
410*ba2539a9Schs 		goto handle_winner;
411c1cb2cd8Shaad 
412c1cb2cd8Shaad 	if (zap->zap_ismicro) {
413*ba2539a9Schs 		zap->zap_salt = zap_m_phys(zap)->mz_salt;
414*ba2539a9Schs 		zap->zap_normflags = zap_m_phys(zap)->mz_normflags;
415c1cb2cd8Shaad 		zap->zap_m.zap_num_chunks = db->db_size / MZAP_ENT_LEN - 1;
416c1cb2cd8Shaad 		avl_create(&zap->zap_m.zap_avl, mze_compare,
417c1cb2cd8Shaad 		    sizeof (mzap_ent_t), offsetof(mzap_ent_t, mze_node));
418c1cb2cd8Shaad 
419c1cb2cd8Shaad 		for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
420c1cb2cd8Shaad 			mzap_ent_phys_t *mze =
421*ba2539a9Schs 			    &zap_m_phys(zap)->mz_chunk[i];
422c1cb2cd8Shaad 			if (mze->mze_name[0]) {
423c1cb2cd8Shaad 				zap_name_t *zn;
424c1cb2cd8Shaad 
425c1cb2cd8Shaad 				zn = zap_name_alloc(zap, mze->mze_name,
426c1cb2cd8Shaad 				    MT_EXACT);
427*ba2539a9Schs 				if (mze_insert(zap, i, zn->zn_hash) == 0)
428*ba2539a9Schs 					zap->zap_m.zap_num_entries++;
429*ba2539a9Schs 				else {
430*ba2539a9Schs 					printf("ZFS WARNING: Duplicated ZAP "
431*ba2539a9Schs 					    "entry detected (%s).\n",
432*ba2539a9Schs 					    mze->mze_name);
433*ba2539a9Schs 				}
434c1cb2cd8Shaad 				zap_name_free(zn);
435c1cb2cd8Shaad 			}
436c1cb2cd8Shaad 		}
437c1cb2cd8Shaad 	} else {
438*ba2539a9Schs 		zap->zap_salt = zap_f_phys(zap)->zap_salt;
439*ba2539a9Schs 		zap->zap_normflags = zap_f_phys(zap)->zap_normflags;
440c1cb2cd8Shaad 
441c1cb2cd8Shaad 		ASSERT3U(sizeof (struct zap_leaf_header), ==,
442c1cb2cd8Shaad 		    2*ZAP_LEAF_CHUNKSIZE);
443c1cb2cd8Shaad 
444c1cb2cd8Shaad 		/*
445c1cb2cd8Shaad 		 * The embedded pointer table should not overlap the
446c1cb2cd8Shaad 		 * other members.
447c1cb2cd8Shaad 		 */
448c1cb2cd8Shaad 		ASSERT3P(&ZAP_EMBEDDED_PTRTBL_ENT(zap, 0), >,
449*ba2539a9Schs 		    &zap_f_phys(zap)->zap_salt);
450c1cb2cd8Shaad 
451c1cb2cd8Shaad 		/*
452c1cb2cd8Shaad 		 * The embedded pointer table should end at the end of
453c1cb2cd8Shaad 		 * the block
454c1cb2cd8Shaad 		 */
455c1cb2cd8Shaad 		ASSERT3U((uintptr_t)&ZAP_EMBEDDED_PTRTBL_ENT(zap,
456c1cb2cd8Shaad 		    1<<ZAP_EMBEDDED_PTRTBL_SHIFT(zap)) -
457*ba2539a9Schs 		    (uintptr_t)zap_f_phys(zap), ==,
458c1cb2cd8Shaad 		    zap->zap_dbuf->db_size);
459c1cb2cd8Shaad 	}
460c1cb2cd8Shaad 	rw_exit(&zap->zap_rwlock);
461c1cb2cd8Shaad 	return (zap);
462*ba2539a9Schs 
463*ba2539a9Schs handle_winner:
464*ba2539a9Schs 	rw_exit(&zap->zap_rwlock);
465*ba2539a9Schs 	rw_destroy(&zap->zap_rwlock);
466*ba2539a9Schs 	if (!zap->zap_ismicro)
467*ba2539a9Schs 		mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
468*ba2539a9Schs 	kmem_free(zap, sizeof (zap_t));
469*ba2539a9Schs 	return (winner);
470c1cb2cd8Shaad }
471c1cb2cd8Shaad 
472*ba2539a9Schs static int
zap_lockdir_impl(dmu_buf_t * db,void * tag,dmu_tx_t * tx,krw_t lti,boolean_t fatreader,boolean_t adding,zap_t ** zapp)473*ba2539a9Schs zap_lockdir_impl(dmu_buf_t *db, void *tag, dmu_tx_t *tx,
474c1cb2cd8Shaad     krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
475c1cb2cd8Shaad {
476c1cb2cd8Shaad 	zap_t *zap;
477c1cb2cd8Shaad 	krw_t lt;
478*ba2539a9Schs 
479*ba2539a9Schs 	ASSERT0(db->db_offset);
480*ba2539a9Schs 	objset_t *os = dmu_buf_get_objset(db);
481*ba2539a9Schs 	uint64_t obj = db->db_object;
482c1cb2cd8Shaad 
483c1cb2cd8Shaad 	*zapp = NULL;
484c1cb2cd8Shaad 
485c1cb2cd8Shaad #ifdef ZFS_DEBUG
486c1cb2cd8Shaad 	{
487c1cb2cd8Shaad 		dmu_object_info_t doi;
488c1cb2cd8Shaad 		dmu_object_info_from_db(db, &doi);
489*ba2539a9Schs 		ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
490c1cb2cd8Shaad 	}
491c1cb2cd8Shaad #endif
492c1cb2cd8Shaad 
493c1cb2cd8Shaad 	zap = dmu_buf_get_user(db);
494*ba2539a9Schs 	if (zap == NULL) {
495c1cb2cd8Shaad 		zap = mzap_open(os, obj, db);
496*ba2539a9Schs 		if (zap == NULL) {
497*ba2539a9Schs 			/*
498*ba2539a9Schs 			 * mzap_open() didn't like what it saw on-disk.
499*ba2539a9Schs 			 * Check for corruption!
500*ba2539a9Schs 			 */
501*ba2539a9Schs 			return (SET_ERROR(EIO));
502*ba2539a9Schs 		}
503*ba2539a9Schs 	}
504c1cb2cd8Shaad 
505c1cb2cd8Shaad 	/*
506c1cb2cd8Shaad 	 * We're checking zap_ismicro without the lock held, in order to
507c1cb2cd8Shaad 	 * tell what type of lock we want.  Once we have some sort of
508c1cb2cd8Shaad 	 * lock, see if it really is the right type.  In practice this
509c1cb2cd8Shaad 	 * can only be different if it was upgraded from micro to fat,
510c1cb2cd8Shaad 	 * and micro wanted WRITER but fat only needs READER.
511c1cb2cd8Shaad 	 */
512c1cb2cd8Shaad 	lt = (!zap->zap_ismicro && fatreader) ? RW_READER : lti;
513c1cb2cd8Shaad 	rw_enter(&zap->zap_rwlock, lt);
514c1cb2cd8Shaad 	if (lt != ((!zap->zap_ismicro && fatreader) ? RW_READER : lti)) {
515c1cb2cd8Shaad 		/* it was upgraded, now we only need reader */
516c1cb2cd8Shaad 		ASSERT(lt == RW_WRITER);
517c1cb2cd8Shaad 		ASSERT(RW_READER ==
518c1cb2cd8Shaad 		    (!zap->zap_ismicro && fatreader) ? RW_READER : lti);
519c1cb2cd8Shaad 		rw_downgrade(&zap->zap_rwlock);
520c1cb2cd8Shaad 		lt = RW_READER;
521c1cb2cd8Shaad 	}
522c1cb2cd8Shaad 
523c1cb2cd8Shaad 	zap->zap_objset = os;
524c1cb2cd8Shaad 
525c1cb2cd8Shaad 	if (lt == RW_WRITER)
526c1cb2cd8Shaad 		dmu_buf_will_dirty(db, tx);
527c1cb2cd8Shaad 
528c1cb2cd8Shaad 	ASSERT3P(zap->zap_dbuf, ==, db);
529c1cb2cd8Shaad 
530c1cb2cd8Shaad 	ASSERT(!zap->zap_ismicro ||
531c1cb2cd8Shaad 	    zap->zap_m.zap_num_entries <= zap->zap_m.zap_num_chunks);
532c1cb2cd8Shaad 	if (zap->zap_ismicro && tx && adding &&
533c1cb2cd8Shaad 	    zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) {
534c1cb2cd8Shaad 		uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE;
535c1cb2cd8Shaad 		if (newsz > MZAP_MAX_BLKSZ) {
536c1cb2cd8Shaad 			dprintf("upgrading obj %llu: num_entries=%u\n",
537c1cb2cd8Shaad 			    obj, zap->zap_m.zap_num_entries);
538c1cb2cd8Shaad 			*zapp = zap;
539*ba2539a9Schs 			int err = mzap_upgrade(zapp, tag, tx, 0);
540*ba2539a9Schs 			if (err != 0)
541*ba2539a9Schs 				rw_exit(&zap->zap_rwlock);
542*ba2539a9Schs 			return (err);
543c1cb2cd8Shaad 		}
544*ba2539a9Schs 		VERIFY0(dmu_object_set_blocksize(os, obj, newsz, 0, tx));
545c1cb2cd8Shaad 		zap->zap_m.zap_num_chunks =
546c1cb2cd8Shaad 		    db->db_size / MZAP_ENT_LEN - 1;
547c1cb2cd8Shaad 	}
548c1cb2cd8Shaad 
549c1cb2cd8Shaad 	*zapp = zap;
550c1cb2cd8Shaad 	return (0);
551c1cb2cd8Shaad }
552c1cb2cd8Shaad 
553*ba2539a9Schs static int
zap_lockdir_by_dnode(dnode_t * dn,dmu_tx_t * tx,krw_t lti,boolean_t fatreader,boolean_t adding,void * tag,zap_t ** zapp)554*ba2539a9Schs zap_lockdir_by_dnode(dnode_t *dn, dmu_tx_t *tx,
555*ba2539a9Schs     krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp)
556*ba2539a9Schs {
557*ba2539a9Schs 	dmu_buf_t *db;
558*ba2539a9Schs 	int err;
559*ba2539a9Schs 
560*ba2539a9Schs 	err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
561*ba2539a9Schs 	if (err != 0) {
562*ba2539a9Schs 		return (err);
563*ba2539a9Schs 	}
564*ba2539a9Schs 	err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp);
565*ba2539a9Schs 	if (err != 0) {
566*ba2539a9Schs 		dmu_buf_rele(db, tag);
567*ba2539a9Schs 	}
568*ba2539a9Schs 	return (err);
569*ba2539a9Schs }
570*ba2539a9Schs 
571*ba2539a9Schs int
zap_lockdir(objset_t * os,uint64_t obj,dmu_tx_t * tx,krw_t lti,boolean_t fatreader,boolean_t adding,void * tag,zap_t ** zapp)572*ba2539a9Schs zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
573*ba2539a9Schs     krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp)
574*ba2539a9Schs {
575*ba2539a9Schs 	dmu_buf_t *db;
576*ba2539a9Schs 	int err;
577*ba2539a9Schs 
578*ba2539a9Schs 	err = dmu_buf_hold(os, obj, 0, tag, &db, DMU_READ_NO_PREFETCH);
579*ba2539a9Schs 	if (err != 0)
580*ba2539a9Schs 		return (err);
581*ba2539a9Schs 	err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp);
582*ba2539a9Schs 	if (err != 0)
583*ba2539a9Schs 		dmu_buf_rele(db, tag);
584*ba2539a9Schs 	return (err);
585*ba2539a9Schs }
586*ba2539a9Schs 
587c1cb2cd8Shaad void
zap_unlockdir(zap_t * zap,void * tag)588*ba2539a9Schs zap_unlockdir(zap_t *zap, void *tag)
589c1cb2cd8Shaad {
590c1cb2cd8Shaad 	rw_exit(&zap->zap_rwlock);
591*ba2539a9Schs 	dmu_buf_rele(zap->zap_dbuf, tag);
592c1cb2cd8Shaad }
593c1cb2cd8Shaad 
594c1cb2cd8Shaad static int
mzap_upgrade(zap_t ** zapp,void * tag,dmu_tx_t * tx,zap_flags_t flags)595*ba2539a9Schs mzap_upgrade(zap_t **zapp, void *tag, dmu_tx_t *tx, zap_flags_t flags)
596c1cb2cd8Shaad {
597c1cb2cd8Shaad 	mzap_phys_t *mzp;
598a252d550Shaad 	int i, sz, nchunks;
599a252d550Shaad 	int err = 0;
600c1cb2cd8Shaad 	zap_t *zap = *zapp;
601c1cb2cd8Shaad 
602c1cb2cd8Shaad 	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
603c1cb2cd8Shaad 
604c1cb2cd8Shaad 	sz = zap->zap_dbuf->db_size;
605*ba2539a9Schs 	mzp = zio_buf_alloc(sz);
606c1cb2cd8Shaad 	bcopy(zap->zap_dbuf->db_data, mzp, sz);
607c1cb2cd8Shaad 	nchunks = zap->zap_m.zap_num_chunks;
608c1cb2cd8Shaad 
609a252d550Shaad 	if (!flags) {
610c1cb2cd8Shaad 		err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object,
611c1cb2cd8Shaad 		    1ULL << fzap_default_block_shift, 0, tx);
612c1cb2cd8Shaad 		if (err) {
613*ba2539a9Schs 			zio_buf_free(mzp, sz);
614c1cb2cd8Shaad 			return (err);
615c1cb2cd8Shaad 		}
616a252d550Shaad 	}
617c1cb2cd8Shaad 
618c1cb2cd8Shaad 	dprintf("upgrading obj=%llu with %u chunks\n",
619c1cb2cd8Shaad 	    zap->zap_object, nchunks);
620c1cb2cd8Shaad 	/* XXX destroy the avl later, so we can use the stored hash value */
621c1cb2cd8Shaad 	mze_destroy(zap);
622c1cb2cd8Shaad 
623a252d550Shaad 	fzap_upgrade(zap, tx, flags);
624c1cb2cd8Shaad 
625c1cb2cd8Shaad 	for (i = 0; i < nchunks; i++) {
626c1cb2cd8Shaad 		mzap_ent_phys_t *mze = &mzp->mz_chunk[i];
627c1cb2cd8Shaad 		zap_name_t *zn;
628c1cb2cd8Shaad 		if (mze->mze_name[0] == 0)
629c1cb2cd8Shaad 			continue;
630c1cb2cd8Shaad 		dprintf("adding %s=%llu\n",
631c1cb2cd8Shaad 		    mze->mze_name, mze->mze_value);
632c1cb2cd8Shaad 		zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT);
633*ba2539a9Schs 		err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd,
634*ba2539a9Schs 		    tag, tx);
635c1cb2cd8Shaad 		zap = zn->zn_zap;	/* fzap_add_cd() may change zap */
636c1cb2cd8Shaad 		zap_name_free(zn);
637c1cb2cd8Shaad 		if (err)
638c1cb2cd8Shaad 			break;
639c1cb2cd8Shaad 	}
640*ba2539a9Schs 	zio_buf_free(mzp, sz);
641c1cb2cd8Shaad 	*zapp = zap;
642c1cb2cd8Shaad 	return (err);
643c1cb2cd8Shaad }
644c1cb2cd8Shaad 
645*ba2539a9Schs void
mzap_create_impl(objset_t * os,uint64_t obj,int normflags,zap_flags_t flags,dmu_tx_t * tx)646a252d550Shaad mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags,
647a252d550Shaad     dmu_tx_t *tx)
648c1cb2cd8Shaad {
649c1cb2cd8Shaad 	dmu_buf_t *db;
650c1cb2cd8Shaad 	mzap_phys_t *zp;
651c1cb2cd8Shaad 
652*ba2539a9Schs 	VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH));
653c1cb2cd8Shaad 
654c1cb2cd8Shaad #ifdef ZFS_DEBUG
655c1cb2cd8Shaad 	{
656c1cb2cd8Shaad 		dmu_object_info_t doi;
657c1cb2cd8Shaad 		dmu_object_info_from_db(db, &doi);
658*ba2539a9Schs 		ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
659c1cb2cd8Shaad 	}
660c1cb2cd8Shaad #endif
661c1cb2cd8Shaad 
662c1cb2cd8Shaad 	dmu_buf_will_dirty(db, tx);
663c1cb2cd8Shaad 	zp = db->db_data;
664c1cb2cd8Shaad 	zp->mz_block_type = ZBT_MICRO;
665c1cb2cd8Shaad 	zp->mz_salt = ((uintptr_t)db ^ (uintptr_t)tx ^ (obj << 1)) | 1ULL;
666c1cb2cd8Shaad 	zp->mz_normflags = normflags;
667c1cb2cd8Shaad 	dmu_buf_rele(db, FTAG);
668a252d550Shaad 
669a252d550Shaad 	if (flags != 0) {
670a252d550Shaad 		zap_t *zap;
671a252d550Shaad 		/* Only fat zap supports flags; upgrade immediately. */
672a252d550Shaad 		VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER,
673*ba2539a9Schs 		    B_FALSE, B_FALSE, FTAG, &zap));
674*ba2539a9Schs 		VERIFY3U(0, ==, mzap_upgrade(&zap, FTAG, tx, flags));
675*ba2539a9Schs 		zap_unlockdir(zap, FTAG);
676a252d550Shaad 	}
677c1cb2cd8Shaad }
678c1cb2cd8Shaad 
679c1cb2cd8Shaad int
zap_create_claim(objset_t * os,uint64_t obj,dmu_object_type_t ot,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)680c1cb2cd8Shaad zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot,
681c1cb2cd8Shaad     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
682c1cb2cd8Shaad {
683c1cb2cd8Shaad 	return (zap_create_claim_norm(os, obj,
684c1cb2cd8Shaad 	    0, ot, bonustype, bonuslen, tx));
685c1cb2cd8Shaad }
686c1cb2cd8Shaad 
687c1cb2cd8Shaad int
zap_create_claim_norm(objset_t * os,uint64_t obj,int normflags,dmu_object_type_t ot,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)688c1cb2cd8Shaad zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags,
689c1cb2cd8Shaad     dmu_object_type_t ot,
690c1cb2cd8Shaad     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
691c1cb2cd8Shaad {
692c1cb2cd8Shaad 	int err;
693c1cb2cd8Shaad 
694c1cb2cd8Shaad 	err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx);
695c1cb2cd8Shaad 	if (err != 0)
696c1cb2cd8Shaad 		return (err);
697a252d550Shaad 	mzap_create_impl(os, obj, normflags, 0, tx);
698c1cb2cd8Shaad 	return (0);
699c1cb2cd8Shaad }
700c1cb2cd8Shaad 
701c1cb2cd8Shaad uint64_t
zap_create(objset_t * os,dmu_object_type_t ot,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)702c1cb2cd8Shaad zap_create(objset_t *os, dmu_object_type_t ot,
703c1cb2cd8Shaad     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
704c1cb2cd8Shaad {
705c1cb2cd8Shaad 	return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx));
706c1cb2cd8Shaad }
707c1cb2cd8Shaad 
708c1cb2cd8Shaad uint64_t
zap_create_norm(objset_t * os,int normflags,dmu_object_type_t ot,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)709c1cb2cd8Shaad zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot,
710c1cb2cd8Shaad     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
711c1cb2cd8Shaad {
712c1cb2cd8Shaad 	uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx);
713c1cb2cd8Shaad 
714a252d550Shaad 	mzap_create_impl(os, obj, normflags, 0, tx);
715a252d550Shaad 	return (obj);
716a252d550Shaad }
717a252d550Shaad 
718a252d550Shaad uint64_t
zap_create_flags(objset_t * os,int normflags,zap_flags_t flags,dmu_object_type_t ot,int leaf_blockshift,int indirect_blockshift,dmu_object_type_t bonustype,int bonuslen,dmu_tx_t * tx)719a252d550Shaad zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
720a252d550Shaad     dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
721a252d550Shaad     dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
722a252d550Shaad {
723a252d550Shaad 	uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx);
724a252d550Shaad 
725a252d550Shaad 	ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT &&
726*ba2539a9Schs 	    leaf_blockshift <= SPA_OLD_MAXBLOCKSHIFT &&
727a252d550Shaad 	    indirect_blockshift >= SPA_MINBLOCKSHIFT &&
728*ba2539a9Schs 	    indirect_blockshift <= SPA_OLD_MAXBLOCKSHIFT);
729a252d550Shaad 
730a252d550Shaad 	VERIFY(dmu_object_set_blocksize(os, obj,
731a252d550Shaad 	    1ULL << leaf_blockshift, indirect_blockshift, tx) == 0);
732a252d550Shaad 
733a252d550Shaad 	mzap_create_impl(os, obj, normflags, flags, tx);
734c1cb2cd8Shaad 	return (obj);
735c1cb2cd8Shaad }
736c1cb2cd8Shaad 
737c1cb2cd8Shaad int
zap_destroy(objset_t * os,uint64_t zapobj,dmu_tx_t * tx)738c1cb2cd8Shaad zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
739c1cb2cd8Shaad {
740c1cb2cd8Shaad 	/*
741c1cb2cd8Shaad 	 * dmu_object_free will free the object number and free the
742c1cb2cd8Shaad 	 * data.  Freeing the data will cause our pageout function to be
743c1cb2cd8Shaad 	 * called, which will destroy our data (zap_leaf_t's and zap_t).
744c1cb2cd8Shaad 	 */
745c1cb2cd8Shaad 
746c1cb2cd8Shaad 	return (dmu_object_free(os, zapobj, tx));
747c1cb2cd8Shaad }
748c1cb2cd8Shaad 
749c1cb2cd8Shaad void
zap_evict_sync(void * dbu)750*ba2539a9Schs zap_evict_sync(void *dbu)
751c1cb2cd8Shaad {
752*ba2539a9Schs 	zap_t *zap = dbu;
753c1cb2cd8Shaad 
754c1cb2cd8Shaad 	rw_destroy(&zap->zap_rwlock);
755c1cb2cd8Shaad 
756c1cb2cd8Shaad 	if (zap->zap_ismicro)
757c1cb2cd8Shaad 		mze_destroy(zap);
758c1cb2cd8Shaad 	else
759c1cb2cd8Shaad 		mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
760c1cb2cd8Shaad 
761c1cb2cd8Shaad 	kmem_free(zap, sizeof (zap_t));
762c1cb2cd8Shaad }
763c1cb2cd8Shaad 
764c1cb2cd8Shaad int
zap_count(objset_t * os,uint64_t zapobj,uint64_t * count)765c1cb2cd8Shaad zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
766c1cb2cd8Shaad {
767c1cb2cd8Shaad 	zap_t *zap;
768c1cb2cd8Shaad 	int err;
769c1cb2cd8Shaad 
770*ba2539a9Schs 	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
771c1cb2cd8Shaad 	if (err)
772c1cb2cd8Shaad 		return (err);
773c1cb2cd8Shaad 	if (!zap->zap_ismicro) {
774c1cb2cd8Shaad 		err = fzap_count(zap, count);
775c1cb2cd8Shaad 	} else {
776c1cb2cd8Shaad 		*count = zap->zap_m.zap_num_entries;
777c1cb2cd8Shaad 	}
778*ba2539a9Schs 	zap_unlockdir(zap, FTAG);
779c1cb2cd8Shaad 	return (err);
780c1cb2cd8Shaad }
781c1cb2cd8Shaad 
782c1cb2cd8Shaad /*
783c1cb2cd8Shaad  * zn may be NULL; if not specified, it will be computed if needed.
784c1cb2cd8Shaad  * See also the comment above zap_entry_normalization_conflict().
785c1cb2cd8Shaad  */
786c1cb2cd8Shaad static boolean_t
mzap_normalization_conflict(zap_t * zap,zap_name_t * zn,mzap_ent_t * mze)787c1cb2cd8Shaad mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze)
788c1cb2cd8Shaad {
789c1cb2cd8Shaad 	mzap_ent_t *other;
790c1cb2cd8Shaad 	int direction = AVL_BEFORE;
791c1cb2cd8Shaad 	boolean_t allocdzn = B_FALSE;
792c1cb2cd8Shaad 
793c1cb2cd8Shaad 	if (zap->zap_normflags == 0)
794c1cb2cd8Shaad 		return (B_FALSE);
795c1cb2cd8Shaad 
796c1cb2cd8Shaad again:
797c1cb2cd8Shaad 	for (other = avl_walk(&zap->zap_m.zap_avl, mze, direction);
798c1cb2cd8Shaad 	    other && other->mze_hash == mze->mze_hash;
799c1cb2cd8Shaad 	    other = avl_walk(&zap->zap_m.zap_avl, other, direction)) {
800c1cb2cd8Shaad 
801c1cb2cd8Shaad 		if (zn == NULL) {
802*ba2539a9Schs 			zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name,
803c1cb2cd8Shaad 			    MT_FIRST);
804c1cb2cd8Shaad 			allocdzn = B_TRUE;
805c1cb2cd8Shaad 		}
806*ba2539a9Schs 		if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) {
807c1cb2cd8Shaad 			if (allocdzn)
808c1cb2cd8Shaad 				zap_name_free(zn);
809c1cb2cd8Shaad 			return (B_TRUE);
810c1cb2cd8Shaad 		}
811c1cb2cd8Shaad 	}
812c1cb2cd8Shaad 
813c1cb2cd8Shaad 	if (direction == AVL_BEFORE) {
814c1cb2cd8Shaad 		direction = AVL_AFTER;
815c1cb2cd8Shaad 		goto again;
816c1cb2cd8Shaad 	}
817c1cb2cd8Shaad 
818c1cb2cd8Shaad 	if (allocdzn)
819c1cb2cd8Shaad 		zap_name_free(zn);
820c1cb2cd8Shaad 	return (B_FALSE);
821c1cb2cd8Shaad }
822c1cb2cd8Shaad 
823c1cb2cd8Shaad /*
824c1cb2cd8Shaad  * Routines for manipulating attributes.
825c1cb2cd8Shaad  */
826c1cb2cd8Shaad 
827c1cb2cd8Shaad int
zap_lookup(objset_t * os,uint64_t zapobj,const char * name,uint64_t integer_size,uint64_t num_integers,void * buf)828c1cb2cd8Shaad zap_lookup(objset_t *os, uint64_t zapobj, const char *name,
829c1cb2cd8Shaad     uint64_t integer_size, uint64_t num_integers, void *buf)
830c1cb2cd8Shaad {
831c1cb2cd8Shaad 	return (zap_lookup_norm(os, zapobj, name, integer_size,
832c1cb2cd8Shaad 	    num_integers, buf, MT_EXACT, NULL, 0, NULL));
833c1cb2cd8Shaad }
834c1cb2cd8Shaad 
835*ba2539a9Schs static int
zap_lookup_impl(zap_t * zap,const char * name,uint64_t integer_size,uint64_t num_integers,void * buf,matchtype_t mt,char * realname,int rn_len,boolean_t * ncp)836*ba2539a9Schs zap_lookup_impl(zap_t *zap, const char *name,
837c1cb2cd8Shaad     uint64_t integer_size, uint64_t num_integers, void *buf,
838c1cb2cd8Shaad     matchtype_t mt, char *realname, int rn_len,
839c1cb2cd8Shaad     boolean_t *ncp)
840c1cb2cd8Shaad {
841*ba2539a9Schs 	int err = 0;
842c1cb2cd8Shaad 	mzap_ent_t *mze;
843c1cb2cd8Shaad 	zap_name_t *zn;
844c1cb2cd8Shaad 
845c1cb2cd8Shaad 	zn = zap_name_alloc(zap, name, mt);
846*ba2539a9Schs 	if (zn == NULL)
847*ba2539a9Schs 		return (SET_ERROR(ENOTSUP));
848c1cb2cd8Shaad 
849c1cb2cd8Shaad 	if (!zap->zap_ismicro) {
850c1cb2cd8Shaad 		err = fzap_lookup(zn, integer_size, num_integers, buf,
851c1cb2cd8Shaad 		    realname, rn_len, ncp);
852c1cb2cd8Shaad 	} else {
853c1cb2cd8Shaad 		mze = mze_find(zn);
854c1cb2cd8Shaad 		if (mze == NULL) {
855*ba2539a9Schs 			err = SET_ERROR(ENOENT);
856c1cb2cd8Shaad 		} else {
857c1cb2cd8Shaad 			if (num_integers < 1) {
858*ba2539a9Schs 				err = SET_ERROR(EOVERFLOW);
859c1cb2cd8Shaad 			} else if (integer_size != 8) {
860*ba2539a9Schs 				err = SET_ERROR(EINVAL);
861c1cb2cd8Shaad 			} else {
862*ba2539a9Schs 				*(uint64_t *)buf =
863*ba2539a9Schs 				    MZE_PHYS(zap, mze)->mze_value;
8646a125a39Shaad 				if (realname != NULL)
865c1cb2cd8Shaad 					(void) strlcpy(realname,
866*ba2539a9Schs 					    MZE_PHYS(zap, mze)->mze_name, rn_len);
867c1cb2cd8Shaad 				if (ncp) {
868c1cb2cd8Shaad 					*ncp = mzap_normalization_conflict(zap,
869c1cb2cd8Shaad 					    zn, mze);
870c1cb2cd8Shaad 				}
871c1cb2cd8Shaad 			}
872c1cb2cd8Shaad 		}
873c1cb2cd8Shaad 	}
874c1cb2cd8Shaad 	zap_name_free(zn);
875*ba2539a9Schs 	return (err);
876*ba2539a9Schs }
877*ba2539a9Schs 
878*ba2539a9Schs int
zap_lookup_norm(objset_t * os,uint64_t zapobj,const char * name,uint64_t integer_size,uint64_t num_integers,void * buf,matchtype_t mt,char * realname,int rn_len,boolean_t * ncp)879*ba2539a9Schs zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
880*ba2539a9Schs     uint64_t integer_size, uint64_t num_integers, void *buf,
881*ba2539a9Schs     matchtype_t mt, char *realname, int rn_len,
882*ba2539a9Schs     boolean_t *ncp)
883*ba2539a9Schs {
884*ba2539a9Schs 	zap_t *zap;
885*ba2539a9Schs 	int err;
886*ba2539a9Schs 
887*ba2539a9Schs 	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
888*ba2539a9Schs 	if (err != 0)
889*ba2539a9Schs 		return (err);
890*ba2539a9Schs 	err = zap_lookup_impl(zap, name, integer_size,
891*ba2539a9Schs 	    num_integers, buf, mt, realname, rn_len, ncp);
892*ba2539a9Schs 	zap_unlockdir(zap, FTAG);
893*ba2539a9Schs 	return (err);
894*ba2539a9Schs }
895*ba2539a9Schs 
896*ba2539a9Schs int
zap_lookup_by_dnode(dnode_t * dn,const char * name,uint64_t integer_size,uint64_t num_integers,void * buf)897*ba2539a9Schs zap_lookup_by_dnode(dnode_t *dn, const char *name,
898*ba2539a9Schs     uint64_t integer_size, uint64_t num_integers, void *buf)
899*ba2539a9Schs {
900*ba2539a9Schs 	return (zap_lookup_norm_by_dnode(dn, name, integer_size,
901*ba2539a9Schs 	    num_integers, buf, MT_EXACT, NULL, 0, NULL));
902*ba2539a9Schs }
903*ba2539a9Schs 
904*ba2539a9Schs int
zap_lookup_norm_by_dnode(dnode_t * dn,const char * name,uint64_t integer_size,uint64_t num_integers,void * buf,matchtype_t mt,char * realname,int rn_len,boolean_t * ncp)905*ba2539a9Schs zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
906*ba2539a9Schs     uint64_t integer_size, uint64_t num_integers, void *buf,
907*ba2539a9Schs     matchtype_t mt, char *realname, int rn_len,
908*ba2539a9Schs     boolean_t *ncp)
909*ba2539a9Schs {
910*ba2539a9Schs 	zap_t *zap;
911*ba2539a9Schs 	int err;
912*ba2539a9Schs 
913*ba2539a9Schs 	err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
914*ba2539a9Schs 	    FTAG, &zap);
915*ba2539a9Schs 	if (err != 0)
916*ba2539a9Schs 		return (err);
917*ba2539a9Schs 	err = zap_lookup_impl(zap, name, integer_size,
918*ba2539a9Schs 	    num_integers, buf, mt, realname, rn_len, ncp);
919*ba2539a9Schs 	zap_unlockdir(zap, FTAG);
920*ba2539a9Schs 	return (err);
921*ba2539a9Schs }
922*ba2539a9Schs 
923*ba2539a9Schs int
zap_prefetch_uint64(objset_t * os,uint64_t zapobj,const uint64_t * key,int key_numints)924*ba2539a9Schs zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
925*ba2539a9Schs     int key_numints)
926*ba2539a9Schs {
927*ba2539a9Schs 	zap_t *zap;
928*ba2539a9Schs 	int err;
929*ba2539a9Schs 	zap_name_t *zn;
930*ba2539a9Schs 
931*ba2539a9Schs 	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
932*ba2539a9Schs 	if (err)
933*ba2539a9Schs 		return (err);
934*ba2539a9Schs 	zn = zap_name_alloc_uint64(zap, key, key_numints);
935*ba2539a9Schs 	if (zn == NULL) {
936*ba2539a9Schs 		zap_unlockdir(zap, FTAG);
937*ba2539a9Schs 		return (SET_ERROR(ENOTSUP));
938*ba2539a9Schs 	}
939*ba2539a9Schs 
940*ba2539a9Schs 	fzap_prefetch(zn);
941*ba2539a9Schs 	zap_name_free(zn);
942*ba2539a9Schs 	zap_unlockdir(zap, FTAG);
943c1cb2cd8Shaad 	return (err);
944c1cb2cd8Shaad }
945c1cb2cd8Shaad 
946c1cb2cd8Shaad int
zap_lookup_uint64(objset_t * os,uint64_t zapobj,const uint64_t * key,int key_numints,uint64_t integer_size,uint64_t num_integers,void * buf)947a252d550Shaad zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
948a252d550Shaad     int key_numints, uint64_t integer_size, uint64_t num_integers, void *buf)
949a252d550Shaad {
950a252d550Shaad 	zap_t *zap;
951a252d550Shaad 	int err;
952a252d550Shaad 	zap_name_t *zn;
953a252d550Shaad 
954*ba2539a9Schs 	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
955a252d550Shaad 	if (err)
956a252d550Shaad 		return (err);
957a252d550Shaad 	zn = zap_name_alloc_uint64(zap, key, key_numints);
958a252d550Shaad 	if (zn == NULL) {
959*ba2539a9Schs 		zap_unlockdir(zap, FTAG);
960*ba2539a9Schs 		return (SET_ERROR(ENOTSUP));
961a252d550Shaad 	}
962a252d550Shaad 
963a252d550Shaad 	err = fzap_lookup(zn, integer_size, num_integers, buf,
964a252d550Shaad 	    NULL, 0, NULL);
965a252d550Shaad 	zap_name_free(zn);
966*ba2539a9Schs 	zap_unlockdir(zap, FTAG);
967a252d550Shaad 	return (err);
968a252d550Shaad }
969a252d550Shaad 
970a252d550Shaad int
zap_contains(objset_t * os,uint64_t zapobj,const char * name)971a252d550Shaad zap_contains(objset_t *os, uint64_t zapobj, const char *name)
972a252d550Shaad {
973*ba2539a9Schs 	int err = zap_lookup_norm(os, zapobj, name, 0,
974*ba2539a9Schs 	    0, NULL, MT_EXACT, NULL, 0, NULL);
975a252d550Shaad 	if (err == EOVERFLOW || err == EINVAL)
976a252d550Shaad 		err = 0; /* found, but skipped reading the value */
977a252d550Shaad 	return (err);
978a252d550Shaad }
979a252d550Shaad 
980a252d550Shaad int
zap_length(objset_t * os,uint64_t zapobj,const char * name,uint64_t * integer_size,uint64_t * num_integers)981c1cb2cd8Shaad zap_length(objset_t *os, uint64_t zapobj, const char *name,
982c1cb2cd8Shaad     uint64_t *integer_size, uint64_t *num_integers)
983c1cb2cd8Shaad {
984c1cb2cd8Shaad 	zap_t *zap;
985c1cb2cd8Shaad 	int err;
986c1cb2cd8Shaad 	mzap_ent_t *mze;
987c1cb2cd8Shaad 	zap_name_t *zn;
988c1cb2cd8Shaad 
989*ba2539a9Schs 	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
990c1cb2cd8Shaad 	if (err)
991c1cb2cd8Shaad 		return (err);
992c1cb2cd8Shaad 	zn = zap_name_alloc(zap, name, MT_EXACT);
993c1cb2cd8Shaad 	if (zn == NULL) {
994*ba2539a9Schs 		zap_unlockdir(zap, FTAG);
995*ba2539a9Schs 		return (SET_ERROR(ENOTSUP));
996c1cb2cd8Shaad 	}
997c1cb2cd8Shaad 	if (!zap->zap_ismicro) {
998c1cb2cd8Shaad 		err = fzap_length(zn, integer_size, num_integers);
999c1cb2cd8Shaad 	} else {
1000c1cb2cd8Shaad 		mze = mze_find(zn);
1001c1cb2cd8Shaad 		if (mze == NULL) {
1002*ba2539a9Schs 			err = SET_ERROR(ENOENT);
1003c1cb2cd8Shaad 		} else {
1004c1cb2cd8Shaad 			if (integer_size)
1005c1cb2cd8Shaad 				*integer_size = 8;
1006c1cb2cd8Shaad 			if (num_integers)
1007c1cb2cd8Shaad 				*num_integers = 1;
1008c1cb2cd8Shaad 		}
1009c1cb2cd8Shaad 	}
1010c1cb2cd8Shaad 	zap_name_free(zn);
1011*ba2539a9Schs 	zap_unlockdir(zap, FTAG);
1012c1cb2cd8Shaad 	return (err);
1013c1cb2cd8Shaad }
1014c1cb2cd8Shaad 
1015a252d550Shaad int
zap_length_uint64(objset_t * os,uint64_t zapobj,const uint64_t * key,int key_numints,uint64_t * integer_size,uint64_t * num_integers)1016a252d550Shaad zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
1017a252d550Shaad     int key_numints, uint64_t *integer_size, uint64_t *num_integers)
1018a252d550Shaad {
1019a252d550Shaad 	zap_t *zap;
1020a252d550Shaad 	int err;
1021a252d550Shaad 	zap_name_t *zn;
1022a252d550Shaad 
1023*ba2539a9Schs 	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
1024a252d550Shaad 	if (err)
1025a252d550Shaad 		return (err);
1026a252d550Shaad 	zn = zap_name_alloc_uint64(zap, key, key_numints);
1027a252d550Shaad 	if (zn == NULL) {
1028*ba2539a9Schs 		zap_unlockdir(zap, FTAG);
1029*ba2539a9Schs 		return (SET_ERROR(ENOTSUP));
1030a252d550Shaad 	}
1031a252d550Shaad 	err = fzap_length(zn, integer_size, num_integers);
1032a252d550Shaad 	zap_name_free(zn);
1033*ba2539a9Schs 	zap_unlockdir(zap, FTAG);
1034a252d550Shaad 	return (err);
1035a252d550Shaad }
1036a252d550Shaad 
1037c1cb2cd8Shaad static void
mzap_addent(zap_name_t * zn,uint64_t value)1038c1cb2cd8Shaad mzap_addent(zap_name_t *zn, uint64_t value)
1039c1cb2cd8Shaad {
1040c1cb2cd8Shaad 	int i;
1041c1cb2cd8Shaad 	zap_t *zap = zn->zn_zap;
1042c1cb2cd8Shaad 	int start = zap->zap_m.zap_alloc_next;
1043c1cb2cd8Shaad 	uint32_t cd;
1044c1cb2cd8Shaad 
1045c1cb2cd8Shaad 	ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
1046c1cb2cd8Shaad 
1047c1cb2cd8Shaad #ifdef ZFS_DEBUG
1048c1cb2cd8Shaad 	for (i = 0; i < zap->zap_m.zap_num_chunks; i++) {
1049*ba2539a9Schs 		mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i];
1050a252d550Shaad 		ASSERT(strcmp(zn->zn_key_orig, mze->mze_name) != 0);
1051c1cb2cd8Shaad 	}
1052c1cb2cd8Shaad #endif
1053c1cb2cd8Shaad 
1054c1cb2cd8Shaad 	cd = mze_find_unused_cd(zap, zn->zn_hash);
1055c1cb2cd8Shaad 	/* given the limited size of the microzap, this can't happen */
1056a252d550Shaad 	ASSERT(cd < zap_maxcd(zap));
1057c1cb2cd8Shaad 
1058c1cb2cd8Shaad again:
1059c1cb2cd8Shaad 	for (i = start; i < zap->zap_m.zap_num_chunks; i++) {
1060*ba2539a9Schs 		mzap_ent_phys_t *mze = &zap_m_phys(zap)->mz_chunk[i];
1061c1cb2cd8Shaad 		if (mze->mze_name[0] == 0) {
1062c1cb2cd8Shaad 			mze->mze_value = value;
1063c1cb2cd8Shaad 			mze->mze_cd = cd;
1064a252d550Shaad 			(void) strcpy(mze->mze_name, zn->zn_key_orig);
1065c1cb2cd8Shaad 			zap->zap_m.zap_num_entries++;
1066c1cb2cd8Shaad 			zap->zap_m.zap_alloc_next = i+1;
1067c1cb2cd8Shaad 			if (zap->zap_m.zap_alloc_next ==
1068c1cb2cd8Shaad 			    zap->zap_m.zap_num_chunks)
1069c1cb2cd8Shaad 				zap->zap_m.zap_alloc_next = 0;
1070*ba2539a9Schs 			VERIFY(0 == mze_insert(zap, i, zn->zn_hash));
1071c1cb2cd8Shaad 			return;
1072c1cb2cd8Shaad 		}
1073c1cb2cd8Shaad 	}
1074c1cb2cd8Shaad 	if (start != 0) {
1075c1cb2cd8Shaad 		start = 0;
1076c1cb2cd8Shaad 		goto again;
1077c1cb2cd8Shaad 	}
1078c1cb2cd8Shaad 	ASSERT(!"out of entries!");
1079c1cb2cd8Shaad }
1080c1cb2cd8Shaad 
1081c1cb2cd8Shaad int
zap_add(objset_t * os,uint64_t zapobj,const char * key,int integer_size,uint64_t num_integers,const void * val,dmu_tx_t * tx)1082a252d550Shaad zap_add(objset_t *os, uint64_t zapobj, const char *key,
1083c1cb2cd8Shaad     int integer_size, uint64_t num_integers,
1084c1cb2cd8Shaad     const void *val, dmu_tx_t *tx)
1085c1cb2cd8Shaad {
1086c1cb2cd8Shaad 	zap_t *zap;
1087c1cb2cd8Shaad 	int err;
1088c1cb2cd8Shaad 	mzap_ent_t *mze;
1089c1cb2cd8Shaad 	const uint64_t *intval = val;
1090c1cb2cd8Shaad 	zap_name_t *zn;
1091c1cb2cd8Shaad 
1092*ba2539a9Schs 	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
1093c1cb2cd8Shaad 	if (err)
1094c1cb2cd8Shaad 		return (err);
1095a252d550Shaad 	zn = zap_name_alloc(zap, key, MT_EXACT);
1096c1cb2cd8Shaad 	if (zn == NULL) {
1097*ba2539a9Schs 		zap_unlockdir(zap, FTAG);
1098*ba2539a9Schs 		return (SET_ERROR(ENOTSUP));
1099c1cb2cd8Shaad 	}
1100c1cb2cd8Shaad 	if (!zap->zap_ismicro) {
1101*ba2539a9Schs 		err = fzap_add(zn, integer_size, num_integers, val, FTAG, tx);
1102c1cb2cd8Shaad 		zap = zn->zn_zap;	/* fzap_add() may change zap */
1103c1cb2cd8Shaad 	} else if (integer_size != 8 || num_integers != 1 ||
1104a252d550Shaad 	    strlen(key) >= MZAP_NAME_LEN) {
1105*ba2539a9Schs 		err = mzap_upgrade(&zn->zn_zap, FTAG, tx, 0);
1106*ba2539a9Schs 		if (err == 0) {
1107*ba2539a9Schs 			err = fzap_add(zn, integer_size, num_integers, val,
1108*ba2539a9Schs 			    FTAG, tx);
1109*ba2539a9Schs 		}
1110c1cb2cd8Shaad 		zap = zn->zn_zap;	/* fzap_add() may change zap */
1111c1cb2cd8Shaad 	} else {
1112c1cb2cd8Shaad 		mze = mze_find(zn);
1113c1cb2cd8Shaad 		if (mze != NULL) {
1114*ba2539a9Schs 			err = SET_ERROR(EEXIST);
1115c1cb2cd8Shaad 		} else {
1116c1cb2cd8Shaad 			mzap_addent(zn, *intval);
1117c1cb2cd8Shaad 		}
1118c1cb2cd8Shaad 	}
1119c1cb2cd8Shaad 	ASSERT(zap == zn->zn_zap);
1120c1cb2cd8Shaad 	zap_name_free(zn);
1121c1cb2cd8Shaad 	if (zap != NULL)	/* may be NULL if fzap_add() failed */
1122*ba2539a9Schs 		zap_unlockdir(zap, FTAG);
1123c1cb2cd8Shaad 	return (err);
1124c1cb2cd8Shaad }
1125c1cb2cd8Shaad 
1126c1cb2cd8Shaad int
zap_add_uint64(objset_t * os,uint64_t zapobj,const uint64_t * key,int key_numints,int integer_size,uint64_t num_integers,const void * val,dmu_tx_t * tx)1127a252d550Shaad zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
1128a252d550Shaad     int key_numints, int integer_size, uint64_t num_integers,
1129a252d550Shaad     const void *val, dmu_tx_t *tx)
1130a252d550Shaad {
1131a252d550Shaad 	zap_t *zap;
1132a252d550Shaad 	int err;
1133a252d550Shaad 	zap_name_t *zn;
1134a252d550Shaad 
1135*ba2539a9Schs 	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
1136a252d550Shaad 	if (err)
1137a252d550Shaad 		return (err);
1138a252d550Shaad 	zn = zap_name_alloc_uint64(zap, key, key_numints);
1139a252d550Shaad 	if (zn == NULL) {
1140*ba2539a9Schs 		zap_unlockdir(zap, FTAG);
1141*ba2539a9Schs 		return (SET_ERROR(ENOTSUP));
1142a252d550Shaad 	}
1143*ba2539a9Schs 	err = fzap_add(zn, integer_size, num_integers, val, FTAG, tx);
1144a252d550Shaad 	zap = zn->zn_zap;	/* fzap_add() may change zap */
1145a252d550Shaad 	zap_name_free(zn);
1146a252d550Shaad 	if (zap != NULL)	/* may be NULL if fzap_add() failed */
1147*ba2539a9Schs 		zap_unlockdir(zap, FTAG);
1148a252d550Shaad 	return (err);
1149a252d550Shaad }
1150a252d550Shaad 
1151a252d550Shaad int
zap_update(objset_t * os,uint64_t zapobj,const char * name,int integer_size,uint64_t num_integers,const void * val,dmu_tx_t * tx)1152c1cb2cd8Shaad zap_update(objset_t *os, uint64_t zapobj, const char *name,
1153c1cb2cd8Shaad     int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
1154c1cb2cd8Shaad {
1155c1cb2cd8Shaad 	zap_t *zap;
1156c1cb2cd8Shaad 	mzap_ent_t *mze;
1157*ba2539a9Schs 	uint64_t oldval;
1158c1cb2cd8Shaad 	const uint64_t *intval = val;
1159c1cb2cd8Shaad 	zap_name_t *zn;
1160c1cb2cd8Shaad 	int err;
1161c1cb2cd8Shaad 
1162*ba2539a9Schs #ifdef ZFS_DEBUG
1163*ba2539a9Schs 	/*
1164*ba2539a9Schs 	 * If there is an old value, it shouldn't change across the
1165*ba2539a9Schs 	 * lockdir (eg, due to bprewrite's xlation).
1166*ba2539a9Schs 	 */
1167*ba2539a9Schs 	if (integer_size == 8 && num_integers == 1)
1168*ba2539a9Schs 		(void) zap_lookup(os, zapobj, name, 8, 1, &oldval);
1169*ba2539a9Schs #endif
1170*ba2539a9Schs 
1171*ba2539a9Schs 	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
1172c1cb2cd8Shaad 	if (err)
1173c1cb2cd8Shaad 		return (err);
1174c1cb2cd8Shaad 	zn = zap_name_alloc(zap, name, MT_EXACT);
1175c1cb2cd8Shaad 	if (zn == NULL) {
1176*ba2539a9Schs 		zap_unlockdir(zap, FTAG);
1177*ba2539a9Schs 		return (SET_ERROR(ENOTSUP));
1178c1cb2cd8Shaad 	}
1179c1cb2cd8Shaad 	if (!zap->zap_ismicro) {
1180*ba2539a9Schs 		err = fzap_update(zn, integer_size, num_integers, val,
1181*ba2539a9Schs 		    FTAG, tx);
1182c1cb2cd8Shaad 		zap = zn->zn_zap;	/* fzap_update() may change zap */
1183c1cb2cd8Shaad 	} else if (integer_size != 8 || num_integers != 1 ||
1184c1cb2cd8Shaad 	    strlen(name) >= MZAP_NAME_LEN) {
1185c1cb2cd8Shaad 		dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n",
1186c1cb2cd8Shaad 		    zapobj, integer_size, num_integers, name);
1187*ba2539a9Schs 		err = mzap_upgrade(&zn->zn_zap, FTAG, tx, 0);
1188*ba2539a9Schs 		if (err == 0) {
1189c1cb2cd8Shaad 			err = fzap_update(zn, integer_size, num_integers,
1190*ba2539a9Schs 			    val, FTAG, tx);
1191*ba2539a9Schs 		}
1192c1cb2cd8Shaad 		zap = zn->zn_zap;	/* fzap_update() may change zap */
1193c1cb2cd8Shaad 	} else {
1194c1cb2cd8Shaad 		mze = mze_find(zn);
1195c1cb2cd8Shaad 		if (mze != NULL) {
1196*ba2539a9Schs 			ASSERT3U(MZE_PHYS(zap, mze)->mze_value, ==, oldval);
1197*ba2539a9Schs 			MZE_PHYS(zap, mze)->mze_value = *intval;
1198c1cb2cd8Shaad 		} else {
1199c1cb2cd8Shaad 			mzap_addent(zn, *intval);
1200c1cb2cd8Shaad 		}
1201c1cb2cd8Shaad 	}
1202c1cb2cd8Shaad 	ASSERT(zap == zn->zn_zap);
1203c1cb2cd8Shaad 	zap_name_free(zn);
1204c1cb2cd8Shaad 	if (zap != NULL)	/* may be NULL if fzap_upgrade() failed */
1205*ba2539a9Schs 		zap_unlockdir(zap, FTAG);
1206c1cb2cd8Shaad 	return (err);
1207c1cb2cd8Shaad }
1208c1cb2cd8Shaad 
1209c1cb2cd8Shaad int
zap_update_uint64(objset_t * os,uint64_t zapobj,const uint64_t * key,int key_numints,int integer_size,uint64_t num_integers,const void * val,dmu_tx_t * tx)1210a252d550Shaad zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
1211a252d550Shaad     int key_numints,
1212a252d550Shaad     int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
1213a252d550Shaad {
1214a252d550Shaad 	zap_t *zap;
1215a252d550Shaad 	zap_name_t *zn;
1216a252d550Shaad 	int err;
1217a252d550Shaad 
1218*ba2539a9Schs 	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
1219a252d550Shaad 	if (err)
1220a252d550Shaad 		return (err);
1221a252d550Shaad 	zn = zap_name_alloc_uint64(zap, key, key_numints);
1222a252d550Shaad 	if (zn == NULL) {
1223*ba2539a9Schs 		zap_unlockdir(zap, FTAG);
1224*ba2539a9Schs 		return (SET_ERROR(ENOTSUP));
1225a252d550Shaad 	}
1226*ba2539a9Schs 	err = fzap_update(zn, integer_size, num_integers, val, FTAG, tx);
1227a252d550Shaad 	zap = zn->zn_zap;	/* fzap_update() may change zap */
1228a252d550Shaad 	zap_name_free(zn);
1229a252d550Shaad 	if (zap != NULL)	/* may be NULL if fzap_upgrade() failed */
1230*ba2539a9Schs 		zap_unlockdir(zap, FTAG);
1231a252d550Shaad 	return (err);
1232a252d550Shaad }
1233a252d550Shaad 
1234a252d550Shaad int
zap_remove(objset_t * os,uint64_t zapobj,const char * name,dmu_tx_t * tx)1235c1cb2cd8Shaad zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx)
1236c1cb2cd8Shaad {
1237c1cb2cd8Shaad 	return (zap_remove_norm(os, zapobj, name, MT_EXACT, tx));
1238c1cb2cd8Shaad }
1239c1cb2cd8Shaad 
1240c1cb2cd8Shaad int
zap_remove_norm(objset_t * os,uint64_t zapobj,const char * name,matchtype_t mt,dmu_tx_t * tx)1241c1cb2cd8Shaad zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name,
1242c1cb2cd8Shaad     matchtype_t mt, dmu_tx_t *tx)
1243c1cb2cd8Shaad {
1244c1cb2cd8Shaad 	zap_t *zap;
1245c1cb2cd8Shaad 	int err;
1246c1cb2cd8Shaad 	mzap_ent_t *mze;
1247c1cb2cd8Shaad 	zap_name_t *zn;
1248c1cb2cd8Shaad 
1249*ba2539a9Schs 	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
1250c1cb2cd8Shaad 	if (err)
1251c1cb2cd8Shaad 		return (err);
1252c1cb2cd8Shaad 	zn = zap_name_alloc(zap, name, mt);
1253c1cb2cd8Shaad 	if (zn == NULL) {
1254*ba2539a9Schs 		zap_unlockdir(zap, FTAG);
1255*ba2539a9Schs 		return (SET_ERROR(ENOTSUP));
1256c1cb2cd8Shaad 	}
1257c1cb2cd8Shaad 	if (!zap->zap_ismicro) {
1258c1cb2cd8Shaad 		err = fzap_remove(zn, tx);
1259c1cb2cd8Shaad 	} else {
1260c1cb2cd8Shaad 		mze = mze_find(zn);
1261c1cb2cd8Shaad 		if (mze == NULL) {
1262*ba2539a9Schs 			err = SET_ERROR(ENOENT);
1263c1cb2cd8Shaad 		} else {
1264c1cb2cd8Shaad 			zap->zap_m.zap_num_entries--;
1265*ba2539a9Schs 			bzero(&zap_m_phys(zap)->mz_chunk[mze->mze_chunkid],
1266c1cb2cd8Shaad 			    sizeof (mzap_ent_phys_t));
1267c1cb2cd8Shaad 			mze_remove(zap, mze);
1268c1cb2cd8Shaad 		}
1269c1cb2cd8Shaad 	}
1270c1cb2cd8Shaad 	zap_name_free(zn);
1271*ba2539a9Schs 	zap_unlockdir(zap, FTAG);
1272c1cb2cd8Shaad 	return (err);
1273c1cb2cd8Shaad }
1274c1cb2cd8Shaad 
1275a252d550Shaad int
zap_remove_uint64(objset_t * os,uint64_t zapobj,const uint64_t * key,int key_numints,dmu_tx_t * tx)1276a252d550Shaad zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
1277a252d550Shaad     int key_numints, dmu_tx_t *tx)
1278a252d550Shaad {
1279a252d550Shaad 	zap_t *zap;
1280a252d550Shaad 	int err;
1281a252d550Shaad 	zap_name_t *zn;
1282a252d550Shaad 
1283*ba2539a9Schs 	err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
1284a252d550Shaad 	if (err)
1285a252d550Shaad 		return (err);
1286a252d550Shaad 	zn = zap_name_alloc_uint64(zap, key, key_numints);
1287a252d550Shaad 	if (zn == NULL) {
1288*ba2539a9Schs 		zap_unlockdir(zap, FTAG);
1289*ba2539a9Schs 		return (SET_ERROR(ENOTSUP));
1290a252d550Shaad 	}
1291a252d550Shaad 	err = fzap_remove(zn, tx);
1292a252d550Shaad 	zap_name_free(zn);
1293*ba2539a9Schs 	zap_unlockdir(zap, FTAG);
1294a252d550Shaad 	return (err);
1295a252d550Shaad }
1296a252d550Shaad 
1297c1cb2cd8Shaad /*
1298c1cb2cd8Shaad  * Routines for iterating over the attributes.
1299c1cb2cd8Shaad  */
1300c1cb2cd8Shaad 
1301c1cb2cd8Shaad void
zap_cursor_init_serialized(zap_cursor_t * zc,objset_t * os,uint64_t zapobj,uint64_t serialized)1302c1cb2cd8Shaad zap_cursor_init_serialized(zap_cursor_t *zc, objset_t *os, uint64_t zapobj,
1303c1cb2cd8Shaad     uint64_t serialized)
1304c1cb2cd8Shaad {
1305c1cb2cd8Shaad 	zc->zc_objset = os;
1306c1cb2cd8Shaad 	zc->zc_zap = NULL;
1307c1cb2cd8Shaad 	zc->zc_leaf = NULL;
1308c1cb2cd8Shaad 	zc->zc_zapobj = zapobj;
1309a252d550Shaad 	zc->zc_serialized = serialized;
1310a252d550Shaad 	zc->zc_hash = 0;
1311c1cb2cd8Shaad 	zc->zc_cd = 0;
1312c1cb2cd8Shaad }
1313c1cb2cd8Shaad 
1314c1cb2cd8Shaad void
zap_cursor_init(zap_cursor_t * zc,objset_t * os,uint64_t zapobj)1315c1cb2cd8Shaad zap_cursor_init(zap_cursor_t *zc, objset_t *os, uint64_t zapobj)
1316c1cb2cd8Shaad {
1317c1cb2cd8Shaad 	zap_cursor_init_serialized(zc, os, zapobj, 0);
1318c1cb2cd8Shaad }
1319c1cb2cd8Shaad 
1320c1cb2cd8Shaad void
zap_cursor_fini(zap_cursor_t * zc)1321c1cb2cd8Shaad zap_cursor_fini(zap_cursor_t *zc)
1322c1cb2cd8Shaad {
1323c1cb2cd8Shaad 	if (zc->zc_zap) {
1324c1cb2cd8Shaad 		rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
1325*ba2539a9Schs 		zap_unlockdir(zc->zc_zap, NULL);
1326c1cb2cd8Shaad 		zc->zc_zap = NULL;
1327c1cb2cd8Shaad 	}
1328c1cb2cd8Shaad 	if (zc->zc_leaf) {
1329c1cb2cd8Shaad 		rw_enter(&zc->zc_leaf->l_rwlock, RW_READER);
1330c1cb2cd8Shaad 		zap_put_leaf(zc->zc_leaf);
1331c1cb2cd8Shaad 		zc->zc_leaf = NULL;
1332c1cb2cd8Shaad 	}
1333c1cb2cd8Shaad 	zc->zc_objset = NULL;
1334c1cb2cd8Shaad }
1335c1cb2cd8Shaad 
1336c1cb2cd8Shaad uint64_t
zap_cursor_serialize(zap_cursor_t * zc)1337c1cb2cd8Shaad zap_cursor_serialize(zap_cursor_t *zc)
1338c1cb2cd8Shaad {
1339c1cb2cd8Shaad 	if (zc->zc_hash == -1ULL)
1340c1cb2cd8Shaad 		return (-1ULL);
1341a252d550Shaad 	if (zc->zc_zap == NULL)
1342a252d550Shaad 		return (zc->zc_serialized);
1343a252d550Shaad 	ASSERT((zc->zc_hash & zap_maxcd(zc->zc_zap)) == 0);
1344a252d550Shaad 	ASSERT(zc->zc_cd < zap_maxcd(zc->zc_zap));
1345a252d550Shaad 
1346a252d550Shaad 	/*
1347a252d550Shaad 	 * We want to keep the high 32 bits of the cursor zero if we can, so
1348a252d550Shaad 	 * that 32-bit programs can access this.  So usually use a small
1349a252d550Shaad 	 * (28-bit) hash value so we can fit 4 bits of cd into the low 32-bits
1350a252d550Shaad 	 * of the cursor.
1351a252d550Shaad 	 *
1352a252d550Shaad 	 * [ collision differentiator | zap_hashbits()-bit hash value ]
1353a252d550Shaad 	 */
1354a252d550Shaad 	return ((zc->zc_hash >> (64 - zap_hashbits(zc->zc_zap))) |
1355a252d550Shaad 	    ((uint64_t)zc->zc_cd << zap_hashbits(zc->zc_zap)));
1356c1cb2cd8Shaad }
1357c1cb2cd8Shaad 
1358c1cb2cd8Shaad int
zap_cursor_retrieve(zap_cursor_t * zc,zap_attribute_t * za)1359c1cb2cd8Shaad zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
1360c1cb2cd8Shaad {
1361c1cb2cd8Shaad 	int err;
1362c1cb2cd8Shaad 	avl_index_t idx;
1363c1cb2cd8Shaad 	mzap_ent_t mze_tofind;
1364c1cb2cd8Shaad 	mzap_ent_t *mze;
1365c1cb2cd8Shaad 
1366c1cb2cd8Shaad 	if (zc->zc_hash == -1ULL)
1367*ba2539a9Schs 		return (SET_ERROR(ENOENT));
1368c1cb2cd8Shaad 
1369c1cb2cd8Shaad 	if (zc->zc_zap == NULL) {
1370a252d550Shaad 		int hb;
1371c1cb2cd8Shaad 		err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL,
1372*ba2539a9Schs 		    RW_READER, TRUE, FALSE, NULL, &zc->zc_zap);
1373c1cb2cd8Shaad 		if (err)
1374c1cb2cd8Shaad 			return (err);
1375a252d550Shaad 
1376a252d550Shaad 		/*
1377a252d550Shaad 		 * To support zap_cursor_init_serialized, advance, retrieve,
1378a252d550Shaad 		 * we must add to the existing zc_cd, which may already
1379a252d550Shaad 		 * be 1 due to the zap_cursor_advance.
1380a252d550Shaad 		 */
1381a252d550Shaad 		ASSERT(zc->zc_hash == 0);
1382a252d550Shaad 		hb = zap_hashbits(zc->zc_zap);
1383a252d550Shaad 		zc->zc_hash = zc->zc_serialized << (64 - hb);
1384a252d550Shaad 		zc->zc_cd += zc->zc_serialized >> hb;
1385a252d550Shaad 		if (zc->zc_cd >= zap_maxcd(zc->zc_zap)) /* corrupt serialized */
1386a252d550Shaad 			zc->zc_cd = 0;
1387c1cb2cd8Shaad 	} else {
1388c1cb2cd8Shaad 		rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
1389c1cb2cd8Shaad 	}
1390c1cb2cd8Shaad 	if (!zc->zc_zap->zap_ismicro) {
1391c1cb2cd8Shaad 		err = fzap_cursor_retrieve(zc->zc_zap, zc, za);
1392c1cb2cd8Shaad 	} else {
1393c1cb2cd8Shaad 		mze_tofind.mze_hash = zc->zc_hash;
1394*ba2539a9Schs 		mze_tofind.mze_cd = zc->zc_cd;
1395c1cb2cd8Shaad 
1396c1cb2cd8Shaad 		mze = avl_find(&zc->zc_zap->zap_m.zap_avl, &mze_tofind, &idx);
1397c1cb2cd8Shaad 		if (mze == NULL) {
1398c1cb2cd8Shaad 			mze = avl_nearest(&zc->zc_zap->zap_m.zap_avl,
1399c1cb2cd8Shaad 			    idx, AVL_AFTER);
1400c1cb2cd8Shaad 		}
1401c1cb2cd8Shaad 		if (mze) {
1402*ba2539a9Schs 			mzap_ent_phys_t *mzep = MZE_PHYS(zc->zc_zap, mze);
1403*ba2539a9Schs 			ASSERT3U(mze->mze_cd, ==, mzep->mze_cd);
1404c1cb2cd8Shaad 			za->za_normalization_conflict =
1405c1cb2cd8Shaad 			    mzap_normalization_conflict(zc->zc_zap, NULL, mze);
1406c1cb2cd8Shaad 			za->za_integer_length = 8;
1407c1cb2cd8Shaad 			za->za_num_integers = 1;
1408*ba2539a9Schs 			za->za_first_integer = mzep->mze_value;
1409*ba2539a9Schs 			(void) strcpy(za->za_name, mzep->mze_name);
1410c1cb2cd8Shaad 			zc->zc_hash = mze->mze_hash;
1411*ba2539a9Schs 			zc->zc_cd = mze->mze_cd;
1412c1cb2cd8Shaad 			err = 0;
1413c1cb2cd8Shaad 		} else {
1414c1cb2cd8Shaad 			zc->zc_hash = -1ULL;
1415*ba2539a9Schs 			err = SET_ERROR(ENOENT);
1416c1cb2cd8Shaad 		}
1417c1cb2cd8Shaad 	}
1418c1cb2cd8Shaad 	rw_exit(&zc->zc_zap->zap_rwlock);
1419c1cb2cd8Shaad 	return (err);
1420c1cb2cd8Shaad }
1421c1cb2cd8Shaad 
1422c1cb2cd8Shaad void
zap_cursor_advance(zap_cursor_t * zc)1423c1cb2cd8Shaad zap_cursor_advance(zap_cursor_t *zc)
1424c1cb2cd8Shaad {
1425c1cb2cd8Shaad 	if (zc->zc_hash == -1ULL)
1426c1cb2cd8Shaad 		return;
1427c1cb2cd8Shaad 	zc->zc_cd++;
1428c1cb2cd8Shaad }
1429a252d550Shaad 
1430a252d550Shaad int
zap_cursor_move_to_key(zap_cursor_t * zc,const char * name,matchtype_t mt)1431a252d550Shaad zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt)
1432a252d550Shaad {
1433a252d550Shaad 	int err = 0;
1434a252d550Shaad 	mzap_ent_t *mze;
1435a252d550Shaad 	zap_name_t *zn;
1436a252d550Shaad 
1437a252d550Shaad 	if (zc->zc_zap == NULL) {
1438a252d550Shaad 		err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL,
1439*ba2539a9Schs 		    RW_READER, TRUE, FALSE, FTAG, &zc->zc_zap);
1440a252d550Shaad 		if (err)
1441a252d550Shaad 			return (err);
1442a252d550Shaad 	} else {
1443a252d550Shaad 		rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
1444a252d550Shaad 	}
1445a252d550Shaad 
1446a252d550Shaad 	zn = zap_name_alloc(zc->zc_zap, name, mt);
1447a252d550Shaad 	if (zn == NULL) {
1448a252d550Shaad 		rw_exit(&zc->zc_zap->zap_rwlock);
1449*ba2539a9Schs 		return (SET_ERROR(ENOTSUP));
1450a252d550Shaad 	}
1451a252d550Shaad 
1452a252d550Shaad 	if (!zc->zc_zap->zap_ismicro) {
1453a252d550Shaad 		err = fzap_cursor_move_to_key(zc, zn);
1454a252d550Shaad 	} else {
1455a252d550Shaad 		mze = mze_find(zn);
1456a252d550Shaad 		if (mze == NULL) {
1457*ba2539a9Schs 			err = SET_ERROR(ENOENT);
1458a252d550Shaad 			goto out;
1459a252d550Shaad 		}
1460a252d550Shaad 		zc->zc_hash = mze->mze_hash;
1461*ba2539a9Schs 		zc->zc_cd = mze->mze_cd;
1462a252d550Shaad 	}
1463a252d550Shaad 
1464a252d550Shaad out:
1465a252d550Shaad 	zap_name_free(zn);
1466a252d550Shaad 	rw_exit(&zc->zc_zap->zap_rwlock);
1467a252d550Shaad 	return (err);
1468c1cb2cd8Shaad }
1469c1cb2cd8Shaad 
1470c1cb2cd8Shaad int
zap_get_stats(objset_t * os,uint64_t zapobj,zap_stats_t * zs)1471c1cb2cd8Shaad zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
1472c1cb2cd8Shaad {
1473c1cb2cd8Shaad 	int err;
1474c1cb2cd8Shaad 	zap_t *zap;
1475c1cb2cd8Shaad 
1476*ba2539a9Schs 	err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
1477c1cb2cd8Shaad 	if (err)
1478c1cb2cd8Shaad 		return (err);
1479c1cb2cd8Shaad 
1480c1cb2cd8Shaad 	bzero(zs, sizeof (zap_stats_t));
1481c1cb2cd8Shaad 
1482c1cb2cd8Shaad 	if (zap->zap_ismicro) {
1483c1cb2cd8Shaad 		zs->zs_blocksize = zap->zap_dbuf->db_size;
1484c1cb2cd8Shaad 		zs->zs_num_entries = zap->zap_m.zap_num_entries;
1485c1cb2cd8Shaad 		zs->zs_num_blocks = 1;
1486c1cb2cd8Shaad 	} else {
1487c1cb2cd8Shaad 		fzap_get_stats(zap, zs);
1488c1cb2cd8Shaad 	}
1489*ba2539a9Schs 	zap_unlockdir(zap, FTAG);
1490c1cb2cd8Shaad 	return (0);
1491c1cb2cd8Shaad }
1492a252d550Shaad 
1493a252d550Shaad int
zap_count_write_by_dnode(dnode_t * dn,const char * name,int add,refcount_t * towrite,refcount_t * tooverwrite)1494*ba2539a9Schs zap_count_write_by_dnode(dnode_t *dn, const char *name, int add,
1495*ba2539a9Schs     refcount_t *towrite, refcount_t *tooverwrite)
1496a252d550Shaad {
1497a252d550Shaad 	zap_t *zap;
1498a252d550Shaad 	int err = 0;
1499a252d550Shaad 
1500a252d550Shaad 	/*
1501a252d550Shaad 	 * Since, we don't have a name, we cannot figure out which blocks will
1502a252d550Shaad 	 * be affected in this operation. So, account for the worst case :
1503a252d550Shaad 	 * - 3 blocks overwritten: target leaf, ptrtbl block, header block
1504a252d550Shaad 	 * - 4 new blocks written if adding:
1505a252d550Shaad 	 *    - 2 blocks for possibly split leaves,
1506a252d550Shaad 	 *    - 2 grown ptrtbl blocks
1507a252d550Shaad 	 *
1508*ba2539a9Schs 	 * This also accommodates the case where an add operation to a fairly
1509a252d550Shaad 	 * large microzap results in a promotion to fatzap.
1510a252d550Shaad 	 */
1511a252d550Shaad 	if (name == NULL) {
1512*ba2539a9Schs 		(void) refcount_add_many(towrite,
1513*ba2539a9Schs 		    (3 + (add ? 4 : 0)) * SPA_OLD_MAXBLOCKSIZE, FTAG);
1514a252d550Shaad 		return (err);
1515a252d550Shaad 	}
1516a252d550Shaad 
1517a252d550Shaad 	/*
1518a252d550Shaad 	 * We lock the zap with adding == FALSE. Because, if we pass
1519a252d550Shaad 	 * the actual value of add, it could trigger a mzap_upgrade().
1520a252d550Shaad 	 * At present we are just evaluating the possibility of this operation
1521a252d550Shaad 	 * and hence we do not want to trigger an upgrade.
1522a252d550Shaad 	 */
1523*ba2539a9Schs 	err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
1524*ba2539a9Schs 	    FTAG, &zap);
1525*ba2539a9Schs 	if (err != 0)
1526a252d550Shaad 		return (err);
1527a252d550Shaad 
1528a252d550Shaad 	if (!zap->zap_ismicro) {
1529a252d550Shaad 		zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT);
1530a252d550Shaad 		if (zn) {
1531a252d550Shaad 			err = fzap_count_write(zn, add, towrite,
1532a252d550Shaad 			    tooverwrite);
1533a252d550Shaad 			zap_name_free(zn);
1534a252d550Shaad 		} else {
1535a252d550Shaad 			/*
1536a252d550Shaad 			 * We treat this case as similar to (name == NULL)
1537a252d550Shaad 			 */
1538*ba2539a9Schs 			(void) refcount_add_many(towrite,
1539*ba2539a9Schs 			    (3 + (add ? 4 : 0)) * SPA_OLD_MAXBLOCKSIZE, FTAG);
1540a252d550Shaad 		}
1541a252d550Shaad 	} else {
1542a252d550Shaad 		/*
1543a252d550Shaad 		 * We are here if (name != NULL) and this is a micro-zap.
1544a252d550Shaad 		 * We account for the header block depending on whether it
1545a252d550Shaad 		 * is freeable.
1546a252d550Shaad 		 *
1547a252d550Shaad 		 * Incase of an add-operation it is hard to find out
1548a252d550Shaad 		 * if this add will promote this microzap to fatzap.
1549a252d550Shaad 		 * Hence, we consider the worst case and account for the
1550a252d550Shaad 		 * blocks assuming this microzap would be promoted to a
1551a252d550Shaad 		 * fatzap.
1552a252d550Shaad 		 *
1553a252d550Shaad 		 * 1 block overwritten  : header block
1554a252d550Shaad 		 * 4 new blocks written : 2 new split leaf, 2 grown
1555a252d550Shaad 		 *			ptrtbl blocks
1556a252d550Shaad 		 */
1557*ba2539a9Schs 		if (dmu_buf_freeable(zap->zap_dbuf)) {
1558*ba2539a9Schs 			(void) refcount_add_many(tooverwrite,
1559*ba2539a9Schs 			    MZAP_MAX_BLKSZ, FTAG);
1560*ba2539a9Schs 		} else {
1561*ba2539a9Schs 			(void) refcount_add_many(towrite,
1562*ba2539a9Schs 			    MZAP_MAX_BLKSZ, FTAG);
1563*ba2539a9Schs 		}
1564a252d550Shaad 
1565a252d550Shaad 		if (add) {
1566*ba2539a9Schs 			(void) refcount_add_many(towrite,
1567*ba2539a9Schs 			    4 * MZAP_MAX_BLKSZ, FTAG);
1568a252d550Shaad 		}
1569a252d550Shaad 	}
1570a252d550Shaad 
1571*ba2539a9Schs 	zap_unlockdir(zap, FTAG);
1572a252d550Shaad 	return (err);
1573a252d550Shaad }
1574