xref: /freebsd-src/sys/contrib/openzfs/module/zfs/vdev_indirect_mapping.c (revision eda14cbc264d6969b02f2b1994cef11148e914f1)
1*eda14cbcSMatt Macy /*
2*eda14cbcSMatt Macy  * CDDL HEADER START
3*eda14cbcSMatt Macy  *
4*eda14cbcSMatt Macy  * This file and its contents are supplied under the terms of the
5*eda14cbcSMatt Macy  * Common Development and Distribution License ("CDDL"), version 1.0.
6*eda14cbcSMatt Macy  * You may only use this file in accordance with the terms of version
7*eda14cbcSMatt Macy  * 1.0 of the CDDL.
8*eda14cbcSMatt Macy  *
9*eda14cbcSMatt Macy  * A full copy of the text of the CDDL should have accompanied this
10*eda14cbcSMatt Macy  * source.  A copy of the CDDL is also available via the Internet at
11*eda14cbcSMatt Macy  * http://www.illumos.org/license/CDDL.
12*eda14cbcSMatt Macy  *
13*eda14cbcSMatt Macy  * CDDL HEADER END
14*eda14cbcSMatt Macy  */
15*eda14cbcSMatt Macy 
16*eda14cbcSMatt Macy /*
17*eda14cbcSMatt Macy  * Copyright (c) 2015, 2017 by Delphix. All rights reserved.
18*eda14cbcSMatt Macy  */
19*eda14cbcSMatt Macy 
20*eda14cbcSMatt Macy #include <sys/dmu_tx.h>
21*eda14cbcSMatt Macy #include <sys/dsl_pool.h>
22*eda14cbcSMatt Macy #include <sys/spa.h>
23*eda14cbcSMatt Macy #include <sys/vdev_impl.h>
24*eda14cbcSMatt Macy #include <sys/vdev_indirect_mapping.h>
25*eda14cbcSMatt Macy #include <sys/zfeature.h>
26*eda14cbcSMatt Macy #include <sys/dmu_objset.h>
27*eda14cbcSMatt Macy 
#ifdef ZFS_DEBUG
/*
 * Debug-only sanity check of an open indirect mapping.
 *
 * Asserts that the handle and its object/objset/phys/dbuf fields are set,
 * that the in-core entry array exists exactly when the on-disk header
 * reports a non-zero entry count, that the last entry does not extend past
 * vimp_max_offset, and that a counts object is recorded whenever
 * vim_havecounts is set.
 *
 * Always returns B_TRUE so that callers can wrap the call in ASSERT();
 * any inconsistency is reported by the assertions inside.
 */
static boolean_t
vdev_indirect_mapping_verify(vdev_indirect_mapping_t *vim)
{
	ASSERT(vim != NULL);

	/* Identity of the mapping object and the bonus buffer backing it. */
	ASSERT(vim->vim_objset != NULL);
	ASSERT(vim->vim_object != 0);
	ASSERT(vim->vim_dbuf != NULL);
	ASSERT(vim->vim_phys != NULL);

	/* The entry array is present iff there is at least one entry. */
	EQUIV(vim->vim_phys->vimp_num_entries > 0,
	    vim->vim_entries != NULL);

	if (vim->vim_phys->vimp_num_entries != 0) {
		uint64_t tail_idx = vim->vim_phys->vimp_num_entries - 1;
		vdev_indirect_mapping_entry_phys_t *tail __maybe_unused =
		    &vim->vim_entries[tail_idx];
		uint64_t offset __maybe_unused =
		    DVA_MAPPING_GET_SRC_OFFSET(tail);
		uint64_t size __maybe_unused =
		    DVA_GET_ASIZE(&tail->vimep_dst);

		/* max_offset must cover the end of the final entry. */
		ASSERT3U(vim->vim_phys->vimp_max_offset, >=, offset + size);
	}

	if (vim->vim_havecounts) {
		ASSERT(vim->vim_phys->vimp_counts_object != 0);
	}

	return (B_TRUE);
}
#endif
58*eda14cbcSMatt Macy 
59*eda14cbcSMatt Macy uint64_t
60*eda14cbcSMatt Macy vdev_indirect_mapping_num_entries(vdev_indirect_mapping_t *vim)
61*eda14cbcSMatt Macy {
62*eda14cbcSMatt Macy 	ASSERT(vdev_indirect_mapping_verify(vim));
63*eda14cbcSMatt Macy 
64*eda14cbcSMatt Macy 	return (vim->vim_phys->vimp_num_entries);
65*eda14cbcSMatt Macy }
66*eda14cbcSMatt Macy 
67*eda14cbcSMatt Macy uint64_t
68*eda14cbcSMatt Macy vdev_indirect_mapping_max_offset(vdev_indirect_mapping_t *vim)
69*eda14cbcSMatt Macy {
70*eda14cbcSMatt Macy 	ASSERT(vdev_indirect_mapping_verify(vim));
71*eda14cbcSMatt Macy 
72*eda14cbcSMatt Macy 	return (vim->vim_phys->vimp_max_offset);
73*eda14cbcSMatt Macy }
74*eda14cbcSMatt Macy 
75*eda14cbcSMatt Macy uint64_t
76*eda14cbcSMatt Macy vdev_indirect_mapping_object(vdev_indirect_mapping_t *vim)
77*eda14cbcSMatt Macy {
78*eda14cbcSMatt Macy 	ASSERT(vdev_indirect_mapping_verify(vim));
79*eda14cbcSMatt Macy 
80*eda14cbcSMatt Macy 	return (vim->vim_object);
81*eda14cbcSMatt Macy }
82*eda14cbcSMatt Macy 
83*eda14cbcSMatt Macy uint64_t
84*eda14cbcSMatt Macy vdev_indirect_mapping_bytes_mapped(vdev_indirect_mapping_t *vim)
85*eda14cbcSMatt Macy {
86*eda14cbcSMatt Macy 	ASSERT(vdev_indirect_mapping_verify(vim));
87*eda14cbcSMatt Macy 
88*eda14cbcSMatt Macy 	return (vim->vim_phys->vimp_bytes_mapped);
89*eda14cbcSMatt Macy }
90*eda14cbcSMatt Macy 
91*eda14cbcSMatt Macy /*
92*eda14cbcSMatt Macy  * The length (in bytes) of the mapping object array in memory and
93*eda14cbcSMatt Macy  * (logically) on disk.
94*eda14cbcSMatt Macy  *
95*eda14cbcSMatt Macy  * Note that unlike most of our accessor functions,
96*eda14cbcSMatt Macy  * we don't assert that the struct is consistent; therefore it can be
97*eda14cbcSMatt Macy  * called while there may be concurrent changes, if we don't care about
98*eda14cbcSMatt Macy  * the value being immediately stale (e.g. from spa_removal_get_stats()).
99*eda14cbcSMatt Macy  */
100*eda14cbcSMatt Macy uint64_t
101*eda14cbcSMatt Macy vdev_indirect_mapping_size(vdev_indirect_mapping_t *vim)
102*eda14cbcSMatt Macy {
103*eda14cbcSMatt Macy 	return (vim->vim_phys->vimp_num_entries * sizeof (*vim->vim_entries));
104*eda14cbcSMatt Macy }
105*eda14cbcSMatt Macy 
106*eda14cbcSMatt Macy /*
107*eda14cbcSMatt Macy  * Compare an offset with an indirect mapping entry; there are three
108*eda14cbcSMatt Macy  * possible scenarios:
109*eda14cbcSMatt Macy  *
110*eda14cbcSMatt Macy  *     1. The offset is "less than" the mapping entry; meaning the
111*eda14cbcSMatt Macy  *        offset is less than the source offset of the mapping entry. In
112*eda14cbcSMatt Macy  *        this case, there is no overlap between the offset and the
113*eda14cbcSMatt Macy  *        mapping entry and -1 will be returned.
114*eda14cbcSMatt Macy  *
115*eda14cbcSMatt Macy  *     2. The offset is "greater than" the mapping entry; meaning the
116*eda14cbcSMatt Macy  *        offset is greater than the mapping entry's source offset plus
117*eda14cbcSMatt Macy  *        the entry's size. In this case, there is no overlap between
118*eda14cbcSMatt Macy  *        the offset and the mapping entry and 1 will be returned.
119*eda14cbcSMatt Macy  *
120*eda14cbcSMatt Macy  *        NOTE: If the offset is actually equal to the entry's offset
121*eda14cbcSMatt Macy  *        plus size, this is considered to be "greater" than the entry,
122*eda14cbcSMatt Macy  *        and this case applies (i.e. 1 will be returned). Thus, the
123*eda14cbcSMatt Macy  *        entry's "range" can be considered to be inclusive at its
124*eda14cbcSMatt Macy  *        start, but exclusive at its end: e.g. [src, src + size).
125*eda14cbcSMatt Macy  *
126*eda14cbcSMatt Macy  *     3. The last case to consider is if the offset actually falls
127*eda14cbcSMatt Macy  *        within the mapping entry's range. If this is the case, the
128*eda14cbcSMatt Macy  *        offset is considered to be "equal to" the mapping entry and
129*eda14cbcSMatt Macy  *        0 will be returned.
130*eda14cbcSMatt Macy  *
131*eda14cbcSMatt Macy  *        NOTE: If the offset is equal to the entry's source offset,
132*eda14cbcSMatt Macy  *        this case applies and 0 will be returned. If the offset is
133*eda14cbcSMatt Macy  *        equal to the entry's source plus its size, this case does
134*eda14cbcSMatt Macy  *        *not* apply (see "NOTE" above for scenario 2), and 1 will be
135*eda14cbcSMatt Macy  *        returned.
136*eda14cbcSMatt Macy  */
137*eda14cbcSMatt Macy static int
138*eda14cbcSMatt Macy dva_mapping_overlap_compare(const void *v_key, const void *v_array_elem)
139*eda14cbcSMatt Macy {
140*eda14cbcSMatt Macy 	const uint64_t * const key = v_key;
141*eda14cbcSMatt Macy 	const vdev_indirect_mapping_entry_phys_t * const array_elem =
142*eda14cbcSMatt Macy 	    v_array_elem;
143*eda14cbcSMatt Macy 	uint64_t src_offset = DVA_MAPPING_GET_SRC_OFFSET(array_elem);
144*eda14cbcSMatt Macy 
145*eda14cbcSMatt Macy 	if (*key < src_offset) {
146*eda14cbcSMatt Macy 		return (-1);
147*eda14cbcSMatt Macy 	} else if (*key < src_offset + DVA_GET_ASIZE(&array_elem->vimep_dst)) {
148*eda14cbcSMatt Macy 		return (0);
149*eda14cbcSMatt Macy 	} else {
150*eda14cbcSMatt Macy 		return (1);
151*eda14cbcSMatt Macy 	}
152*eda14cbcSMatt Macy }
153*eda14cbcSMatt Macy 
154*eda14cbcSMatt Macy /*
155*eda14cbcSMatt Macy  * Returns the mapping entry for the given offset.
156*eda14cbcSMatt Macy  *
157*eda14cbcSMatt Macy  * It's possible that the given offset will not be in the mapping table
158*eda14cbcSMatt Macy  * (i.e. no mapping entries contain this offset), in which case, the
159*eda14cbcSMatt Macy  * return value value depends on the "next_if_missing" parameter.
160*eda14cbcSMatt Macy  *
161*eda14cbcSMatt Macy  * If the offset is not found in the table and "next_if_missing" is
162*eda14cbcSMatt Macy  * B_FALSE, then NULL will always be returned. The behavior is intended
163*eda14cbcSMatt Macy  * to allow consumers to get the entry corresponding to the offset
164*eda14cbcSMatt Macy  * parameter, iff the offset overlaps with an entry in the table.
165*eda14cbcSMatt Macy  *
166*eda14cbcSMatt Macy  * If the offset is not found in the table and "next_if_missing" is
167*eda14cbcSMatt Macy  * B_TRUE, then the entry nearest to the given offset will be returned,
168*eda14cbcSMatt Macy  * such that the entry's source offset is greater than the offset
169*eda14cbcSMatt Macy  * passed in (i.e. the "next" mapping entry in the table is returned, if
170*eda14cbcSMatt Macy  * the offset is missing from the table). If there are no entries whose
171*eda14cbcSMatt Macy  * source offset is greater than the passed in offset, NULL is returned.
172*eda14cbcSMatt Macy  */
173*eda14cbcSMatt Macy static vdev_indirect_mapping_entry_phys_t *
174*eda14cbcSMatt Macy vdev_indirect_mapping_entry_for_offset_impl(vdev_indirect_mapping_t *vim,
175*eda14cbcSMatt Macy     uint64_t offset, boolean_t next_if_missing)
176*eda14cbcSMatt Macy {
177*eda14cbcSMatt Macy 	ASSERT(vdev_indirect_mapping_verify(vim));
178*eda14cbcSMatt Macy 	ASSERT(vim->vim_phys->vimp_num_entries > 0);
179*eda14cbcSMatt Macy 
180*eda14cbcSMatt Macy 	vdev_indirect_mapping_entry_phys_t *entry = NULL;
181*eda14cbcSMatt Macy 
182*eda14cbcSMatt Macy 	uint64_t last = vim->vim_phys->vimp_num_entries - 1;
183*eda14cbcSMatt Macy 	uint64_t base = 0;
184*eda14cbcSMatt Macy 
185*eda14cbcSMatt Macy 	/*
186*eda14cbcSMatt Macy 	 * We don't define these inside of the while loop because we use
187*eda14cbcSMatt Macy 	 * their value in the case that offset isn't in the mapping.
188*eda14cbcSMatt Macy 	 */
189*eda14cbcSMatt Macy 	uint64_t mid;
190*eda14cbcSMatt Macy 	int result;
191*eda14cbcSMatt Macy 
192*eda14cbcSMatt Macy 	while (last >= base) {
193*eda14cbcSMatt Macy 		mid = base + ((last - base) >> 1);
194*eda14cbcSMatt Macy 
195*eda14cbcSMatt Macy 		result = dva_mapping_overlap_compare(&offset,
196*eda14cbcSMatt Macy 		    &vim->vim_entries[mid]);
197*eda14cbcSMatt Macy 
198*eda14cbcSMatt Macy 		if (result == 0) {
199*eda14cbcSMatt Macy 			entry = &vim->vim_entries[mid];
200*eda14cbcSMatt Macy 			break;
201*eda14cbcSMatt Macy 		} else if (result < 0) {
202*eda14cbcSMatt Macy 			last = mid - 1;
203*eda14cbcSMatt Macy 		} else {
204*eda14cbcSMatt Macy 			base = mid + 1;
205*eda14cbcSMatt Macy 		}
206*eda14cbcSMatt Macy 	}
207*eda14cbcSMatt Macy 
208*eda14cbcSMatt Macy 	if (entry == NULL && next_if_missing) {
209*eda14cbcSMatt Macy 		ASSERT3U(base, ==, last + 1);
210*eda14cbcSMatt Macy 		ASSERT(mid == base || mid == last);
211*eda14cbcSMatt Macy 		ASSERT3S(result, !=, 0);
212*eda14cbcSMatt Macy 
213*eda14cbcSMatt Macy 		/*
214*eda14cbcSMatt Macy 		 * The offset we're looking for isn't actually contained
215*eda14cbcSMatt Macy 		 * in the mapping table, thus we need to return the
216*eda14cbcSMatt Macy 		 * closest mapping entry that is greater than the
217*eda14cbcSMatt Macy 		 * offset. We reuse the result of the last comparison,
218*eda14cbcSMatt Macy 		 * comparing the mapping entry at index "mid" and the
219*eda14cbcSMatt Macy 		 * offset. The offset is guaranteed to lie between
220*eda14cbcSMatt Macy 		 * indices one less than "mid", and one greater than
221*eda14cbcSMatt Macy 		 * "mid"; we just need to determine if offset is greater
222*eda14cbcSMatt Macy 		 * than, or less than the mapping entry contained at
223*eda14cbcSMatt Macy 		 * index "mid".
224*eda14cbcSMatt Macy 		 */
225*eda14cbcSMatt Macy 
226*eda14cbcSMatt Macy 		uint64_t index;
227*eda14cbcSMatt Macy 		if (result < 0)
228*eda14cbcSMatt Macy 			index = mid;
229*eda14cbcSMatt Macy 		else
230*eda14cbcSMatt Macy 			index = mid + 1;
231*eda14cbcSMatt Macy 
232*eda14cbcSMatt Macy 		ASSERT3U(index, <=, vim->vim_phys->vimp_num_entries);
233*eda14cbcSMatt Macy 
234*eda14cbcSMatt Macy 		if (index == vim->vim_phys->vimp_num_entries) {
235*eda14cbcSMatt Macy 			/*
236*eda14cbcSMatt Macy 			 * If "index" is past the end of the entries
237*eda14cbcSMatt Macy 			 * array, then not only is the offset not in the
238*eda14cbcSMatt Macy 			 * mapping table, but it's actually greater than
239*eda14cbcSMatt Macy 			 * all entries in the table. In this case, we
240*eda14cbcSMatt Macy 			 * can't return a mapping entry greater than the
241*eda14cbcSMatt Macy 			 * offset (since none exist), so we return NULL.
242*eda14cbcSMatt Macy 			 */
243*eda14cbcSMatt Macy 
244*eda14cbcSMatt Macy 			ASSERT3S(dva_mapping_overlap_compare(&offset,
245*eda14cbcSMatt Macy 			    &vim->vim_entries[index - 1]), >, 0);
246*eda14cbcSMatt Macy 
247*eda14cbcSMatt Macy 			return (NULL);
248*eda14cbcSMatt Macy 		} else {
249*eda14cbcSMatt Macy 			/*
250*eda14cbcSMatt Macy 			 * Just to be safe, we verify the offset falls
251*eda14cbcSMatt Macy 			 * in between the mapping entries at index and
252*eda14cbcSMatt Macy 			 * one less than index. Since we know the offset
253*eda14cbcSMatt Macy 			 * doesn't overlap an entry, and we're supposed
254*eda14cbcSMatt Macy 			 * to return the entry just greater than the
255*eda14cbcSMatt Macy 			 * offset, both of the following tests must be
256*eda14cbcSMatt Macy 			 * true.
257*eda14cbcSMatt Macy 			 */
258*eda14cbcSMatt Macy 			ASSERT3S(dva_mapping_overlap_compare(&offset,
259*eda14cbcSMatt Macy 			    &vim->vim_entries[index]), <, 0);
260*eda14cbcSMatt Macy 			IMPLY(index >= 1, dva_mapping_overlap_compare(&offset,
261*eda14cbcSMatt Macy 			    &vim->vim_entries[index - 1]) > 0);
262*eda14cbcSMatt Macy 
263*eda14cbcSMatt Macy 			return (&vim->vim_entries[index]);
264*eda14cbcSMatt Macy 		}
265*eda14cbcSMatt Macy 	} else {
266*eda14cbcSMatt Macy 		return (entry);
267*eda14cbcSMatt Macy 	}
268*eda14cbcSMatt Macy }
269*eda14cbcSMatt Macy 
270*eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t *
271*eda14cbcSMatt Macy vdev_indirect_mapping_entry_for_offset(vdev_indirect_mapping_t *vim,
272*eda14cbcSMatt Macy     uint64_t offset)
273*eda14cbcSMatt Macy {
274*eda14cbcSMatt Macy 	return (vdev_indirect_mapping_entry_for_offset_impl(vim, offset,
275*eda14cbcSMatt Macy 	    B_FALSE));
276*eda14cbcSMatt Macy }
277*eda14cbcSMatt Macy 
278*eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t *
279*eda14cbcSMatt Macy vdev_indirect_mapping_entry_for_offset_or_next(vdev_indirect_mapping_t *vim,
280*eda14cbcSMatt Macy     uint64_t offset)
281*eda14cbcSMatt Macy {
282*eda14cbcSMatt Macy 	return (vdev_indirect_mapping_entry_for_offset_impl(vim, offset,
283*eda14cbcSMatt Macy 	    B_TRUE));
284*eda14cbcSMatt Macy }
285*eda14cbcSMatt Macy 
286*eda14cbcSMatt Macy void
287*eda14cbcSMatt Macy vdev_indirect_mapping_close(vdev_indirect_mapping_t *vim)
288*eda14cbcSMatt Macy {
289*eda14cbcSMatt Macy 	ASSERT(vdev_indirect_mapping_verify(vim));
290*eda14cbcSMatt Macy 
291*eda14cbcSMatt Macy 	if (vim->vim_phys->vimp_num_entries > 0) {
292*eda14cbcSMatt Macy 		uint64_t map_size = vdev_indirect_mapping_size(vim);
293*eda14cbcSMatt Macy 		vmem_free(vim->vim_entries, map_size);
294*eda14cbcSMatt Macy 		vim->vim_entries = NULL;
295*eda14cbcSMatt Macy 	}
296*eda14cbcSMatt Macy 
297*eda14cbcSMatt Macy 	dmu_buf_rele(vim->vim_dbuf, vim);
298*eda14cbcSMatt Macy 
299*eda14cbcSMatt Macy 	vim->vim_objset = NULL;
300*eda14cbcSMatt Macy 	vim->vim_object = 0;
301*eda14cbcSMatt Macy 	vim->vim_dbuf = NULL;
302*eda14cbcSMatt Macy 	vim->vim_phys = NULL;
303*eda14cbcSMatt Macy 
304*eda14cbcSMatt Macy 	kmem_free(vim, sizeof (*vim));
305*eda14cbcSMatt Macy }
306*eda14cbcSMatt Macy 
307*eda14cbcSMatt Macy uint64_t
308*eda14cbcSMatt Macy vdev_indirect_mapping_alloc(objset_t *os, dmu_tx_t *tx)
309*eda14cbcSMatt Macy {
310*eda14cbcSMatt Macy 	uint64_t object;
311*eda14cbcSMatt Macy 	ASSERT(dmu_tx_is_syncing(tx));
312*eda14cbcSMatt Macy 	uint64_t bonus_size = VDEV_INDIRECT_MAPPING_SIZE_V0;
313*eda14cbcSMatt Macy 
314*eda14cbcSMatt Macy 	if (spa_feature_is_enabled(os->os_spa, SPA_FEATURE_OBSOLETE_COUNTS)) {
315*eda14cbcSMatt Macy 		bonus_size = sizeof (vdev_indirect_mapping_phys_t);
316*eda14cbcSMatt Macy 	}
317*eda14cbcSMatt Macy 
318*eda14cbcSMatt Macy 	object = dmu_object_alloc(os,
319*eda14cbcSMatt Macy 	    DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
320*eda14cbcSMatt Macy 	    DMU_OTN_UINT64_METADATA, bonus_size,
321*eda14cbcSMatt Macy 	    tx);
322*eda14cbcSMatt Macy 
323*eda14cbcSMatt Macy 	if (spa_feature_is_enabled(os->os_spa, SPA_FEATURE_OBSOLETE_COUNTS)) {
324*eda14cbcSMatt Macy 		dmu_buf_t *dbuf;
325*eda14cbcSMatt Macy 		vdev_indirect_mapping_phys_t *vimp;
326*eda14cbcSMatt Macy 
327*eda14cbcSMatt Macy 		VERIFY0(dmu_bonus_hold(os, object, FTAG, &dbuf));
328*eda14cbcSMatt Macy 		dmu_buf_will_dirty(dbuf, tx);
329*eda14cbcSMatt Macy 		vimp = dbuf->db_data;
330*eda14cbcSMatt Macy 		vimp->vimp_counts_object = dmu_object_alloc(os,
331*eda14cbcSMatt Macy 		    DMU_OTN_UINT32_METADATA, SPA_OLD_MAXBLOCKSIZE,
332*eda14cbcSMatt Macy 		    DMU_OT_NONE, 0, tx);
333*eda14cbcSMatt Macy 		spa_feature_incr(os->os_spa, SPA_FEATURE_OBSOLETE_COUNTS, tx);
334*eda14cbcSMatt Macy 		dmu_buf_rele(dbuf, FTAG);
335*eda14cbcSMatt Macy 	}
336*eda14cbcSMatt Macy 
337*eda14cbcSMatt Macy 	return (object);
338*eda14cbcSMatt Macy }
339*eda14cbcSMatt Macy 
340*eda14cbcSMatt Macy 
341*eda14cbcSMatt Macy vdev_indirect_mapping_t *
342*eda14cbcSMatt Macy vdev_indirect_mapping_open(objset_t *os, uint64_t mapping_object)
343*eda14cbcSMatt Macy {
344*eda14cbcSMatt Macy 	vdev_indirect_mapping_t *vim = kmem_zalloc(sizeof (*vim), KM_SLEEP);
345*eda14cbcSMatt Macy 	dmu_object_info_t doi;
346*eda14cbcSMatt Macy 	VERIFY0(dmu_object_info(os, mapping_object, &doi));
347*eda14cbcSMatt Macy 
348*eda14cbcSMatt Macy 	vim->vim_objset = os;
349*eda14cbcSMatt Macy 	vim->vim_object = mapping_object;
350*eda14cbcSMatt Macy 
351*eda14cbcSMatt Macy 	VERIFY0(dmu_bonus_hold(os, vim->vim_object, vim,
352*eda14cbcSMatt Macy 	    &vim->vim_dbuf));
353*eda14cbcSMatt Macy 	vim->vim_phys = vim->vim_dbuf->db_data;
354*eda14cbcSMatt Macy 
355*eda14cbcSMatt Macy 	vim->vim_havecounts =
356*eda14cbcSMatt Macy 	    (doi.doi_bonus_size > VDEV_INDIRECT_MAPPING_SIZE_V0);
357*eda14cbcSMatt Macy 
358*eda14cbcSMatt Macy 	if (vim->vim_phys->vimp_num_entries > 0) {
359*eda14cbcSMatt Macy 		uint64_t map_size = vdev_indirect_mapping_size(vim);
360*eda14cbcSMatt Macy 		vim->vim_entries = vmem_alloc(map_size, KM_SLEEP);
361*eda14cbcSMatt Macy 		VERIFY0(dmu_read(os, vim->vim_object, 0, map_size,
362*eda14cbcSMatt Macy 		    vim->vim_entries, DMU_READ_PREFETCH));
363*eda14cbcSMatt Macy 	}
364*eda14cbcSMatt Macy 
365*eda14cbcSMatt Macy 	ASSERT(vdev_indirect_mapping_verify(vim));
366*eda14cbcSMatt Macy 
367*eda14cbcSMatt Macy 	return (vim);
368*eda14cbcSMatt Macy }
369*eda14cbcSMatt Macy 
370*eda14cbcSMatt Macy void
371*eda14cbcSMatt Macy vdev_indirect_mapping_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
372*eda14cbcSMatt Macy {
373*eda14cbcSMatt Macy 	vdev_indirect_mapping_t *vim = vdev_indirect_mapping_open(os, object);
374*eda14cbcSMatt Macy 	if (vim->vim_havecounts) {
375*eda14cbcSMatt Macy 		VERIFY0(dmu_object_free(os, vim->vim_phys->vimp_counts_object,
376*eda14cbcSMatt Macy 		    tx));
377*eda14cbcSMatt Macy 		spa_feature_decr(os->os_spa, SPA_FEATURE_OBSOLETE_COUNTS, tx);
378*eda14cbcSMatt Macy 	}
379*eda14cbcSMatt Macy 	vdev_indirect_mapping_close(vim);
380*eda14cbcSMatt Macy 
381*eda14cbcSMatt Macy 	VERIFY0(dmu_object_free(os, object, tx));
382*eda14cbcSMatt Macy }
383*eda14cbcSMatt Macy 
384*eda14cbcSMatt Macy /*
385*eda14cbcSMatt Macy  * Append the list of vdev_indirect_mapping_entry_t's to the on-disk
386*eda14cbcSMatt Macy  * mapping object.  Also remove the entries from the list and free them.
387*eda14cbcSMatt Macy  * This also implicitly extends the max_offset of the mapping (to the end
388*eda14cbcSMatt Macy  * of the last entry).
389*eda14cbcSMatt Macy  */
390*eda14cbcSMatt Macy void
391*eda14cbcSMatt Macy vdev_indirect_mapping_add_entries(vdev_indirect_mapping_t *vim,
392*eda14cbcSMatt Macy     list_t *list, dmu_tx_t *tx)
393*eda14cbcSMatt Macy {
394*eda14cbcSMatt Macy 	vdev_indirect_mapping_entry_phys_t *mapbuf;
395*eda14cbcSMatt Macy 	uint64_t old_size;
396*eda14cbcSMatt Macy 	uint32_t *countbuf = NULL;
397*eda14cbcSMatt Macy 	vdev_indirect_mapping_entry_phys_t *old_entries;
398*eda14cbcSMatt Macy 	uint64_t old_count;
399*eda14cbcSMatt Macy 	uint64_t entries_written = 0;
400*eda14cbcSMatt Macy 
401*eda14cbcSMatt Macy 	ASSERT(vdev_indirect_mapping_verify(vim));
402*eda14cbcSMatt Macy 	ASSERT(dmu_tx_is_syncing(tx));
403*eda14cbcSMatt Macy 	ASSERT(dsl_pool_sync_context(dmu_tx_pool(tx)));
404*eda14cbcSMatt Macy 	ASSERT(!list_is_empty(list));
405*eda14cbcSMatt Macy 
406*eda14cbcSMatt Macy 	old_size = vdev_indirect_mapping_size(vim);
407*eda14cbcSMatt Macy 	old_entries = vim->vim_entries;
408*eda14cbcSMatt Macy 	old_count = vim->vim_phys->vimp_num_entries;
409*eda14cbcSMatt Macy 
410*eda14cbcSMatt Macy 	dmu_buf_will_dirty(vim->vim_dbuf, tx);
411*eda14cbcSMatt Macy 
412*eda14cbcSMatt Macy 	mapbuf = vmem_alloc(SPA_OLD_MAXBLOCKSIZE, KM_SLEEP);
413*eda14cbcSMatt Macy 	if (vim->vim_havecounts) {
414*eda14cbcSMatt Macy 		countbuf = vmem_alloc(SPA_OLD_MAXBLOCKSIZE, KM_SLEEP);
415*eda14cbcSMatt Macy 		ASSERT(spa_feature_is_active(vim->vim_objset->os_spa,
416*eda14cbcSMatt Macy 		    SPA_FEATURE_OBSOLETE_COUNTS));
417*eda14cbcSMatt Macy 	}
418*eda14cbcSMatt Macy 	while (!list_is_empty(list)) {
419*eda14cbcSMatt Macy 		uint64_t i;
420*eda14cbcSMatt Macy 		/*
421*eda14cbcSMatt Macy 		 * Write entries from the list to the
422*eda14cbcSMatt Macy 		 * vdev_im_object in batches of size SPA_OLD_MAXBLOCKSIZE.
423*eda14cbcSMatt Macy 		 */
424*eda14cbcSMatt Macy 		for (i = 0; i < SPA_OLD_MAXBLOCKSIZE / sizeof (*mapbuf); i++) {
425*eda14cbcSMatt Macy 			vdev_indirect_mapping_entry_t *entry =
426*eda14cbcSMatt Macy 			    list_remove_head(list);
427*eda14cbcSMatt Macy 			if (entry == NULL)
428*eda14cbcSMatt Macy 				break;
429*eda14cbcSMatt Macy 
430*eda14cbcSMatt Macy 			uint64_t size =
431*eda14cbcSMatt Macy 			    DVA_GET_ASIZE(&entry->vime_mapping.vimep_dst);
432*eda14cbcSMatt Macy 			uint64_t src_offset =
433*eda14cbcSMatt Macy 			    DVA_MAPPING_GET_SRC_OFFSET(&entry->vime_mapping);
434*eda14cbcSMatt Macy 
435*eda14cbcSMatt Macy 			/*
436*eda14cbcSMatt Macy 			 * We shouldn't be adding an entry which is fully
437*eda14cbcSMatt Macy 			 * obsolete.
438*eda14cbcSMatt Macy 			 */
439*eda14cbcSMatt Macy 			ASSERT3U(entry->vime_obsolete_count, <, size);
440*eda14cbcSMatt Macy 			IMPLY(entry->vime_obsolete_count != 0,
441*eda14cbcSMatt Macy 			    vim->vim_havecounts);
442*eda14cbcSMatt Macy 
443*eda14cbcSMatt Macy 			mapbuf[i] = entry->vime_mapping;
444*eda14cbcSMatt Macy 			if (vim->vim_havecounts)
445*eda14cbcSMatt Macy 				countbuf[i] = entry->vime_obsolete_count;
446*eda14cbcSMatt Macy 
447*eda14cbcSMatt Macy 			vim->vim_phys->vimp_bytes_mapped += size;
448*eda14cbcSMatt Macy 			ASSERT3U(src_offset, >=,
449*eda14cbcSMatt Macy 			    vim->vim_phys->vimp_max_offset);
450*eda14cbcSMatt Macy 			vim->vim_phys->vimp_max_offset = src_offset + size;
451*eda14cbcSMatt Macy 
452*eda14cbcSMatt Macy 			entries_written++;
453*eda14cbcSMatt Macy 
454*eda14cbcSMatt Macy 			vmem_free(entry, sizeof (*entry));
455*eda14cbcSMatt Macy 		}
456*eda14cbcSMatt Macy 		dmu_write(vim->vim_objset, vim->vim_object,
457*eda14cbcSMatt Macy 		    vim->vim_phys->vimp_num_entries * sizeof (*mapbuf),
458*eda14cbcSMatt Macy 		    i * sizeof (*mapbuf),
459*eda14cbcSMatt Macy 		    mapbuf, tx);
460*eda14cbcSMatt Macy 		if (vim->vim_havecounts) {
461*eda14cbcSMatt Macy 			dmu_write(vim->vim_objset,
462*eda14cbcSMatt Macy 			    vim->vim_phys->vimp_counts_object,
463*eda14cbcSMatt Macy 			    vim->vim_phys->vimp_num_entries *
464*eda14cbcSMatt Macy 			    sizeof (*countbuf),
465*eda14cbcSMatt Macy 			    i * sizeof (*countbuf), countbuf, tx);
466*eda14cbcSMatt Macy 		}
467*eda14cbcSMatt Macy 		vim->vim_phys->vimp_num_entries += i;
468*eda14cbcSMatt Macy 	}
469*eda14cbcSMatt Macy 	vmem_free(mapbuf, SPA_OLD_MAXBLOCKSIZE);
470*eda14cbcSMatt Macy 	if (vim->vim_havecounts)
471*eda14cbcSMatt Macy 		vmem_free(countbuf, SPA_OLD_MAXBLOCKSIZE);
472*eda14cbcSMatt Macy 
473*eda14cbcSMatt Macy 	/*
474*eda14cbcSMatt Macy 	 * Update the entry array to reflect the new entries. First, copy
475*eda14cbcSMatt Macy 	 * over any old entries then read back the new entries we just wrote.
476*eda14cbcSMatt Macy 	 */
477*eda14cbcSMatt Macy 	uint64_t new_size = vdev_indirect_mapping_size(vim);
478*eda14cbcSMatt Macy 	ASSERT3U(new_size, >, old_size);
479*eda14cbcSMatt Macy 	ASSERT3U(new_size - old_size, ==,
480*eda14cbcSMatt Macy 	    entries_written * sizeof (vdev_indirect_mapping_entry_phys_t));
481*eda14cbcSMatt Macy 	vim->vim_entries = vmem_alloc(new_size, KM_SLEEP);
482*eda14cbcSMatt Macy 	if (old_size > 0) {
483*eda14cbcSMatt Macy 		bcopy(old_entries, vim->vim_entries, old_size);
484*eda14cbcSMatt Macy 		vmem_free(old_entries, old_size);
485*eda14cbcSMatt Macy 	}
486*eda14cbcSMatt Macy 	VERIFY0(dmu_read(vim->vim_objset, vim->vim_object, old_size,
487*eda14cbcSMatt Macy 	    new_size - old_size, &vim->vim_entries[old_count],
488*eda14cbcSMatt Macy 	    DMU_READ_PREFETCH));
489*eda14cbcSMatt Macy 
490*eda14cbcSMatt Macy 	zfs_dbgmsg("txg %llu: wrote %llu entries to "
491*eda14cbcSMatt Macy 	    "indirect mapping obj %llu; max offset=0x%llx",
492*eda14cbcSMatt Macy 	    (u_longlong_t)dmu_tx_get_txg(tx),
493*eda14cbcSMatt Macy 	    (u_longlong_t)entries_written,
494*eda14cbcSMatt Macy 	    (u_longlong_t)vim->vim_object,
495*eda14cbcSMatt Macy 	    (u_longlong_t)vim->vim_phys->vimp_max_offset);
496*eda14cbcSMatt Macy }
497*eda14cbcSMatt Macy 
498*eda14cbcSMatt Macy /*
499*eda14cbcSMatt Macy  * Increment the relevant counts for the specified offset and length.
500*eda14cbcSMatt Macy  * The counts array must be obtained from
501*eda14cbcSMatt Macy  * vdev_indirect_mapping_load_obsolete_counts().
502*eda14cbcSMatt Macy  */
503*eda14cbcSMatt Macy void
504*eda14cbcSMatt Macy vdev_indirect_mapping_increment_obsolete_count(vdev_indirect_mapping_t *vim,
505*eda14cbcSMatt Macy     uint64_t offset, uint64_t length, uint32_t *counts)
506*eda14cbcSMatt Macy {
507*eda14cbcSMatt Macy 	vdev_indirect_mapping_entry_phys_t *mapping;
508*eda14cbcSMatt Macy 	uint64_t index;
509*eda14cbcSMatt Macy 
510*eda14cbcSMatt Macy 	mapping = vdev_indirect_mapping_entry_for_offset(vim,  offset);
511*eda14cbcSMatt Macy 
512*eda14cbcSMatt Macy 	ASSERT(length > 0);
513*eda14cbcSMatt Macy 	ASSERT3P(mapping, !=, NULL);
514*eda14cbcSMatt Macy 
515*eda14cbcSMatt Macy 	index = mapping - vim->vim_entries;
516*eda14cbcSMatt Macy 
517*eda14cbcSMatt Macy 	while (length > 0) {
518*eda14cbcSMatt Macy 		ASSERT3U(index, <, vdev_indirect_mapping_num_entries(vim));
519*eda14cbcSMatt Macy 
520*eda14cbcSMatt Macy 		uint64_t size = DVA_GET_ASIZE(&mapping->vimep_dst);
521*eda14cbcSMatt Macy 		uint64_t inner_offset = offset -
522*eda14cbcSMatt Macy 		    DVA_MAPPING_GET_SRC_OFFSET(mapping);
523*eda14cbcSMatt Macy 		VERIFY3U(inner_offset, <, size);
524*eda14cbcSMatt Macy 		uint64_t inner_size = MIN(length, size - inner_offset);
525*eda14cbcSMatt Macy 
526*eda14cbcSMatt Macy 		VERIFY3U(counts[index] + inner_size, <=, size);
527*eda14cbcSMatt Macy 		counts[index] += inner_size;
528*eda14cbcSMatt Macy 
529*eda14cbcSMatt Macy 		offset += inner_size;
530*eda14cbcSMatt Macy 		length -= inner_size;
531*eda14cbcSMatt Macy 		mapping++;
532*eda14cbcSMatt Macy 		index++;
533*eda14cbcSMatt Macy 	}
534*eda14cbcSMatt Macy }
535*eda14cbcSMatt Macy 
536*eda14cbcSMatt Macy typedef struct load_obsolete_space_map_arg {
537*eda14cbcSMatt Macy 	vdev_indirect_mapping_t	*losma_vim;
538*eda14cbcSMatt Macy 	uint32_t		*losma_counts;
539*eda14cbcSMatt Macy } load_obsolete_space_map_arg_t;
540*eda14cbcSMatt Macy 
541*eda14cbcSMatt Macy static int
542*eda14cbcSMatt Macy load_obsolete_sm_callback(space_map_entry_t *sme, void *arg)
543*eda14cbcSMatt Macy {
544*eda14cbcSMatt Macy 	load_obsolete_space_map_arg_t *losma = arg;
545*eda14cbcSMatt Macy 	ASSERT3S(sme->sme_type, ==, SM_ALLOC);
546*eda14cbcSMatt Macy 
547*eda14cbcSMatt Macy 	vdev_indirect_mapping_increment_obsolete_count(losma->losma_vim,
548*eda14cbcSMatt Macy 	    sme->sme_offset, sme->sme_run, losma->losma_counts);
549*eda14cbcSMatt Macy 
550*eda14cbcSMatt Macy 	return (0);
551*eda14cbcSMatt Macy }
552*eda14cbcSMatt Macy 
553*eda14cbcSMatt Macy /*
554*eda14cbcSMatt Macy  * Modify the counts (increment them) based on the spacemap.
555*eda14cbcSMatt Macy  */
556*eda14cbcSMatt Macy void
557*eda14cbcSMatt Macy vdev_indirect_mapping_load_obsolete_spacemap(vdev_indirect_mapping_t *vim,
558*eda14cbcSMatt Macy     uint32_t *counts, space_map_t *obsolete_space_sm)
559*eda14cbcSMatt Macy {
560*eda14cbcSMatt Macy 	load_obsolete_space_map_arg_t losma;
561*eda14cbcSMatt Macy 	losma.losma_counts = counts;
562*eda14cbcSMatt Macy 	losma.losma_vim = vim;
563*eda14cbcSMatt Macy 	VERIFY0(space_map_iterate(obsolete_space_sm,
564*eda14cbcSMatt Macy 	    space_map_length(obsolete_space_sm),
565*eda14cbcSMatt Macy 	    load_obsolete_sm_callback, &losma));
566*eda14cbcSMatt Macy }
567*eda14cbcSMatt Macy 
568*eda14cbcSMatt Macy /*
569*eda14cbcSMatt Macy  * Read the obsolete counts from disk, returning them in an array.
570*eda14cbcSMatt Macy  */
571*eda14cbcSMatt Macy uint32_t *
572*eda14cbcSMatt Macy vdev_indirect_mapping_load_obsolete_counts(vdev_indirect_mapping_t *vim)
573*eda14cbcSMatt Macy {
574*eda14cbcSMatt Macy 	ASSERT(vdev_indirect_mapping_verify(vim));
575*eda14cbcSMatt Macy 
576*eda14cbcSMatt Macy 	uint64_t counts_size =
577*eda14cbcSMatt Macy 	    vim->vim_phys->vimp_num_entries * sizeof (uint32_t);
578*eda14cbcSMatt Macy 	uint32_t *counts = vmem_alloc(counts_size, KM_SLEEP);
579*eda14cbcSMatt Macy 	if (vim->vim_havecounts) {
580*eda14cbcSMatt Macy 		VERIFY0(dmu_read(vim->vim_objset,
581*eda14cbcSMatt Macy 		    vim->vim_phys->vimp_counts_object,
582*eda14cbcSMatt Macy 		    0, counts_size,
583*eda14cbcSMatt Macy 		    counts, DMU_READ_PREFETCH));
584*eda14cbcSMatt Macy 	} else {
585*eda14cbcSMatt Macy 		bzero(counts, counts_size);
586*eda14cbcSMatt Macy 	}
587*eda14cbcSMatt Macy 	return (counts);
588*eda14cbcSMatt Macy }
589*eda14cbcSMatt Macy 
590*eda14cbcSMatt Macy extern void
591*eda14cbcSMatt Macy vdev_indirect_mapping_free_obsolete_counts(vdev_indirect_mapping_t *vim,
592*eda14cbcSMatt Macy     uint32_t *counts)
593*eda14cbcSMatt Macy {
594*eda14cbcSMatt Macy 	ASSERT(vdev_indirect_mapping_verify(vim));
595*eda14cbcSMatt Macy 
596*eda14cbcSMatt Macy 	vmem_free(counts, vim->vim_phys->vimp_num_entries * sizeof (uint32_t));
597*eda14cbcSMatt Macy }
598*eda14cbcSMatt Macy 
#ifdef _KERNEL
/* Lifecycle. */
EXPORT_SYMBOL(vdev_indirect_mapping_alloc);
EXPORT_SYMBOL(vdev_indirect_mapping_open);
EXPORT_SYMBOL(vdev_indirect_mapping_close);
EXPORT_SYMBOL(vdev_indirect_mapping_free);

/* Header accessors. */
EXPORT_SYMBOL(vdev_indirect_mapping_num_entries);
EXPORT_SYMBOL(vdev_indirect_mapping_max_offset);
EXPORT_SYMBOL(vdev_indirect_mapping_object);
EXPORT_SYMBOL(vdev_indirect_mapping_bytes_mapped);
EXPORT_SYMBOL(vdev_indirect_mapping_size);

/* Entry lookup and append. */
EXPORT_SYMBOL(vdev_indirect_mapping_entry_for_offset);
EXPORT_SYMBOL(vdev_indirect_mapping_entry_for_offset_or_next);
EXPORT_SYMBOL(vdev_indirect_mapping_add_entries);

/* Obsolete counts. */
EXPORT_SYMBOL(vdev_indirect_mapping_load_obsolete_counts);
EXPORT_SYMBOL(vdev_indirect_mapping_load_obsolete_spacemap);
EXPORT_SYMBOL(vdev_indirect_mapping_increment_obsolete_count);
EXPORT_SYMBOL(vdev_indirect_mapping_free_obsolete_counts);
#endif
617