/*
 * CDDL HEADER START
 *
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2015, 2017 by Delphix. All rights reserved.
 */
19eda14cbcSMatt Macy
20eda14cbcSMatt Macy #include <sys/dmu_tx.h>
21eda14cbcSMatt Macy #include <sys/dsl_pool.h>
22eda14cbcSMatt Macy #include <sys/spa.h>
23eda14cbcSMatt Macy #include <sys/vdev_impl.h>
24eda14cbcSMatt Macy #include <sys/vdev_indirect_mapping.h>
25eda14cbcSMatt Macy #include <sys/zfeature.h>
26eda14cbcSMatt Macy #include <sys/dmu_objset.h>
27eda14cbcSMatt Macy
#ifdef ZFS_DEBUG
/*
 * Debug-only sanity check of an open indirect mapping.
 *
 * Asserts that the handle, its object number, objset, phys pointer and
 * bonus buffer are all set; that the in-core entry array is present
 * exactly when the mapping has entries; that the recorded max offset is
 * at least the end of the last entry; and that a counts object is
 * recorded whenever vim_havecounts is set.
 *
 * Always returns B_TRUE, so callers can wrap the call in ASSERT().
 */
static boolean_t
vdev_indirect_mapping_verify(vdev_indirect_mapping_t *vim)
{
	ASSERT(vim != NULL);

	ASSERT(vim->vim_object != 0);
	ASSERT(vim->vim_objset != NULL);
	ASSERT(vim->vim_phys != NULL);
	ASSERT(vim->vim_dbuf != NULL);

	EQUIV(vim->vim_phys->vimp_num_entries > 0,
	    vim->vim_entries != NULL);

	if (vim->vim_phys->vimp_num_entries > 0) {
		/* The final entry determines the minimum legal max offset. */
		const vdev_indirect_mapping_entry_phys_t *tail __maybe_unused =
		    vim->vim_entries + (vim->vim_phys->vimp_num_entries - 1);
		uint64_t offset __maybe_unused =
		    DVA_MAPPING_GET_SRC_OFFSET(tail);
		uint64_t size __maybe_unused =
		    DVA_GET_ASIZE(&tail->vimep_dst);

		ASSERT3U(vim->vim_phys->vimp_max_offset, >=, offset + size);
	}

	if (vim->vim_havecounts) {
		ASSERT(vim->vim_phys->vimp_counts_object != 0);
	}

	return (B_TRUE);
}
#else
/* Non-debug builds: mention the argument without evaluating it. */
#define	vdev_indirect_mapping_verify(vim) ((void) sizeof (vim), B_TRUE)
#endif
60eda14cbcSMatt Macy
61eda14cbcSMatt Macy uint64_t
vdev_indirect_mapping_num_entries(vdev_indirect_mapping_t * vim)62eda14cbcSMatt Macy vdev_indirect_mapping_num_entries(vdev_indirect_mapping_t *vim)
63eda14cbcSMatt Macy {
64eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim));
65eda14cbcSMatt Macy
66eda14cbcSMatt Macy return (vim->vim_phys->vimp_num_entries);
67eda14cbcSMatt Macy }
68eda14cbcSMatt Macy
69eda14cbcSMatt Macy uint64_t
vdev_indirect_mapping_max_offset(vdev_indirect_mapping_t * vim)70eda14cbcSMatt Macy vdev_indirect_mapping_max_offset(vdev_indirect_mapping_t *vim)
71eda14cbcSMatt Macy {
72eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim));
73eda14cbcSMatt Macy
74eda14cbcSMatt Macy return (vim->vim_phys->vimp_max_offset);
75eda14cbcSMatt Macy }
76eda14cbcSMatt Macy
77eda14cbcSMatt Macy uint64_t
vdev_indirect_mapping_object(vdev_indirect_mapping_t * vim)78eda14cbcSMatt Macy vdev_indirect_mapping_object(vdev_indirect_mapping_t *vim)
79eda14cbcSMatt Macy {
80eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim));
81eda14cbcSMatt Macy
82eda14cbcSMatt Macy return (vim->vim_object);
83eda14cbcSMatt Macy }
84eda14cbcSMatt Macy
85eda14cbcSMatt Macy uint64_t
vdev_indirect_mapping_bytes_mapped(vdev_indirect_mapping_t * vim)86eda14cbcSMatt Macy vdev_indirect_mapping_bytes_mapped(vdev_indirect_mapping_t *vim)
87eda14cbcSMatt Macy {
88eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim));
89eda14cbcSMatt Macy
90eda14cbcSMatt Macy return (vim->vim_phys->vimp_bytes_mapped);
91eda14cbcSMatt Macy }
92eda14cbcSMatt Macy
93eda14cbcSMatt Macy /*
94eda14cbcSMatt Macy * The length (in bytes) of the mapping object array in memory and
95eda14cbcSMatt Macy * (logically) on disk.
96eda14cbcSMatt Macy *
97eda14cbcSMatt Macy * Note that unlike most of our accessor functions,
98eda14cbcSMatt Macy * we don't assert that the struct is consistent; therefore it can be
99eda14cbcSMatt Macy * called while there may be concurrent changes, if we don't care about
100eda14cbcSMatt Macy * the value being immediately stale (e.g. from spa_removal_get_stats()).
101eda14cbcSMatt Macy */
102eda14cbcSMatt Macy uint64_t
vdev_indirect_mapping_size(vdev_indirect_mapping_t * vim)103eda14cbcSMatt Macy vdev_indirect_mapping_size(vdev_indirect_mapping_t *vim)
104eda14cbcSMatt Macy {
105eda14cbcSMatt Macy return (vim->vim_phys->vimp_num_entries * sizeof (*vim->vim_entries));
106eda14cbcSMatt Macy }
107eda14cbcSMatt Macy
108eda14cbcSMatt Macy /*
109eda14cbcSMatt Macy * Compare an offset with an indirect mapping entry; there are three
110eda14cbcSMatt Macy * possible scenarios:
111eda14cbcSMatt Macy *
112eda14cbcSMatt Macy * 1. The offset is "less than" the mapping entry; meaning the
113eda14cbcSMatt Macy * offset is less than the source offset of the mapping entry. In
114eda14cbcSMatt Macy * this case, there is no overlap between the offset and the
115eda14cbcSMatt Macy * mapping entry and -1 will be returned.
116eda14cbcSMatt Macy *
117eda14cbcSMatt Macy * 2. The offset is "greater than" the mapping entry; meaning the
118eda14cbcSMatt Macy * offset is greater than the mapping entry's source offset plus
119eda14cbcSMatt Macy * the entry's size. In this case, there is no overlap between
120eda14cbcSMatt Macy * the offset and the mapping entry and 1 will be returned.
121eda14cbcSMatt Macy *
122eda14cbcSMatt Macy * NOTE: If the offset is actually equal to the entry's offset
123eda14cbcSMatt Macy * plus size, this is considered to be "greater" than the entry,
124eda14cbcSMatt Macy * and this case applies (i.e. 1 will be returned). Thus, the
125eda14cbcSMatt Macy * entry's "range" can be considered to be inclusive at its
126eda14cbcSMatt Macy * start, but exclusive at its end: e.g. [src, src + size).
127eda14cbcSMatt Macy *
128eda14cbcSMatt Macy * 3. The last case to consider is if the offset actually falls
129eda14cbcSMatt Macy * within the mapping entry's range. If this is the case, the
130eda14cbcSMatt Macy * offset is considered to be "equal to" the mapping entry and
131eda14cbcSMatt Macy * 0 will be returned.
132eda14cbcSMatt Macy *
133eda14cbcSMatt Macy * NOTE: If the offset is equal to the entry's source offset,
134eda14cbcSMatt Macy * this case applies and 0 will be returned. If the offset is
135eda14cbcSMatt Macy * equal to the entry's source plus its size, this case does
136eda14cbcSMatt Macy * *not* apply (see "NOTE" above for scenario 2), and 1 will be
137eda14cbcSMatt Macy * returned.
138eda14cbcSMatt Macy */
139eda14cbcSMatt Macy static int
dva_mapping_overlap_compare(const void * v_key,const void * v_array_elem)140eda14cbcSMatt Macy dva_mapping_overlap_compare(const void *v_key, const void *v_array_elem)
141eda14cbcSMatt Macy {
142eda14cbcSMatt Macy const uint64_t * const key = v_key;
143eda14cbcSMatt Macy const vdev_indirect_mapping_entry_phys_t * const array_elem =
144eda14cbcSMatt Macy v_array_elem;
145eda14cbcSMatt Macy uint64_t src_offset = DVA_MAPPING_GET_SRC_OFFSET(array_elem);
146eda14cbcSMatt Macy
147eda14cbcSMatt Macy if (*key < src_offset) {
148eda14cbcSMatt Macy return (-1);
149eda14cbcSMatt Macy } else if (*key < src_offset + DVA_GET_ASIZE(&array_elem->vimep_dst)) {
150eda14cbcSMatt Macy return (0);
151eda14cbcSMatt Macy } else {
152eda14cbcSMatt Macy return (1);
153eda14cbcSMatt Macy }
154eda14cbcSMatt Macy }
155eda14cbcSMatt Macy
156eda14cbcSMatt Macy /*
157eda14cbcSMatt Macy * Returns the mapping entry for the given offset.
158eda14cbcSMatt Macy *
159eda14cbcSMatt Macy * It's possible that the given offset will not be in the mapping table
160eda14cbcSMatt Macy * (i.e. no mapping entries contain this offset), in which case, the
161eda14cbcSMatt Macy * return value value depends on the "next_if_missing" parameter.
162eda14cbcSMatt Macy *
163eda14cbcSMatt Macy * If the offset is not found in the table and "next_if_missing" is
164eda14cbcSMatt Macy * B_FALSE, then NULL will always be returned. The behavior is intended
165eda14cbcSMatt Macy * to allow consumers to get the entry corresponding to the offset
166eda14cbcSMatt Macy * parameter, iff the offset overlaps with an entry in the table.
167eda14cbcSMatt Macy *
168eda14cbcSMatt Macy * If the offset is not found in the table and "next_if_missing" is
169eda14cbcSMatt Macy * B_TRUE, then the entry nearest to the given offset will be returned,
170eda14cbcSMatt Macy * such that the entry's source offset is greater than the offset
171eda14cbcSMatt Macy * passed in (i.e. the "next" mapping entry in the table is returned, if
172eda14cbcSMatt Macy * the offset is missing from the table). If there are no entries whose
173eda14cbcSMatt Macy * source offset is greater than the passed in offset, NULL is returned.
174eda14cbcSMatt Macy */
175eda14cbcSMatt Macy static vdev_indirect_mapping_entry_phys_t *
vdev_indirect_mapping_entry_for_offset_impl(vdev_indirect_mapping_t * vim,uint64_t offset,boolean_t next_if_missing)176eda14cbcSMatt Macy vdev_indirect_mapping_entry_for_offset_impl(vdev_indirect_mapping_t *vim,
177eda14cbcSMatt Macy uint64_t offset, boolean_t next_if_missing)
178eda14cbcSMatt Macy {
179eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim));
180eda14cbcSMatt Macy ASSERT(vim->vim_phys->vimp_num_entries > 0);
181eda14cbcSMatt Macy
182eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t *entry = NULL;
183eda14cbcSMatt Macy
184eda14cbcSMatt Macy uint64_t last = vim->vim_phys->vimp_num_entries - 1;
185eda14cbcSMatt Macy uint64_t base = 0;
186eda14cbcSMatt Macy
187eda14cbcSMatt Macy /*
188eda14cbcSMatt Macy * We don't define these inside of the while loop because we use
189eda14cbcSMatt Macy * their value in the case that offset isn't in the mapping.
190eda14cbcSMatt Macy */
191eda14cbcSMatt Macy uint64_t mid;
192eda14cbcSMatt Macy int result;
193eda14cbcSMatt Macy
194eda14cbcSMatt Macy while (last >= base) {
195eda14cbcSMatt Macy mid = base + ((last - base) >> 1);
196eda14cbcSMatt Macy
197eda14cbcSMatt Macy result = dva_mapping_overlap_compare(&offset,
198eda14cbcSMatt Macy &vim->vim_entries[mid]);
199eda14cbcSMatt Macy
200eda14cbcSMatt Macy if (result == 0) {
201eda14cbcSMatt Macy entry = &vim->vim_entries[mid];
202eda14cbcSMatt Macy break;
203eda14cbcSMatt Macy } else if (result < 0) {
204eda14cbcSMatt Macy last = mid - 1;
205eda14cbcSMatt Macy } else {
206eda14cbcSMatt Macy base = mid + 1;
207eda14cbcSMatt Macy }
208eda14cbcSMatt Macy }
209eda14cbcSMatt Macy
210eda14cbcSMatt Macy if (entry == NULL && next_if_missing) {
211eda14cbcSMatt Macy ASSERT3U(base, ==, last + 1);
212eda14cbcSMatt Macy ASSERT(mid == base || mid == last);
213eda14cbcSMatt Macy ASSERT3S(result, !=, 0);
214eda14cbcSMatt Macy
215eda14cbcSMatt Macy /*
216eda14cbcSMatt Macy * The offset we're looking for isn't actually contained
217eda14cbcSMatt Macy * in the mapping table, thus we need to return the
218eda14cbcSMatt Macy * closest mapping entry that is greater than the
219eda14cbcSMatt Macy * offset. We reuse the result of the last comparison,
220eda14cbcSMatt Macy * comparing the mapping entry at index "mid" and the
221eda14cbcSMatt Macy * offset. The offset is guaranteed to lie between
222eda14cbcSMatt Macy * indices one less than "mid", and one greater than
223eda14cbcSMatt Macy * "mid"; we just need to determine if offset is greater
224eda14cbcSMatt Macy * than, or less than the mapping entry contained at
225eda14cbcSMatt Macy * index "mid".
226eda14cbcSMatt Macy */
227eda14cbcSMatt Macy
228eda14cbcSMatt Macy uint64_t index;
229eda14cbcSMatt Macy if (result < 0)
230eda14cbcSMatt Macy index = mid;
231eda14cbcSMatt Macy else
232eda14cbcSMatt Macy index = mid + 1;
233eda14cbcSMatt Macy
234eda14cbcSMatt Macy ASSERT3U(index, <=, vim->vim_phys->vimp_num_entries);
235eda14cbcSMatt Macy
236eda14cbcSMatt Macy if (index == vim->vim_phys->vimp_num_entries) {
237eda14cbcSMatt Macy /*
238eda14cbcSMatt Macy * If "index" is past the end of the entries
239eda14cbcSMatt Macy * array, then not only is the offset not in the
240eda14cbcSMatt Macy * mapping table, but it's actually greater than
241eda14cbcSMatt Macy * all entries in the table. In this case, we
242eda14cbcSMatt Macy * can't return a mapping entry greater than the
243eda14cbcSMatt Macy * offset (since none exist), so we return NULL.
244eda14cbcSMatt Macy */
245eda14cbcSMatt Macy
246eda14cbcSMatt Macy ASSERT3S(dva_mapping_overlap_compare(&offset,
247eda14cbcSMatt Macy &vim->vim_entries[index - 1]), >, 0);
248eda14cbcSMatt Macy
249eda14cbcSMatt Macy return (NULL);
250eda14cbcSMatt Macy } else {
251eda14cbcSMatt Macy /*
252eda14cbcSMatt Macy * Just to be safe, we verify the offset falls
253eda14cbcSMatt Macy * in between the mapping entries at index and
254eda14cbcSMatt Macy * one less than index. Since we know the offset
255eda14cbcSMatt Macy * doesn't overlap an entry, and we're supposed
256eda14cbcSMatt Macy * to return the entry just greater than the
257eda14cbcSMatt Macy * offset, both of the following tests must be
258eda14cbcSMatt Macy * true.
259eda14cbcSMatt Macy */
260eda14cbcSMatt Macy ASSERT3S(dva_mapping_overlap_compare(&offset,
261eda14cbcSMatt Macy &vim->vim_entries[index]), <, 0);
262eda14cbcSMatt Macy IMPLY(index >= 1, dva_mapping_overlap_compare(&offset,
263eda14cbcSMatt Macy &vim->vim_entries[index - 1]) > 0);
264eda14cbcSMatt Macy
265eda14cbcSMatt Macy return (&vim->vim_entries[index]);
266eda14cbcSMatt Macy }
267eda14cbcSMatt Macy } else {
268eda14cbcSMatt Macy return (entry);
269eda14cbcSMatt Macy }
270eda14cbcSMatt Macy }
271eda14cbcSMatt Macy
272eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t *
vdev_indirect_mapping_entry_for_offset(vdev_indirect_mapping_t * vim,uint64_t offset)273eda14cbcSMatt Macy vdev_indirect_mapping_entry_for_offset(vdev_indirect_mapping_t *vim,
274eda14cbcSMatt Macy uint64_t offset)
275eda14cbcSMatt Macy {
276eda14cbcSMatt Macy return (vdev_indirect_mapping_entry_for_offset_impl(vim, offset,
277eda14cbcSMatt Macy B_FALSE));
278eda14cbcSMatt Macy }
279eda14cbcSMatt Macy
280eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t *
vdev_indirect_mapping_entry_for_offset_or_next(vdev_indirect_mapping_t * vim,uint64_t offset)281eda14cbcSMatt Macy vdev_indirect_mapping_entry_for_offset_or_next(vdev_indirect_mapping_t *vim,
282eda14cbcSMatt Macy uint64_t offset)
283eda14cbcSMatt Macy {
284eda14cbcSMatt Macy return (vdev_indirect_mapping_entry_for_offset_impl(vim, offset,
285eda14cbcSMatt Macy B_TRUE));
286eda14cbcSMatt Macy }
287eda14cbcSMatt Macy
288eda14cbcSMatt Macy void
vdev_indirect_mapping_close(vdev_indirect_mapping_t * vim)289eda14cbcSMatt Macy vdev_indirect_mapping_close(vdev_indirect_mapping_t *vim)
290eda14cbcSMatt Macy {
291eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim));
292eda14cbcSMatt Macy
293eda14cbcSMatt Macy if (vim->vim_phys->vimp_num_entries > 0) {
294eda14cbcSMatt Macy uint64_t map_size = vdev_indirect_mapping_size(vim);
295eda14cbcSMatt Macy vmem_free(vim->vim_entries, map_size);
296eda14cbcSMatt Macy vim->vim_entries = NULL;
297eda14cbcSMatt Macy }
298eda14cbcSMatt Macy
299eda14cbcSMatt Macy dmu_buf_rele(vim->vim_dbuf, vim);
300eda14cbcSMatt Macy
301eda14cbcSMatt Macy vim->vim_objset = NULL;
302eda14cbcSMatt Macy vim->vim_object = 0;
303eda14cbcSMatt Macy vim->vim_dbuf = NULL;
304eda14cbcSMatt Macy vim->vim_phys = NULL;
305eda14cbcSMatt Macy
306eda14cbcSMatt Macy kmem_free(vim, sizeof (*vim));
307eda14cbcSMatt Macy }
308eda14cbcSMatt Macy
309eda14cbcSMatt Macy uint64_t
vdev_indirect_mapping_alloc(objset_t * os,dmu_tx_t * tx)310eda14cbcSMatt Macy vdev_indirect_mapping_alloc(objset_t *os, dmu_tx_t *tx)
311eda14cbcSMatt Macy {
312eda14cbcSMatt Macy uint64_t object;
313eda14cbcSMatt Macy ASSERT(dmu_tx_is_syncing(tx));
314eda14cbcSMatt Macy uint64_t bonus_size = VDEV_INDIRECT_MAPPING_SIZE_V0;
315eda14cbcSMatt Macy
316eda14cbcSMatt Macy if (spa_feature_is_enabled(os->os_spa, SPA_FEATURE_OBSOLETE_COUNTS)) {
317eda14cbcSMatt Macy bonus_size = sizeof (vdev_indirect_mapping_phys_t);
318eda14cbcSMatt Macy }
319eda14cbcSMatt Macy
320eda14cbcSMatt Macy object = dmu_object_alloc(os,
321eda14cbcSMatt Macy DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
322eda14cbcSMatt Macy DMU_OTN_UINT64_METADATA, bonus_size,
323eda14cbcSMatt Macy tx);
324eda14cbcSMatt Macy
325eda14cbcSMatt Macy if (spa_feature_is_enabled(os->os_spa, SPA_FEATURE_OBSOLETE_COUNTS)) {
326eda14cbcSMatt Macy dmu_buf_t *dbuf;
327eda14cbcSMatt Macy vdev_indirect_mapping_phys_t *vimp;
328eda14cbcSMatt Macy
329eda14cbcSMatt Macy VERIFY0(dmu_bonus_hold(os, object, FTAG, &dbuf));
330eda14cbcSMatt Macy dmu_buf_will_dirty(dbuf, tx);
331eda14cbcSMatt Macy vimp = dbuf->db_data;
332eda14cbcSMatt Macy vimp->vimp_counts_object = dmu_object_alloc(os,
333eda14cbcSMatt Macy DMU_OTN_UINT32_METADATA, SPA_OLD_MAXBLOCKSIZE,
334eda14cbcSMatt Macy DMU_OT_NONE, 0, tx);
335eda14cbcSMatt Macy spa_feature_incr(os->os_spa, SPA_FEATURE_OBSOLETE_COUNTS, tx);
336eda14cbcSMatt Macy dmu_buf_rele(dbuf, FTAG);
337eda14cbcSMatt Macy }
338eda14cbcSMatt Macy
339eda14cbcSMatt Macy return (object);
340eda14cbcSMatt Macy }
341eda14cbcSMatt Macy
342eda14cbcSMatt Macy
343eda14cbcSMatt Macy vdev_indirect_mapping_t *
vdev_indirect_mapping_open(objset_t * os,uint64_t mapping_object)344eda14cbcSMatt Macy vdev_indirect_mapping_open(objset_t *os, uint64_t mapping_object)
345eda14cbcSMatt Macy {
346eda14cbcSMatt Macy vdev_indirect_mapping_t *vim = kmem_zalloc(sizeof (*vim), KM_SLEEP);
347eda14cbcSMatt Macy dmu_object_info_t doi;
348eda14cbcSMatt Macy VERIFY0(dmu_object_info(os, mapping_object, &doi));
349eda14cbcSMatt Macy
350eda14cbcSMatt Macy vim->vim_objset = os;
351eda14cbcSMatt Macy vim->vim_object = mapping_object;
352eda14cbcSMatt Macy
353eda14cbcSMatt Macy VERIFY0(dmu_bonus_hold(os, vim->vim_object, vim,
354eda14cbcSMatt Macy &vim->vim_dbuf));
355eda14cbcSMatt Macy vim->vim_phys = vim->vim_dbuf->db_data;
356eda14cbcSMatt Macy
357eda14cbcSMatt Macy vim->vim_havecounts =
358eda14cbcSMatt Macy (doi.doi_bonus_size > VDEV_INDIRECT_MAPPING_SIZE_V0);
359eda14cbcSMatt Macy
360eda14cbcSMatt Macy if (vim->vim_phys->vimp_num_entries > 0) {
361eda14cbcSMatt Macy uint64_t map_size = vdev_indirect_mapping_size(vim);
362eda14cbcSMatt Macy vim->vim_entries = vmem_alloc(map_size, KM_SLEEP);
363eda14cbcSMatt Macy VERIFY0(dmu_read(os, vim->vim_object, 0, map_size,
364eda14cbcSMatt Macy vim->vim_entries, DMU_READ_PREFETCH));
365eda14cbcSMatt Macy }
366eda14cbcSMatt Macy
367eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim));
368eda14cbcSMatt Macy
369eda14cbcSMatt Macy return (vim);
370eda14cbcSMatt Macy }
371eda14cbcSMatt Macy
372eda14cbcSMatt Macy void
vdev_indirect_mapping_free(objset_t * os,uint64_t object,dmu_tx_t * tx)373eda14cbcSMatt Macy vdev_indirect_mapping_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
374eda14cbcSMatt Macy {
375eda14cbcSMatt Macy vdev_indirect_mapping_t *vim = vdev_indirect_mapping_open(os, object);
376eda14cbcSMatt Macy if (vim->vim_havecounts) {
377eda14cbcSMatt Macy VERIFY0(dmu_object_free(os, vim->vim_phys->vimp_counts_object,
378eda14cbcSMatt Macy tx));
379eda14cbcSMatt Macy spa_feature_decr(os->os_spa, SPA_FEATURE_OBSOLETE_COUNTS, tx);
380eda14cbcSMatt Macy }
381eda14cbcSMatt Macy vdev_indirect_mapping_close(vim);
382eda14cbcSMatt Macy
383eda14cbcSMatt Macy VERIFY0(dmu_object_free(os, object, tx));
384eda14cbcSMatt Macy }
385eda14cbcSMatt Macy
386eda14cbcSMatt Macy /*
387eda14cbcSMatt Macy * Append the list of vdev_indirect_mapping_entry_t's to the on-disk
388eda14cbcSMatt Macy * mapping object. Also remove the entries from the list and free them.
389eda14cbcSMatt Macy * This also implicitly extends the max_offset of the mapping (to the end
390eda14cbcSMatt Macy * of the last entry).
391eda14cbcSMatt Macy */
392eda14cbcSMatt Macy void
vdev_indirect_mapping_add_entries(vdev_indirect_mapping_t * vim,list_t * list,dmu_tx_t * tx)393eda14cbcSMatt Macy vdev_indirect_mapping_add_entries(vdev_indirect_mapping_t *vim,
394eda14cbcSMatt Macy list_t *list, dmu_tx_t *tx)
395eda14cbcSMatt Macy {
396eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t *mapbuf;
397eda14cbcSMatt Macy uint64_t old_size;
398eda14cbcSMatt Macy uint32_t *countbuf = NULL;
399eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t *old_entries;
400eda14cbcSMatt Macy uint64_t old_count;
401eda14cbcSMatt Macy uint64_t entries_written = 0;
402eda14cbcSMatt Macy
403eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim));
404eda14cbcSMatt Macy ASSERT(dmu_tx_is_syncing(tx));
405eda14cbcSMatt Macy ASSERT(dsl_pool_sync_context(dmu_tx_pool(tx)));
406eda14cbcSMatt Macy ASSERT(!list_is_empty(list));
407eda14cbcSMatt Macy
408eda14cbcSMatt Macy old_size = vdev_indirect_mapping_size(vim);
409eda14cbcSMatt Macy old_entries = vim->vim_entries;
410eda14cbcSMatt Macy old_count = vim->vim_phys->vimp_num_entries;
411eda14cbcSMatt Macy
412eda14cbcSMatt Macy dmu_buf_will_dirty(vim->vim_dbuf, tx);
413eda14cbcSMatt Macy
414eda14cbcSMatt Macy mapbuf = vmem_alloc(SPA_OLD_MAXBLOCKSIZE, KM_SLEEP);
415eda14cbcSMatt Macy if (vim->vim_havecounts) {
416eda14cbcSMatt Macy countbuf = vmem_alloc(SPA_OLD_MAXBLOCKSIZE, KM_SLEEP);
417eda14cbcSMatt Macy ASSERT(spa_feature_is_active(vim->vim_objset->os_spa,
418eda14cbcSMatt Macy SPA_FEATURE_OBSOLETE_COUNTS));
419eda14cbcSMatt Macy }
420eda14cbcSMatt Macy while (!list_is_empty(list)) {
421eda14cbcSMatt Macy uint64_t i;
422eda14cbcSMatt Macy /*
423eda14cbcSMatt Macy * Write entries from the list to the
424eda14cbcSMatt Macy * vdev_im_object in batches of size SPA_OLD_MAXBLOCKSIZE.
425eda14cbcSMatt Macy */
426eda14cbcSMatt Macy for (i = 0; i < SPA_OLD_MAXBLOCKSIZE / sizeof (*mapbuf); i++) {
427eda14cbcSMatt Macy vdev_indirect_mapping_entry_t *entry =
428eda14cbcSMatt Macy list_remove_head(list);
429eda14cbcSMatt Macy if (entry == NULL)
430eda14cbcSMatt Macy break;
431eda14cbcSMatt Macy
432eda14cbcSMatt Macy uint64_t size =
433eda14cbcSMatt Macy DVA_GET_ASIZE(&entry->vime_mapping.vimep_dst);
434eda14cbcSMatt Macy uint64_t src_offset =
435eda14cbcSMatt Macy DVA_MAPPING_GET_SRC_OFFSET(&entry->vime_mapping);
436eda14cbcSMatt Macy
437eda14cbcSMatt Macy /*
438eda14cbcSMatt Macy * We shouldn't be adding an entry which is fully
439eda14cbcSMatt Macy * obsolete.
440eda14cbcSMatt Macy */
441eda14cbcSMatt Macy ASSERT3U(entry->vime_obsolete_count, <, size);
442eda14cbcSMatt Macy IMPLY(entry->vime_obsolete_count != 0,
443eda14cbcSMatt Macy vim->vim_havecounts);
444eda14cbcSMatt Macy
445eda14cbcSMatt Macy mapbuf[i] = entry->vime_mapping;
446eda14cbcSMatt Macy if (vim->vim_havecounts)
447eda14cbcSMatt Macy countbuf[i] = entry->vime_obsolete_count;
448eda14cbcSMatt Macy
449eda14cbcSMatt Macy vim->vim_phys->vimp_bytes_mapped += size;
450eda14cbcSMatt Macy ASSERT3U(src_offset, >=,
451eda14cbcSMatt Macy vim->vim_phys->vimp_max_offset);
452eda14cbcSMatt Macy vim->vim_phys->vimp_max_offset = src_offset + size;
453eda14cbcSMatt Macy
454eda14cbcSMatt Macy entries_written++;
455eda14cbcSMatt Macy
456eda14cbcSMatt Macy vmem_free(entry, sizeof (*entry));
457eda14cbcSMatt Macy }
458eda14cbcSMatt Macy dmu_write(vim->vim_objset, vim->vim_object,
459eda14cbcSMatt Macy vim->vim_phys->vimp_num_entries * sizeof (*mapbuf),
460eda14cbcSMatt Macy i * sizeof (*mapbuf),
461eda14cbcSMatt Macy mapbuf, tx);
462eda14cbcSMatt Macy if (vim->vim_havecounts) {
463eda14cbcSMatt Macy dmu_write(vim->vim_objset,
464eda14cbcSMatt Macy vim->vim_phys->vimp_counts_object,
465eda14cbcSMatt Macy vim->vim_phys->vimp_num_entries *
466eda14cbcSMatt Macy sizeof (*countbuf),
467eda14cbcSMatt Macy i * sizeof (*countbuf), countbuf, tx);
468eda14cbcSMatt Macy }
469eda14cbcSMatt Macy vim->vim_phys->vimp_num_entries += i;
470eda14cbcSMatt Macy }
471eda14cbcSMatt Macy vmem_free(mapbuf, SPA_OLD_MAXBLOCKSIZE);
472eda14cbcSMatt Macy if (vim->vim_havecounts)
473eda14cbcSMatt Macy vmem_free(countbuf, SPA_OLD_MAXBLOCKSIZE);
474eda14cbcSMatt Macy
475eda14cbcSMatt Macy /*
476eda14cbcSMatt Macy * Update the entry array to reflect the new entries. First, copy
477eda14cbcSMatt Macy * over any old entries then read back the new entries we just wrote.
478eda14cbcSMatt Macy */
479eda14cbcSMatt Macy uint64_t new_size = vdev_indirect_mapping_size(vim);
480eda14cbcSMatt Macy ASSERT3U(new_size, >, old_size);
481eda14cbcSMatt Macy ASSERT3U(new_size - old_size, ==,
482eda14cbcSMatt Macy entries_written * sizeof (vdev_indirect_mapping_entry_phys_t));
483eda14cbcSMatt Macy vim->vim_entries = vmem_alloc(new_size, KM_SLEEP);
484eda14cbcSMatt Macy if (old_size > 0) {
485*da5137abSMartin Matuska memcpy(vim->vim_entries, old_entries, old_size);
486eda14cbcSMatt Macy vmem_free(old_entries, old_size);
487eda14cbcSMatt Macy }
488eda14cbcSMatt Macy VERIFY0(dmu_read(vim->vim_objset, vim->vim_object, old_size,
489eda14cbcSMatt Macy new_size - old_size, &vim->vim_entries[old_count],
490eda14cbcSMatt Macy DMU_READ_PREFETCH));
491eda14cbcSMatt Macy
492eda14cbcSMatt Macy zfs_dbgmsg("txg %llu: wrote %llu entries to "
493eda14cbcSMatt Macy "indirect mapping obj %llu; max offset=0x%llx",
494eda14cbcSMatt Macy (u_longlong_t)dmu_tx_get_txg(tx),
495eda14cbcSMatt Macy (u_longlong_t)entries_written,
496eda14cbcSMatt Macy (u_longlong_t)vim->vim_object,
497eda14cbcSMatt Macy (u_longlong_t)vim->vim_phys->vimp_max_offset);
498eda14cbcSMatt Macy }
499eda14cbcSMatt Macy
500eda14cbcSMatt Macy /*
501eda14cbcSMatt Macy * Increment the relevant counts for the specified offset and length.
502eda14cbcSMatt Macy * The counts array must be obtained from
503eda14cbcSMatt Macy * vdev_indirect_mapping_load_obsolete_counts().
504eda14cbcSMatt Macy */
505eda14cbcSMatt Macy void
vdev_indirect_mapping_increment_obsolete_count(vdev_indirect_mapping_t * vim,uint64_t offset,uint64_t length,uint32_t * counts)506eda14cbcSMatt Macy vdev_indirect_mapping_increment_obsolete_count(vdev_indirect_mapping_t *vim,
507eda14cbcSMatt Macy uint64_t offset, uint64_t length, uint32_t *counts)
508eda14cbcSMatt Macy {
509eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t *mapping;
510eda14cbcSMatt Macy uint64_t index;
511eda14cbcSMatt Macy
512eda14cbcSMatt Macy mapping = vdev_indirect_mapping_entry_for_offset(vim, offset);
513eda14cbcSMatt Macy
514eda14cbcSMatt Macy ASSERT(length > 0);
515eda14cbcSMatt Macy ASSERT3P(mapping, !=, NULL);
516eda14cbcSMatt Macy
517eda14cbcSMatt Macy index = mapping - vim->vim_entries;
518eda14cbcSMatt Macy
519eda14cbcSMatt Macy while (length > 0) {
520eda14cbcSMatt Macy ASSERT3U(index, <, vdev_indirect_mapping_num_entries(vim));
521eda14cbcSMatt Macy
522eda14cbcSMatt Macy uint64_t size = DVA_GET_ASIZE(&mapping->vimep_dst);
523eda14cbcSMatt Macy uint64_t inner_offset = offset -
524eda14cbcSMatt Macy DVA_MAPPING_GET_SRC_OFFSET(mapping);
525eda14cbcSMatt Macy VERIFY3U(inner_offset, <, size);
526eda14cbcSMatt Macy uint64_t inner_size = MIN(length, size - inner_offset);
527eda14cbcSMatt Macy
528eda14cbcSMatt Macy VERIFY3U(counts[index] + inner_size, <=, size);
529eda14cbcSMatt Macy counts[index] += inner_size;
530eda14cbcSMatt Macy
531eda14cbcSMatt Macy offset += inner_size;
532eda14cbcSMatt Macy length -= inner_size;
533eda14cbcSMatt Macy mapping++;
534eda14cbcSMatt Macy index++;
535eda14cbcSMatt Macy }
536eda14cbcSMatt Macy }
537eda14cbcSMatt Macy
538eda14cbcSMatt Macy typedef struct load_obsolete_space_map_arg {
539eda14cbcSMatt Macy vdev_indirect_mapping_t *losma_vim;
540eda14cbcSMatt Macy uint32_t *losma_counts;
541eda14cbcSMatt Macy } load_obsolete_space_map_arg_t;
542eda14cbcSMatt Macy
543eda14cbcSMatt Macy static int
load_obsolete_sm_callback(space_map_entry_t * sme,void * arg)544eda14cbcSMatt Macy load_obsolete_sm_callback(space_map_entry_t *sme, void *arg)
545eda14cbcSMatt Macy {
546eda14cbcSMatt Macy load_obsolete_space_map_arg_t *losma = arg;
547eda14cbcSMatt Macy ASSERT3S(sme->sme_type, ==, SM_ALLOC);
548eda14cbcSMatt Macy
549eda14cbcSMatt Macy vdev_indirect_mapping_increment_obsolete_count(losma->losma_vim,
550eda14cbcSMatt Macy sme->sme_offset, sme->sme_run, losma->losma_counts);
551eda14cbcSMatt Macy
552eda14cbcSMatt Macy return (0);
553eda14cbcSMatt Macy }
554eda14cbcSMatt Macy
555eda14cbcSMatt Macy /*
556eda14cbcSMatt Macy * Modify the counts (increment them) based on the spacemap.
557eda14cbcSMatt Macy */
558eda14cbcSMatt Macy void
vdev_indirect_mapping_load_obsolete_spacemap(vdev_indirect_mapping_t * vim,uint32_t * counts,space_map_t * obsolete_space_sm)559eda14cbcSMatt Macy vdev_indirect_mapping_load_obsolete_spacemap(vdev_indirect_mapping_t *vim,
560eda14cbcSMatt Macy uint32_t *counts, space_map_t *obsolete_space_sm)
561eda14cbcSMatt Macy {
562eda14cbcSMatt Macy load_obsolete_space_map_arg_t losma;
563eda14cbcSMatt Macy losma.losma_counts = counts;
564eda14cbcSMatt Macy losma.losma_vim = vim;
565eda14cbcSMatt Macy VERIFY0(space_map_iterate(obsolete_space_sm,
566eda14cbcSMatt Macy space_map_length(obsolete_space_sm),
567eda14cbcSMatt Macy load_obsolete_sm_callback, &losma));
568eda14cbcSMatt Macy }
569eda14cbcSMatt Macy
570eda14cbcSMatt Macy /*
571eda14cbcSMatt Macy * Read the obsolete counts from disk, returning them in an array.
572eda14cbcSMatt Macy */
573eda14cbcSMatt Macy uint32_t *
vdev_indirect_mapping_load_obsolete_counts(vdev_indirect_mapping_t * vim)574eda14cbcSMatt Macy vdev_indirect_mapping_load_obsolete_counts(vdev_indirect_mapping_t *vim)
575eda14cbcSMatt Macy {
576eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim));
577eda14cbcSMatt Macy
578eda14cbcSMatt Macy uint64_t counts_size =
579eda14cbcSMatt Macy vim->vim_phys->vimp_num_entries * sizeof (uint32_t);
580eda14cbcSMatt Macy uint32_t *counts = vmem_alloc(counts_size, KM_SLEEP);
581eda14cbcSMatt Macy if (vim->vim_havecounts) {
582eda14cbcSMatt Macy VERIFY0(dmu_read(vim->vim_objset,
583eda14cbcSMatt Macy vim->vim_phys->vimp_counts_object,
584eda14cbcSMatt Macy 0, counts_size,
585eda14cbcSMatt Macy counts, DMU_READ_PREFETCH));
586eda14cbcSMatt Macy } else {
587*da5137abSMartin Matuska memset(counts, 0, counts_size);
588eda14cbcSMatt Macy }
589eda14cbcSMatt Macy return (counts);
590eda14cbcSMatt Macy }
591eda14cbcSMatt Macy
592eda14cbcSMatt Macy extern void
vdev_indirect_mapping_free_obsolete_counts(vdev_indirect_mapping_t * vim,uint32_t * counts)593eda14cbcSMatt Macy vdev_indirect_mapping_free_obsolete_counts(vdev_indirect_mapping_t *vim,
594eda14cbcSMatt Macy uint32_t *counts)
595eda14cbcSMatt Macy {
596eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim));
597eda14cbcSMatt Macy
598eda14cbcSMatt Macy vmem_free(counts, vim->vim_phys->vimp_num_entries * sizeof (uint32_t));
599eda14cbcSMatt Macy }
600eda14cbcSMatt Macy
/* Symbols exported from this file when built with _KERNEL defined. */
#ifdef _KERNEL
EXPORT_SYMBOL(vdev_indirect_mapping_add_entries);
EXPORT_SYMBOL(vdev_indirect_mapping_alloc);
EXPORT_SYMBOL(vdev_indirect_mapping_bytes_mapped);
EXPORT_SYMBOL(vdev_indirect_mapping_close);
EXPORT_SYMBOL(vdev_indirect_mapping_entry_for_offset);
EXPORT_SYMBOL(vdev_indirect_mapping_entry_for_offset_or_next);
EXPORT_SYMBOL(vdev_indirect_mapping_free);
EXPORT_SYMBOL(vdev_indirect_mapping_free_obsolete_counts);
EXPORT_SYMBOL(vdev_indirect_mapping_increment_obsolete_count);
EXPORT_SYMBOL(vdev_indirect_mapping_load_obsolete_counts);
EXPORT_SYMBOL(vdev_indirect_mapping_load_obsolete_spacemap);
EXPORT_SYMBOL(vdev_indirect_mapping_max_offset);
EXPORT_SYMBOL(vdev_indirect_mapping_num_entries);
EXPORT_SYMBOL(vdev_indirect_mapping_object);
EXPORT_SYMBOL(vdev_indirect_mapping_open);
EXPORT_SYMBOL(vdev_indirect_mapping_size);
#endif /* _KERNEL */
619