1eda14cbcSMatt Macy /* 2eda14cbcSMatt Macy * CDDL HEADER START 3eda14cbcSMatt Macy * 4eda14cbcSMatt Macy * This file and its contents are supplied under the terms of the 5eda14cbcSMatt Macy * Common Development and Distribution License ("CDDL"), version 1.0. 6eda14cbcSMatt Macy * You may only use this file in accordance with the terms of version 7eda14cbcSMatt Macy * 1.0 of the CDDL. 8eda14cbcSMatt Macy * 9eda14cbcSMatt Macy * A full copy of the text of the CDDL should have accompanied this 10eda14cbcSMatt Macy * source. A copy of the CDDL is also available via the Internet at 11eda14cbcSMatt Macy * http://www.illumos.org/license/CDDL. 12eda14cbcSMatt Macy * 13eda14cbcSMatt Macy * CDDL HEADER END 14eda14cbcSMatt Macy */ 15eda14cbcSMatt Macy 16eda14cbcSMatt Macy /* 17eda14cbcSMatt Macy * Copyright (c) 2015, 2017 by Delphix. All rights reserved. 18eda14cbcSMatt Macy */ 19eda14cbcSMatt Macy 20eda14cbcSMatt Macy #include <sys/dmu_tx.h> 21eda14cbcSMatt Macy #include <sys/dsl_pool.h> 22eda14cbcSMatt Macy #include <sys/spa.h> 23eda14cbcSMatt Macy #include <sys/vdev_impl.h> 24eda14cbcSMatt Macy #include <sys/vdev_indirect_mapping.h> 25eda14cbcSMatt Macy #include <sys/zfeature.h> 26eda14cbcSMatt Macy #include <sys/dmu_objset.h> 27eda14cbcSMatt Macy 28eda14cbcSMatt Macy #ifdef ZFS_DEBUG 29eda14cbcSMatt Macy static boolean_t 30eda14cbcSMatt Macy vdev_indirect_mapping_verify(vdev_indirect_mapping_t *vim) 31eda14cbcSMatt Macy { 32eda14cbcSMatt Macy ASSERT(vim != NULL); 33eda14cbcSMatt Macy 34eda14cbcSMatt Macy ASSERT(vim->vim_object != 0); 35eda14cbcSMatt Macy ASSERT(vim->vim_objset != NULL); 36eda14cbcSMatt Macy ASSERT(vim->vim_phys != NULL); 37eda14cbcSMatt Macy ASSERT(vim->vim_dbuf != NULL); 38eda14cbcSMatt Macy 39eda14cbcSMatt Macy EQUIV(vim->vim_phys->vimp_num_entries > 0, 40eda14cbcSMatt Macy vim->vim_entries != NULL); 41eda14cbcSMatt Macy if (vim->vim_phys->vimp_num_entries > 0) { 42eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t *last_entry __maybe_unused = 43eda14cbcSMatt Macy &vim->vim_entries[vim->vim_phys->vimp_num_entries - 1]; 44eda14cbcSMatt Macy uint64_t offset __maybe_unused = 45eda14cbcSMatt Macy DVA_MAPPING_GET_SRC_OFFSET(last_entry); 46eda14cbcSMatt Macy uint64_t size __maybe_unused = 47eda14cbcSMatt Macy DVA_GET_ASIZE(&last_entry->vimep_dst); 48eda14cbcSMatt Macy 49eda14cbcSMatt Macy ASSERT3U(vim->vim_phys->vimp_max_offset, >=, offset + size); 50eda14cbcSMatt Macy } 51eda14cbcSMatt Macy if (vim->vim_havecounts) { 52eda14cbcSMatt Macy ASSERT(vim->vim_phys->vimp_counts_object != 0); 53eda14cbcSMatt Macy } 54eda14cbcSMatt Macy 55eda14cbcSMatt Macy return (B_TRUE); 56eda14cbcSMatt Macy } 57*e92ffd9bSMartin Matuska #else 58*e92ffd9bSMartin Matuska #define vdev_indirect_mapping_verify(vim) ((void) sizeof (vim), B_TRUE) 59eda14cbcSMatt Macy #endif 60eda14cbcSMatt Macy 61eda14cbcSMatt Macy uint64_t 62eda14cbcSMatt Macy vdev_indirect_mapping_num_entries(vdev_indirect_mapping_t *vim) 63eda14cbcSMatt Macy { 64eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim)); 65eda14cbcSMatt Macy 66eda14cbcSMatt Macy return (vim->vim_phys->vimp_num_entries); 67eda14cbcSMatt Macy } 68eda14cbcSMatt Macy 69eda14cbcSMatt Macy uint64_t 70eda14cbcSMatt Macy vdev_indirect_mapping_max_offset(vdev_indirect_mapping_t *vim) 71eda14cbcSMatt Macy { 72eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim)); 73eda14cbcSMatt Macy 74eda14cbcSMatt Macy return (vim->vim_phys->vimp_max_offset); 75eda14cbcSMatt Macy } 76eda14cbcSMatt Macy 77eda14cbcSMatt Macy uint64_t 78eda14cbcSMatt Macy vdev_indirect_mapping_object(vdev_indirect_mapping_t *vim) 79eda14cbcSMatt Macy { 80eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim)); 81eda14cbcSMatt Macy 82eda14cbcSMatt Macy return (vim->vim_object); 83eda14cbcSMatt Macy } 84eda14cbcSMatt Macy 85eda14cbcSMatt Macy uint64_t 86eda14cbcSMatt Macy vdev_indirect_mapping_bytes_mapped(vdev_indirect_mapping_t *vim) 87eda14cbcSMatt Macy { 88eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim)); 89eda14cbcSMatt Macy 90eda14cbcSMatt Macy return (vim->vim_phys->vimp_bytes_mapped); 91eda14cbcSMatt Macy } 92eda14cbcSMatt Macy 93eda14cbcSMatt Macy /* 94eda14cbcSMatt Macy * The length (in bytes) of the mapping object array in memory and 95eda14cbcSMatt Macy * (logically) on disk. 96eda14cbcSMatt Macy * 97eda14cbcSMatt Macy * Note that unlike most of our accessor functions, 98eda14cbcSMatt Macy * we don't assert that the struct is consistent; therefore it can be 99eda14cbcSMatt Macy * called while there may be concurrent changes, if we don't care about 100eda14cbcSMatt Macy * the value being immediately stale (e.g. from spa_removal_get_stats()). 101eda14cbcSMatt Macy */ 102eda14cbcSMatt Macy uint64_t 103eda14cbcSMatt Macy vdev_indirect_mapping_size(vdev_indirect_mapping_t *vim) 104eda14cbcSMatt Macy { 105eda14cbcSMatt Macy return (vim->vim_phys->vimp_num_entries * sizeof (*vim->vim_entries)); 106eda14cbcSMatt Macy } 107eda14cbcSMatt Macy 108eda14cbcSMatt Macy /* 109eda14cbcSMatt Macy * Compare an offset with an indirect mapping entry; there are three 110eda14cbcSMatt Macy * possible scenarios: 111eda14cbcSMatt Macy * 112eda14cbcSMatt Macy * 1. The offset is "less than" the mapping entry; meaning the 113eda14cbcSMatt Macy * offset is less than the source offset of the mapping entry. In 114eda14cbcSMatt Macy * this case, there is no overlap between the offset and the 115eda14cbcSMatt Macy * mapping entry and -1 will be returned. 116eda14cbcSMatt Macy * 117eda14cbcSMatt Macy * 2. The offset is "greater than" the mapping entry; meaning the 118eda14cbcSMatt Macy * offset is greater than the mapping entry's source offset plus 119eda14cbcSMatt Macy * the entry's size. In this case, there is no overlap between 120eda14cbcSMatt Macy * the offset and the mapping entry and 1 will be returned. 121eda14cbcSMatt Macy * 122eda14cbcSMatt Macy * NOTE: If the offset is actually equal to the entry's offset 123eda14cbcSMatt Macy * plus size, this is considered to be "greater" than the entry, 124eda14cbcSMatt Macy * and this case applies (i.e. 1 will be returned). Thus, the 125eda14cbcSMatt Macy * entry's "range" can be considered to be inclusive at its 126eda14cbcSMatt Macy * start, but exclusive at its end: e.g. [src, src + size). 127eda14cbcSMatt Macy * 128eda14cbcSMatt Macy * 3. The last case to consider is if the offset actually falls 129eda14cbcSMatt Macy * within the mapping entry's range. If this is the case, the 130eda14cbcSMatt Macy * offset is considered to be "equal to" the mapping entry and 131eda14cbcSMatt Macy * 0 will be returned. 132eda14cbcSMatt Macy * 133eda14cbcSMatt Macy * NOTE: If the offset is equal to the entry's source offset, 134eda14cbcSMatt Macy * this case applies and 0 will be returned. If the offset is 135eda14cbcSMatt Macy * equal to the entry's source plus its size, this case does 136eda14cbcSMatt Macy * *not* apply (see "NOTE" above for scenario 2), and 1 will be 137eda14cbcSMatt Macy * returned. 138eda14cbcSMatt Macy */ 139eda14cbcSMatt Macy static int 140eda14cbcSMatt Macy dva_mapping_overlap_compare(const void *v_key, const void *v_array_elem) 141eda14cbcSMatt Macy { 142eda14cbcSMatt Macy const uint64_t * const key = v_key; 143eda14cbcSMatt Macy const vdev_indirect_mapping_entry_phys_t * const array_elem = 144eda14cbcSMatt Macy v_array_elem; 145eda14cbcSMatt Macy uint64_t src_offset = DVA_MAPPING_GET_SRC_OFFSET(array_elem); 146eda14cbcSMatt Macy 147eda14cbcSMatt Macy if (*key < src_offset) { 148eda14cbcSMatt Macy return (-1); 149eda14cbcSMatt Macy } else if (*key < src_offset + DVA_GET_ASIZE(&array_elem->vimep_dst)) { 150eda14cbcSMatt Macy return (0); 151eda14cbcSMatt Macy } else { 152eda14cbcSMatt Macy return (1); 153eda14cbcSMatt Macy } 154eda14cbcSMatt Macy } 155eda14cbcSMatt Macy 156eda14cbcSMatt Macy /* 157eda14cbcSMatt Macy * Returns the mapping entry for the given offset. 158eda14cbcSMatt Macy * 159eda14cbcSMatt Macy * It's possible that the given offset will not be in the mapping table 160eda14cbcSMatt Macy * (i.e. no mapping entries contain this offset), in which case, the 161eda14cbcSMatt Macy * return value value depends on the "next_if_missing" parameter. 162eda14cbcSMatt Macy * 163eda14cbcSMatt Macy * If the offset is not found in the table and "next_if_missing" is 164eda14cbcSMatt Macy * B_FALSE, then NULL will always be returned. The behavior is intended 165eda14cbcSMatt Macy * to allow consumers to get the entry corresponding to the offset 166eda14cbcSMatt Macy * parameter, iff the offset overlaps with an entry in the table. 167eda14cbcSMatt Macy * 168eda14cbcSMatt Macy * If the offset is not found in the table and "next_if_missing" is 169eda14cbcSMatt Macy * B_TRUE, then the entry nearest to the given offset will be returned, 170eda14cbcSMatt Macy * such that the entry's source offset is greater than the offset 171eda14cbcSMatt Macy * passed in (i.e. the "next" mapping entry in the table is returned, if 172eda14cbcSMatt Macy * the offset is missing from the table). If there are no entries whose 173eda14cbcSMatt Macy * source offset is greater than the passed in offset, NULL is returned. 174eda14cbcSMatt Macy */ 175eda14cbcSMatt Macy static vdev_indirect_mapping_entry_phys_t * 176eda14cbcSMatt Macy vdev_indirect_mapping_entry_for_offset_impl(vdev_indirect_mapping_t *vim, 177eda14cbcSMatt Macy uint64_t offset, boolean_t next_if_missing) 178eda14cbcSMatt Macy { 179eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim)); 180eda14cbcSMatt Macy ASSERT(vim->vim_phys->vimp_num_entries > 0); 181eda14cbcSMatt Macy 182eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t *entry = NULL; 183eda14cbcSMatt Macy 184eda14cbcSMatt Macy uint64_t last = vim->vim_phys->vimp_num_entries - 1; 185eda14cbcSMatt Macy uint64_t base = 0; 186eda14cbcSMatt Macy 187eda14cbcSMatt Macy /* 188eda14cbcSMatt Macy * We don't define these inside of the while loop because we use 189eda14cbcSMatt Macy * their value in the case that offset isn't in the mapping. 190eda14cbcSMatt Macy */ 191eda14cbcSMatt Macy uint64_t mid; 192eda14cbcSMatt Macy int result; 193eda14cbcSMatt Macy 194eda14cbcSMatt Macy while (last >= base) { 195eda14cbcSMatt Macy mid = base + ((last - base) >> 1); 196eda14cbcSMatt Macy 197eda14cbcSMatt Macy result = dva_mapping_overlap_compare(&offset, 198eda14cbcSMatt Macy &vim->vim_entries[mid]); 199eda14cbcSMatt Macy 200eda14cbcSMatt Macy if (result == 0) { 201eda14cbcSMatt Macy entry = &vim->vim_entries[mid]; 202eda14cbcSMatt Macy break; 203eda14cbcSMatt Macy } else if (result < 0) { 204eda14cbcSMatt Macy last = mid - 1; 205eda14cbcSMatt Macy } else { 206eda14cbcSMatt Macy base = mid + 1; 207eda14cbcSMatt Macy } 208eda14cbcSMatt Macy } 209eda14cbcSMatt Macy 210eda14cbcSMatt Macy if (entry == NULL && next_if_missing) { 211eda14cbcSMatt Macy ASSERT3U(base, ==, last + 1); 212eda14cbcSMatt Macy ASSERT(mid == base || mid == last); 213eda14cbcSMatt Macy ASSERT3S(result, !=, 0); 214eda14cbcSMatt Macy 215eda14cbcSMatt Macy /* 216eda14cbcSMatt Macy * The offset we're looking for isn't actually contained 217eda14cbcSMatt Macy * in the mapping table, thus we need to return the 218eda14cbcSMatt Macy * closest mapping entry that is greater than the 219eda14cbcSMatt Macy * offset. We reuse the result of the last comparison, 220eda14cbcSMatt Macy * comparing the mapping entry at index "mid" and the 221eda14cbcSMatt Macy * offset. The offset is guaranteed to lie between 222eda14cbcSMatt Macy * indices one less than "mid", and one greater than 223eda14cbcSMatt Macy * "mid"; we just need to determine if offset is greater 224eda14cbcSMatt Macy * than, or less than the mapping entry contained at 225eda14cbcSMatt Macy * index "mid". 226eda14cbcSMatt Macy */ 227eda14cbcSMatt Macy 228eda14cbcSMatt Macy uint64_t index; 229eda14cbcSMatt Macy if (result < 0) 230eda14cbcSMatt Macy index = mid; 231eda14cbcSMatt Macy else 232eda14cbcSMatt Macy index = mid + 1; 233eda14cbcSMatt Macy 234eda14cbcSMatt Macy ASSERT3U(index, <=, vim->vim_phys->vimp_num_entries); 235eda14cbcSMatt Macy 236eda14cbcSMatt Macy if (index == vim->vim_phys->vimp_num_entries) { 237eda14cbcSMatt Macy /* 238eda14cbcSMatt Macy * If "index" is past the end of the entries 239eda14cbcSMatt Macy * array, then not only is the offset not in the 240eda14cbcSMatt Macy * mapping table, but it's actually greater than 241eda14cbcSMatt Macy * all entries in the table. In this case, we 242eda14cbcSMatt Macy * can't return a mapping entry greater than the 243eda14cbcSMatt Macy * offset (since none exist), so we return NULL. 244eda14cbcSMatt Macy */ 245eda14cbcSMatt Macy 246eda14cbcSMatt Macy ASSERT3S(dva_mapping_overlap_compare(&offset, 247eda14cbcSMatt Macy &vim->vim_entries[index - 1]), >, 0); 248eda14cbcSMatt Macy 249eda14cbcSMatt Macy return (NULL); 250eda14cbcSMatt Macy } else { 251eda14cbcSMatt Macy /* 252eda14cbcSMatt Macy * Just to be safe, we verify the offset falls 253eda14cbcSMatt Macy * in between the mapping entries at index and 254eda14cbcSMatt Macy * one less than index. Since we know the offset 255eda14cbcSMatt Macy * doesn't overlap an entry, and we're supposed 256eda14cbcSMatt Macy * to return the entry just greater than the 257eda14cbcSMatt Macy * offset, both of the following tests must be 258eda14cbcSMatt Macy * true. 259eda14cbcSMatt Macy */ 260eda14cbcSMatt Macy ASSERT3S(dva_mapping_overlap_compare(&offset, 261eda14cbcSMatt Macy &vim->vim_entries[index]), <, 0); 262eda14cbcSMatt Macy IMPLY(index >= 1, dva_mapping_overlap_compare(&offset, 263eda14cbcSMatt Macy &vim->vim_entries[index - 1]) > 0); 264eda14cbcSMatt Macy 265eda14cbcSMatt Macy return (&vim->vim_entries[index]); 266eda14cbcSMatt Macy } 267eda14cbcSMatt Macy } else { 268eda14cbcSMatt Macy return (entry); 269eda14cbcSMatt Macy } 270eda14cbcSMatt Macy } 271eda14cbcSMatt Macy 272eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t * 273eda14cbcSMatt Macy vdev_indirect_mapping_entry_for_offset(vdev_indirect_mapping_t *vim, 274eda14cbcSMatt Macy uint64_t offset) 275eda14cbcSMatt Macy { 276eda14cbcSMatt Macy return (vdev_indirect_mapping_entry_for_offset_impl(vim, offset, 277eda14cbcSMatt Macy B_FALSE)); 278eda14cbcSMatt Macy } 279eda14cbcSMatt Macy 280eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t * 281eda14cbcSMatt Macy vdev_indirect_mapping_entry_for_offset_or_next(vdev_indirect_mapping_t *vim, 282eda14cbcSMatt Macy uint64_t offset) 283eda14cbcSMatt Macy { 284eda14cbcSMatt Macy return (vdev_indirect_mapping_entry_for_offset_impl(vim, offset, 285eda14cbcSMatt Macy B_TRUE)); 286eda14cbcSMatt Macy } 287eda14cbcSMatt Macy 288eda14cbcSMatt Macy void 289eda14cbcSMatt Macy vdev_indirect_mapping_close(vdev_indirect_mapping_t *vim) 290eda14cbcSMatt Macy { 291eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim)); 292eda14cbcSMatt Macy 293eda14cbcSMatt Macy if (vim->vim_phys->vimp_num_entries > 0) { 294eda14cbcSMatt Macy uint64_t map_size = vdev_indirect_mapping_size(vim); 295eda14cbcSMatt Macy vmem_free(vim->vim_entries, map_size); 296eda14cbcSMatt Macy vim->vim_entries = NULL; 297eda14cbcSMatt Macy } 298eda14cbcSMatt Macy 299eda14cbcSMatt Macy dmu_buf_rele(vim->vim_dbuf, vim); 300eda14cbcSMatt Macy 301eda14cbcSMatt Macy vim->vim_objset = NULL; 302eda14cbcSMatt Macy vim->vim_object = 0; 303eda14cbcSMatt Macy vim->vim_dbuf = NULL; 304eda14cbcSMatt Macy vim->vim_phys = NULL; 305eda14cbcSMatt Macy 306eda14cbcSMatt Macy kmem_free(vim, sizeof (*vim)); 307eda14cbcSMatt Macy } 308eda14cbcSMatt Macy 309eda14cbcSMatt Macy uint64_t 310eda14cbcSMatt Macy vdev_indirect_mapping_alloc(objset_t *os, dmu_tx_t *tx) 311eda14cbcSMatt Macy { 312eda14cbcSMatt Macy uint64_t object; 313eda14cbcSMatt Macy ASSERT(dmu_tx_is_syncing(tx)); 314eda14cbcSMatt Macy uint64_t bonus_size = VDEV_INDIRECT_MAPPING_SIZE_V0; 315eda14cbcSMatt Macy 316eda14cbcSMatt Macy if (spa_feature_is_enabled(os->os_spa, SPA_FEATURE_OBSOLETE_COUNTS)) { 317eda14cbcSMatt Macy bonus_size = sizeof (vdev_indirect_mapping_phys_t); 318eda14cbcSMatt Macy } 319eda14cbcSMatt Macy 320eda14cbcSMatt Macy object = dmu_object_alloc(os, 321eda14cbcSMatt Macy DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE, 322eda14cbcSMatt Macy DMU_OTN_UINT64_METADATA, bonus_size, 323eda14cbcSMatt Macy tx); 324eda14cbcSMatt Macy 325eda14cbcSMatt Macy if (spa_feature_is_enabled(os->os_spa, SPA_FEATURE_OBSOLETE_COUNTS)) { 326eda14cbcSMatt Macy dmu_buf_t *dbuf; 327eda14cbcSMatt Macy vdev_indirect_mapping_phys_t *vimp; 328eda14cbcSMatt Macy 329eda14cbcSMatt Macy VERIFY0(dmu_bonus_hold(os, object, FTAG, &dbuf)); 330eda14cbcSMatt Macy dmu_buf_will_dirty(dbuf, tx); 331eda14cbcSMatt Macy vimp = dbuf->db_data; 332eda14cbcSMatt Macy vimp->vimp_counts_object = dmu_object_alloc(os, 333eda14cbcSMatt Macy DMU_OTN_UINT32_METADATA, SPA_OLD_MAXBLOCKSIZE, 334eda14cbcSMatt Macy DMU_OT_NONE, 0, tx); 335eda14cbcSMatt Macy spa_feature_incr(os->os_spa, SPA_FEATURE_OBSOLETE_COUNTS, tx); 336eda14cbcSMatt Macy dmu_buf_rele(dbuf, FTAG); 337eda14cbcSMatt Macy } 338eda14cbcSMatt Macy 339eda14cbcSMatt Macy return (object); 340eda14cbcSMatt Macy } 341eda14cbcSMatt Macy 342eda14cbcSMatt Macy 343eda14cbcSMatt Macy vdev_indirect_mapping_t * 344eda14cbcSMatt Macy vdev_indirect_mapping_open(objset_t *os, uint64_t mapping_object) 345eda14cbcSMatt Macy { 346eda14cbcSMatt Macy vdev_indirect_mapping_t *vim = kmem_zalloc(sizeof (*vim), KM_SLEEP); 347eda14cbcSMatt Macy dmu_object_info_t doi; 348eda14cbcSMatt Macy VERIFY0(dmu_object_info(os, mapping_object, &doi)); 349eda14cbcSMatt Macy 350eda14cbcSMatt Macy vim->vim_objset = os; 351eda14cbcSMatt Macy vim->vim_object = mapping_object; 352eda14cbcSMatt Macy 353eda14cbcSMatt Macy VERIFY0(dmu_bonus_hold(os, vim->vim_object, vim, 354eda14cbcSMatt Macy &vim->vim_dbuf)); 355eda14cbcSMatt Macy vim->vim_phys = vim->vim_dbuf->db_data; 356eda14cbcSMatt Macy 357eda14cbcSMatt Macy vim->vim_havecounts = 358eda14cbcSMatt Macy (doi.doi_bonus_size > VDEV_INDIRECT_MAPPING_SIZE_V0); 359eda14cbcSMatt Macy 360eda14cbcSMatt Macy if (vim->vim_phys->vimp_num_entries > 0) { 361eda14cbcSMatt Macy uint64_t map_size = vdev_indirect_mapping_size(vim); 362eda14cbcSMatt Macy vim->vim_entries = vmem_alloc(map_size, KM_SLEEP); 363eda14cbcSMatt Macy VERIFY0(dmu_read(os, vim->vim_object, 0, map_size, 364eda14cbcSMatt Macy vim->vim_entries, DMU_READ_PREFETCH)); 365eda14cbcSMatt Macy } 366eda14cbcSMatt Macy 367eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim)); 368eda14cbcSMatt Macy 369eda14cbcSMatt Macy return (vim); 370eda14cbcSMatt Macy } 371eda14cbcSMatt Macy 372eda14cbcSMatt Macy void 373eda14cbcSMatt Macy vdev_indirect_mapping_free(objset_t *os, uint64_t object, dmu_tx_t *tx) 374eda14cbcSMatt Macy { 375eda14cbcSMatt Macy vdev_indirect_mapping_t *vim = vdev_indirect_mapping_open(os, object); 376eda14cbcSMatt Macy if (vim->vim_havecounts) { 377eda14cbcSMatt Macy VERIFY0(dmu_object_free(os, vim->vim_phys->vimp_counts_object, 378eda14cbcSMatt Macy tx)); 379eda14cbcSMatt Macy spa_feature_decr(os->os_spa, SPA_FEATURE_OBSOLETE_COUNTS, tx); 380eda14cbcSMatt Macy } 381eda14cbcSMatt Macy vdev_indirect_mapping_close(vim); 382eda14cbcSMatt Macy 383eda14cbcSMatt Macy VERIFY0(dmu_object_free(os, object, tx)); 384eda14cbcSMatt Macy } 385eda14cbcSMatt Macy 386eda14cbcSMatt Macy /* 387eda14cbcSMatt Macy * Append the list of vdev_indirect_mapping_entry_t's to the on-disk 388eda14cbcSMatt Macy * mapping object. Also remove the entries from the list and free them. 389eda14cbcSMatt Macy * This also implicitly extends the max_offset of the mapping (to the end 390eda14cbcSMatt Macy * of the last entry). 391eda14cbcSMatt Macy */ 392eda14cbcSMatt Macy void 393eda14cbcSMatt Macy vdev_indirect_mapping_add_entries(vdev_indirect_mapping_t *vim, 394eda14cbcSMatt Macy list_t *list, dmu_tx_t *tx) 395eda14cbcSMatt Macy { 396eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t *mapbuf; 397eda14cbcSMatt Macy uint64_t old_size; 398eda14cbcSMatt Macy uint32_t *countbuf = NULL; 399eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t *old_entries; 400eda14cbcSMatt Macy uint64_t old_count; 401eda14cbcSMatt Macy uint64_t entries_written = 0; 402eda14cbcSMatt Macy 403eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim)); 404eda14cbcSMatt Macy ASSERT(dmu_tx_is_syncing(tx)); 405eda14cbcSMatt Macy ASSERT(dsl_pool_sync_context(dmu_tx_pool(tx))); 406eda14cbcSMatt Macy ASSERT(!list_is_empty(list)); 407eda14cbcSMatt Macy 408eda14cbcSMatt Macy old_size = vdev_indirect_mapping_size(vim); 409eda14cbcSMatt Macy old_entries = vim->vim_entries; 410eda14cbcSMatt Macy old_count = vim->vim_phys->vimp_num_entries; 411eda14cbcSMatt Macy 412eda14cbcSMatt Macy dmu_buf_will_dirty(vim->vim_dbuf, tx); 413eda14cbcSMatt Macy 414eda14cbcSMatt Macy mapbuf = vmem_alloc(SPA_OLD_MAXBLOCKSIZE, KM_SLEEP); 415eda14cbcSMatt Macy if (vim->vim_havecounts) { 416eda14cbcSMatt Macy countbuf = vmem_alloc(SPA_OLD_MAXBLOCKSIZE, KM_SLEEP); 417eda14cbcSMatt Macy ASSERT(spa_feature_is_active(vim->vim_objset->os_spa, 418eda14cbcSMatt Macy SPA_FEATURE_OBSOLETE_COUNTS)); 419eda14cbcSMatt Macy } 420eda14cbcSMatt Macy while (!list_is_empty(list)) { 421eda14cbcSMatt Macy uint64_t i; 422eda14cbcSMatt Macy /* 423eda14cbcSMatt Macy * Write entries from the list to the 424eda14cbcSMatt Macy * vdev_im_object in batches of size SPA_OLD_MAXBLOCKSIZE. 425eda14cbcSMatt Macy */ 426eda14cbcSMatt Macy for (i = 0; i < SPA_OLD_MAXBLOCKSIZE / sizeof (*mapbuf); i++) { 427eda14cbcSMatt Macy vdev_indirect_mapping_entry_t *entry = 428eda14cbcSMatt Macy list_remove_head(list); 429eda14cbcSMatt Macy if (entry == NULL) 430eda14cbcSMatt Macy break; 431eda14cbcSMatt Macy 432eda14cbcSMatt Macy uint64_t size = 433eda14cbcSMatt Macy DVA_GET_ASIZE(&entry->vime_mapping.vimep_dst); 434eda14cbcSMatt Macy uint64_t src_offset = 435eda14cbcSMatt Macy DVA_MAPPING_GET_SRC_OFFSET(&entry->vime_mapping); 436eda14cbcSMatt Macy 437eda14cbcSMatt Macy /* 438eda14cbcSMatt Macy * We shouldn't be adding an entry which is fully 439eda14cbcSMatt Macy * obsolete. 440eda14cbcSMatt Macy */ 441eda14cbcSMatt Macy ASSERT3U(entry->vime_obsolete_count, <, size); 442eda14cbcSMatt Macy IMPLY(entry->vime_obsolete_count != 0, 443eda14cbcSMatt Macy vim->vim_havecounts); 444eda14cbcSMatt Macy 445eda14cbcSMatt Macy mapbuf[i] = entry->vime_mapping; 446eda14cbcSMatt Macy if (vim->vim_havecounts) 447eda14cbcSMatt Macy countbuf[i] = entry->vime_obsolete_count; 448eda14cbcSMatt Macy 449eda14cbcSMatt Macy vim->vim_phys->vimp_bytes_mapped += size; 450eda14cbcSMatt Macy ASSERT3U(src_offset, >=, 451eda14cbcSMatt Macy vim->vim_phys->vimp_max_offset); 452eda14cbcSMatt Macy vim->vim_phys->vimp_max_offset = src_offset + size; 453eda14cbcSMatt Macy 454eda14cbcSMatt Macy entries_written++; 455eda14cbcSMatt Macy 456eda14cbcSMatt Macy vmem_free(entry, sizeof (*entry)); 457eda14cbcSMatt Macy } 458eda14cbcSMatt Macy dmu_write(vim->vim_objset, vim->vim_object, 459eda14cbcSMatt Macy vim->vim_phys->vimp_num_entries * sizeof (*mapbuf), 460eda14cbcSMatt Macy i * sizeof (*mapbuf), 461eda14cbcSMatt Macy mapbuf, tx); 462eda14cbcSMatt Macy if (vim->vim_havecounts) { 463eda14cbcSMatt Macy dmu_write(vim->vim_objset, 464eda14cbcSMatt Macy vim->vim_phys->vimp_counts_object, 465eda14cbcSMatt Macy vim->vim_phys->vimp_num_entries * 466eda14cbcSMatt Macy sizeof (*countbuf), 467eda14cbcSMatt Macy i * sizeof (*countbuf), countbuf, tx); 468eda14cbcSMatt Macy } 469eda14cbcSMatt Macy vim->vim_phys->vimp_num_entries += i; 470eda14cbcSMatt Macy } 471eda14cbcSMatt Macy vmem_free(mapbuf, SPA_OLD_MAXBLOCKSIZE); 472eda14cbcSMatt Macy if (vim->vim_havecounts) 473eda14cbcSMatt Macy vmem_free(countbuf, SPA_OLD_MAXBLOCKSIZE); 474eda14cbcSMatt Macy 475eda14cbcSMatt Macy /* 476eda14cbcSMatt Macy * Update the entry array to reflect the new entries. First, copy 477eda14cbcSMatt Macy * over any old entries then read back the new entries we just wrote. 478eda14cbcSMatt Macy */ 479eda14cbcSMatt Macy uint64_t new_size = vdev_indirect_mapping_size(vim); 480eda14cbcSMatt Macy ASSERT3U(new_size, >, old_size); 481eda14cbcSMatt Macy ASSERT3U(new_size - old_size, ==, 482eda14cbcSMatt Macy entries_written * sizeof (vdev_indirect_mapping_entry_phys_t)); 483eda14cbcSMatt Macy vim->vim_entries = vmem_alloc(new_size, KM_SLEEP); 484eda14cbcSMatt Macy if (old_size > 0) { 485eda14cbcSMatt Macy bcopy(old_entries, vim->vim_entries, old_size); 486eda14cbcSMatt Macy vmem_free(old_entries, old_size); 487eda14cbcSMatt Macy } 488eda14cbcSMatt Macy VERIFY0(dmu_read(vim->vim_objset, vim->vim_object, old_size, 489eda14cbcSMatt Macy new_size - old_size, &vim->vim_entries[old_count], 490eda14cbcSMatt Macy DMU_READ_PREFETCH)); 491eda14cbcSMatt Macy 492eda14cbcSMatt Macy zfs_dbgmsg("txg %llu: wrote %llu entries to " 493eda14cbcSMatt Macy "indirect mapping obj %llu; max offset=0x%llx", 494eda14cbcSMatt Macy (u_longlong_t)dmu_tx_get_txg(tx), 495eda14cbcSMatt Macy (u_longlong_t)entries_written, 496eda14cbcSMatt Macy (u_longlong_t)vim->vim_object, 497eda14cbcSMatt Macy (u_longlong_t)vim->vim_phys->vimp_max_offset); 498eda14cbcSMatt Macy } 499eda14cbcSMatt Macy 500eda14cbcSMatt Macy /* 501eda14cbcSMatt Macy * Increment the relevant counts for the specified offset and length. 502eda14cbcSMatt Macy * The counts array must be obtained from 503eda14cbcSMatt Macy * vdev_indirect_mapping_load_obsolete_counts(). 504eda14cbcSMatt Macy */ 505eda14cbcSMatt Macy void 506eda14cbcSMatt Macy vdev_indirect_mapping_increment_obsolete_count(vdev_indirect_mapping_t *vim, 507eda14cbcSMatt Macy uint64_t offset, uint64_t length, uint32_t *counts) 508eda14cbcSMatt Macy { 509eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t *mapping; 510eda14cbcSMatt Macy uint64_t index; 511eda14cbcSMatt Macy 512eda14cbcSMatt Macy mapping = vdev_indirect_mapping_entry_for_offset(vim, offset); 513eda14cbcSMatt Macy 514eda14cbcSMatt Macy ASSERT(length > 0); 515eda14cbcSMatt Macy ASSERT3P(mapping, !=, NULL); 516eda14cbcSMatt Macy 517eda14cbcSMatt Macy index = mapping - vim->vim_entries; 518eda14cbcSMatt Macy 519eda14cbcSMatt Macy while (length > 0) { 520eda14cbcSMatt Macy ASSERT3U(index, <, vdev_indirect_mapping_num_entries(vim)); 521eda14cbcSMatt Macy 522eda14cbcSMatt Macy uint64_t size = DVA_GET_ASIZE(&mapping->vimep_dst); 523eda14cbcSMatt Macy uint64_t inner_offset = offset - 524eda14cbcSMatt Macy DVA_MAPPING_GET_SRC_OFFSET(mapping); 525eda14cbcSMatt Macy VERIFY3U(inner_offset, <, size); 526eda14cbcSMatt Macy uint64_t inner_size = MIN(length, size - inner_offset); 527eda14cbcSMatt Macy 528eda14cbcSMatt Macy VERIFY3U(counts[index] + inner_size, <=, size); 529eda14cbcSMatt Macy counts[index] += inner_size; 530eda14cbcSMatt Macy 531eda14cbcSMatt Macy offset += inner_size; 532eda14cbcSMatt Macy length -= inner_size; 533eda14cbcSMatt Macy mapping++; 534eda14cbcSMatt Macy index++; 535eda14cbcSMatt Macy } 536eda14cbcSMatt Macy } 537eda14cbcSMatt Macy 538eda14cbcSMatt Macy typedef struct load_obsolete_space_map_arg { 539eda14cbcSMatt Macy vdev_indirect_mapping_t *losma_vim; 540eda14cbcSMatt Macy uint32_t *losma_counts; 541eda14cbcSMatt Macy } load_obsolete_space_map_arg_t; 542eda14cbcSMatt Macy 543eda14cbcSMatt Macy static int 544eda14cbcSMatt Macy load_obsolete_sm_callback(space_map_entry_t *sme, void *arg) 545eda14cbcSMatt Macy { 546eda14cbcSMatt Macy load_obsolete_space_map_arg_t *losma = arg; 547eda14cbcSMatt Macy ASSERT3S(sme->sme_type, ==, SM_ALLOC); 548eda14cbcSMatt Macy 549eda14cbcSMatt Macy vdev_indirect_mapping_increment_obsolete_count(losma->losma_vim, 550eda14cbcSMatt Macy sme->sme_offset, sme->sme_run, losma->losma_counts); 551eda14cbcSMatt Macy 552eda14cbcSMatt Macy return (0); 553eda14cbcSMatt Macy } 554eda14cbcSMatt Macy 555eda14cbcSMatt Macy /* 556eda14cbcSMatt Macy * Modify the counts (increment them) based on the spacemap. 557eda14cbcSMatt Macy */ 558eda14cbcSMatt Macy void 559eda14cbcSMatt Macy vdev_indirect_mapping_load_obsolete_spacemap(vdev_indirect_mapping_t *vim, 560eda14cbcSMatt Macy uint32_t *counts, space_map_t *obsolete_space_sm) 561eda14cbcSMatt Macy { 562eda14cbcSMatt Macy load_obsolete_space_map_arg_t losma; 563eda14cbcSMatt Macy losma.losma_counts = counts; 564eda14cbcSMatt Macy losma.losma_vim = vim; 565eda14cbcSMatt Macy VERIFY0(space_map_iterate(obsolete_space_sm, 566eda14cbcSMatt Macy space_map_length(obsolete_space_sm), 567eda14cbcSMatt Macy load_obsolete_sm_callback, &losma)); 568eda14cbcSMatt Macy } 569eda14cbcSMatt Macy 570eda14cbcSMatt Macy /* 571eda14cbcSMatt Macy * Read the obsolete counts from disk, returning them in an array. 572eda14cbcSMatt Macy */ 573eda14cbcSMatt Macy uint32_t * 574eda14cbcSMatt Macy vdev_indirect_mapping_load_obsolete_counts(vdev_indirect_mapping_t *vim) 575eda14cbcSMatt Macy { 576eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim)); 577eda14cbcSMatt Macy 578eda14cbcSMatt Macy uint64_t counts_size = 579eda14cbcSMatt Macy vim->vim_phys->vimp_num_entries * sizeof (uint32_t); 580eda14cbcSMatt Macy uint32_t *counts = vmem_alloc(counts_size, KM_SLEEP); 581eda14cbcSMatt Macy if (vim->vim_havecounts) { 582eda14cbcSMatt Macy VERIFY0(dmu_read(vim->vim_objset, 583eda14cbcSMatt Macy vim->vim_phys->vimp_counts_object, 584eda14cbcSMatt Macy 0, counts_size, 585eda14cbcSMatt Macy counts, DMU_READ_PREFETCH)); 586eda14cbcSMatt Macy } else { 587eda14cbcSMatt Macy bzero(counts, counts_size); 588eda14cbcSMatt Macy } 589eda14cbcSMatt Macy return (counts); 590eda14cbcSMatt Macy } 591eda14cbcSMatt Macy 592eda14cbcSMatt Macy extern void 593eda14cbcSMatt Macy vdev_indirect_mapping_free_obsolete_counts(vdev_indirect_mapping_t *vim, 594eda14cbcSMatt Macy uint32_t *counts) 595eda14cbcSMatt Macy { 596eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim)); 597eda14cbcSMatt Macy 598eda14cbcSMatt Macy vmem_free(counts, vim->vim_phys->vimp_num_entries * sizeof (uint32_t)); 599eda14cbcSMatt Macy } 600eda14cbcSMatt Macy 601eda14cbcSMatt Macy #if defined(_KERNEL) 602eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_add_entries); 603eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_alloc); 604eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_bytes_mapped); 605eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_close); 606eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_entry_for_offset); 607eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_entry_for_offset_or_next); 608eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_free); 609eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_free_obsolete_counts); 610eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_increment_obsolete_count); 611eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_load_obsolete_counts); 612eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_load_obsolete_spacemap); 613eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_max_offset); 614eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_num_entries); 615eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_object); 616eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_open); 617eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_size); 618eda14cbcSMatt Macy #endif 619