1*eda14cbcSMatt Macy /* 2*eda14cbcSMatt Macy * CDDL HEADER START 3*eda14cbcSMatt Macy * 4*eda14cbcSMatt Macy * This file and its contents are supplied under the terms of the 5*eda14cbcSMatt Macy * Common Development and Distribution License ("CDDL"), version 1.0. 6*eda14cbcSMatt Macy * You may only use this file in accordance with the terms of version 7*eda14cbcSMatt Macy * 1.0 of the CDDL. 8*eda14cbcSMatt Macy * 9*eda14cbcSMatt Macy * A full copy of the text of the CDDL should have accompanied this 10*eda14cbcSMatt Macy * source. A copy of the CDDL is also available via the Internet at 11*eda14cbcSMatt Macy * http://www.illumos.org/license/CDDL. 12*eda14cbcSMatt Macy * 13*eda14cbcSMatt Macy * CDDL HEADER END 14*eda14cbcSMatt Macy */ 15*eda14cbcSMatt Macy 16*eda14cbcSMatt Macy /* 17*eda14cbcSMatt Macy * Copyright (c) 2015, 2017 by Delphix. All rights reserved. 18*eda14cbcSMatt Macy */ 19*eda14cbcSMatt Macy 20*eda14cbcSMatt Macy #include <sys/dmu_tx.h> 21*eda14cbcSMatt Macy #include <sys/dsl_pool.h> 22*eda14cbcSMatt Macy #include <sys/spa.h> 23*eda14cbcSMatt Macy #include <sys/vdev_impl.h> 24*eda14cbcSMatt Macy #include <sys/vdev_indirect_mapping.h> 25*eda14cbcSMatt Macy #include <sys/zfeature.h> 26*eda14cbcSMatt Macy #include <sys/dmu_objset.h> 27*eda14cbcSMatt Macy 28*eda14cbcSMatt Macy #ifdef ZFS_DEBUG 29*eda14cbcSMatt Macy static boolean_t 30*eda14cbcSMatt Macy vdev_indirect_mapping_verify(vdev_indirect_mapping_t *vim) 31*eda14cbcSMatt Macy { 32*eda14cbcSMatt Macy ASSERT(vim != NULL); 33*eda14cbcSMatt Macy 34*eda14cbcSMatt Macy ASSERT(vim->vim_object != 0); 35*eda14cbcSMatt Macy ASSERT(vim->vim_objset != NULL); 36*eda14cbcSMatt Macy ASSERT(vim->vim_phys != NULL); 37*eda14cbcSMatt Macy ASSERT(vim->vim_dbuf != NULL); 38*eda14cbcSMatt Macy 39*eda14cbcSMatt Macy EQUIV(vim->vim_phys->vimp_num_entries > 0, 40*eda14cbcSMatt Macy vim->vim_entries != NULL); 41*eda14cbcSMatt Macy if (vim->vim_phys->vimp_num_entries > 0) { 42*eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t *last_entry __maybe_unused = 43*eda14cbcSMatt Macy &vim->vim_entries[vim->vim_phys->vimp_num_entries - 1]; 44*eda14cbcSMatt Macy uint64_t offset __maybe_unused = 45*eda14cbcSMatt Macy DVA_MAPPING_GET_SRC_OFFSET(last_entry); 46*eda14cbcSMatt Macy uint64_t size __maybe_unused = 47*eda14cbcSMatt Macy DVA_GET_ASIZE(&last_entry->vimep_dst); 48*eda14cbcSMatt Macy 49*eda14cbcSMatt Macy ASSERT3U(vim->vim_phys->vimp_max_offset, >=, offset + size); 50*eda14cbcSMatt Macy } 51*eda14cbcSMatt Macy if (vim->vim_havecounts) { 52*eda14cbcSMatt Macy ASSERT(vim->vim_phys->vimp_counts_object != 0); 53*eda14cbcSMatt Macy } 54*eda14cbcSMatt Macy 55*eda14cbcSMatt Macy return (B_TRUE); 56*eda14cbcSMatt Macy } 57*eda14cbcSMatt Macy #endif 58*eda14cbcSMatt Macy 59*eda14cbcSMatt Macy uint64_t 60*eda14cbcSMatt Macy vdev_indirect_mapping_num_entries(vdev_indirect_mapping_t *vim) 61*eda14cbcSMatt Macy { 62*eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim)); 63*eda14cbcSMatt Macy 64*eda14cbcSMatt Macy return (vim->vim_phys->vimp_num_entries); 65*eda14cbcSMatt Macy } 66*eda14cbcSMatt Macy 67*eda14cbcSMatt Macy uint64_t 68*eda14cbcSMatt Macy vdev_indirect_mapping_max_offset(vdev_indirect_mapping_t *vim) 69*eda14cbcSMatt Macy { 70*eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim)); 71*eda14cbcSMatt Macy 72*eda14cbcSMatt Macy return (vim->vim_phys->vimp_max_offset); 73*eda14cbcSMatt Macy } 74*eda14cbcSMatt Macy 75*eda14cbcSMatt Macy uint64_t 76*eda14cbcSMatt Macy vdev_indirect_mapping_object(vdev_indirect_mapping_t *vim) 77*eda14cbcSMatt Macy { 78*eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim)); 79*eda14cbcSMatt Macy 80*eda14cbcSMatt Macy return (vim->vim_object); 81*eda14cbcSMatt Macy } 82*eda14cbcSMatt Macy 83*eda14cbcSMatt Macy uint64_t 84*eda14cbcSMatt Macy vdev_indirect_mapping_bytes_mapped(vdev_indirect_mapping_t *vim) 85*eda14cbcSMatt Macy { 86*eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim)); 87*eda14cbcSMatt Macy 88*eda14cbcSMatt Macy return (vim->vim_phys->vimp_bytes_mapped); 89*eda14cbcSMatt Macy } 90*eda14cbcSMatt Macy 91*eda14cbcSMatt Macy /* 92*eda14cbcSMatt Macy * The length (in bytes) of the mapping object array in memory and 93*eda14cbcSMatt Macy * (logically) on disk. 94*eda14cbcSMatt Macy * 95*eda14cbcSMatt Macy * Note that unlike most of our accessor functions, 96*eda14cbcSMatt Macy * we don't assert that the struct is consistent; therefore it can be 97*eda14cbcSMatt Macy * called while there may be concurrent changes, if we don't care about 98*eda14cbcSMatt Macy * the value being immediately stale (e.g. from spa_removal_get_stats()). 99*eda14cbcSMatt Macy */ 100*eda14cbcSMatt Macy uint64_t 101*eda14cbcSMatt Macy vdev_indirect_mapping_size(vdev_indirect_mapping_t *vim) 102*eda14cbcSMatt Macy { 103*eda14cbcSMatt Macy return (vim->vim_phys->vimp_num_entries * sizeof (*vim->vim_entries)); 104*eda14cbcSMatt Macy } 105*eda14cbcSMatt Macy 106*eda14cbcSMatt Macy /* 107*eda14cbcSMatt Macy * Compare an offset with an indirect mapping entry; there are three 108*eda14cbcSMatt Macy * possible scenarios: 109*eda14cbcSMatt Macy * 110*eda14cbcSMatt Macy * 1. The offset is "less than" the mapping entry; meaning the 111*eda14cbcSMatt Macy * offset is less than the source offset of the mapping entry. In 112*eda14cbcSMatt Macy * this case, there is no overlap between the offset and the 113*eda14cbcSMatt Macy * mapping entry and -1 will be returned. 114*eda14cbcSMatt Macy * 115*eda14cbcSMatt Macy * 2. The offset is "greater than" the mapping entry; meaning the 116*eda14cbcSMatt Macy * offset is greater than the mapping entry's source offset plus 117*eda14cbcSMatt Macy * the entry's size. In this case, there is no overlap between 118*eda14cbcSMatt Macy * the offset and the mapping entry and 1 will be returned. 119*eda14cbcSMatt Macy * 120*eda14cbcSMatt Macy * NOTE: If the offset is actually equal to the entry's offset 121*eda14cbcSMatt Macy * plus size, this is considered to be "greater" than the entry, 122*eda14cbcSMatt Macy * and this case applies (i.e. 1 will be returned). Thus, the 123*eda14cbcSMatt Macy * entry's "range" can be considered to be inclusive at its 124*eda14cbcSMatt Macy * start, but exclusive at its end: e.g. [src, src + size). 125*eda14cbcSMatt Macy * 126*eda14cbcSMatt Macy * 3. The last case to consider is if the offset actually falls 127*eda14cbcSMatt Macy * within the mapping entry's range. If this is the case, the 128*eda14cbcSMatt Macy * offset is considered to be "equal to" the mapping entry and 129*eda14cbcSMatt Macy * 0 will be returned. 130*eda14cbcSMatt Macy * 131*eda14cbcSMatt Macy * NOTE: If the offset is equal to the entry's source offset, 132*eda14cbcSMatt Macy * this case applies and 0 will be returned. If the offset is 133*eda14cbcSMatt Macy * equal to the entry's source plus its size, this case does 134*eda14cbcSMatt Macy * *not* apply (see "NOTE" above for scenario 2), and 1 will be 135*eda14cbcSMatt Macy * returned. 136*eda14cbcSMatt Macy */ 137*eda14cbcSMatt Macy static int 138*eda14cbcSMatt Macy dva_mapping_overlap_compare(const void *v_key, const void *v_array_elem) 139*eda14cbcSMatt Macy { 140*eda14cbcSMatt Macy const uint64_t * const key = v_key; 141*eda14cbcSMatt Macy const vdev_indirect_mapping_entry_phys_t * const array_elem = 142*eda14cbcSMatt Macy v_array_elem; 143*eda14cbcSMatt Macy uint64_t src_offset = DVA_MAPPING_GET_SRC_OFFSET(array_elem); 144*eda14cbcSMatt Macy 145*eda14cbcSMatt Macy if (*key < src_offset) { 146*eda14cbcSMatt Macy return (-1); 147*eda14cbcSMatt Macy } else if (*key < src_offset + DVA_GET_ASIZE(&array_elem->vimep_dst)) { 148*eda14cbcSMatt Macy return (0); 149*eda14cbcSMatt Macy } else { 150*eda14cbcSMatt Macy return (1); 151*eda14cbcSMatt Macy } 152*eda14cbcSMatt Macy } 153*eda14cbcSMatt Macy 154*eda14cbcSMatt Macy /* 155*eda14cbcSMatt Macy * Returns the mapping entry for the given offset. 156*eda14cbcSMatt Macy * 157*eda14cbcSMatt Macy * It's possible that the given offset will not be in the mapping table 158*eda14cbcSMatt Macy * (i.e. no mapping entries contain this offset), in which case, the 159*eda14cbcSMatt Macy * return value value depends on the "next_if_missing" parameter. 160*eda14cbcSMatt Macy * 161*eda14cbcSMatt Macy * If the offset is not found in the table and "next_if_missing" is 162*eda14cbcSMatt Macy * B_FALSE, then NULL will always be returned. The behavior is intended 163*eda14cbcSMatt Macy * to allow consumers to get the entry corresponding to the offset 164*eda14cbcSMatt Macy * parameter, iff the offset overlaps with an entry in the table. 165*eda14cbcSMatt Macy * 166*eda14cbcSMatt Macy * If the offset is not found in the table and "next_if_missing" is 167*eda14cbcSMatt Macy * B_TRUE, then the entry nearest to the given offset will be returned, 168*eda14cbcSMatt Macy * such that the entry's source offset is greater than the offset 169*eda14cbcSMatt Macy * passed in (i.e. the "next" mapping entry in the table is returned, if 170*eda14cbcSMatt Macy * the offset is missing from the table). If there are no entries whose 171*eda14cbcSMatt Macy * source offset is greater than the passed in offset, NULL is returned. 172*eda14cbcSMatt Macy */ 173*eda14cbcSMatt Macy static vdev_indirect_mapping_entry_phys_t * 174*eda14cbcSMatt Macy vdev_indirect_mapping_entry_for_offset_impl(vdev_indirect_mapping_t *vim, 175*eda14cbcSMatt Macy uint64_t offset, boolean_t next_if_missing) 176*eda14cbcSMatt Macy { 177*eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim)); 178*eda14cbcSMatt Macy ASSERT(vim->vim_phys->vimp_num_entries > 0); 179*eda14cbcSMatt Macy 180*eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t *entry = NULL; 181*eda14cbcSMatt Macy 182*eda14cbcSMatt Macy uint64_t last = vim->vim_phys->vimp_num_entries - 1; 183*eda14cbcSMatt Macy uint64_t base = 0; 184*eda14cbcSMatt Macy 185*eda14cbcSMatt Macy /* 186*eda14cbcSMatt Macy * We don't define these inside of the while loop because we use 187*eda14cbcSMatt Macy * their value in the case that offset isn't in the mapping. 188*eda14cbcSMatt Macy */ 189*eda14cbcSMatt Macy uint64_t mid; 190*eda14cbcSMatt Macy int result; 191*eda14cbcSMatt Macy 192*eda14cbcSMatt Macy while (last >= base) { 193*eda14cbcSMatt Macy mid = base + ((last - base) >> 1); 194*eda14cbcSMatt Macy 195*eda14cbcSMatt Macy result = dva_mapping_overlap_compare(&offset, 196*eda14cbcSMatt Macy &vim->vim_entries[mid]); 197*eda14cbcSMatt Macy 198*eda14cbcSMatt Macy if (result == 0) { 199*eda14cbcSMatt Macy entry = &vim->vim_entries[mid]; 200*eda14cbcSMatt Macy break; 201*eda14cbcSMatt Macy } else if (result < 0) { 202*eda14cbcSMatt Macy last = mid - 1; 203*eda14cbcSMatt Macy } else { 204*eda14cbcSMatt Macy base = mid + 1; 205*eda14cbcSMatt Macy } 206*eda14cbcSMatt Macy } 207*eda14cbcSMatt Macy 208*eda14cbcSMatt Macy if (entry == NULL && next_if_missing) { 209*eda14cbcSMatt Macy ASSERT3U(base, ==, last + 1); 210*eda14cbcSMatt Macy ASSERT(mid == base || mid == last); 211*eda14cbcSMatt Macy ASSERT3S(result, !=, 0); 212*eda14cbcSMatt Macy 213*eda14cbcSMatt Macy /* 214*eda14cbcSMatt Macy * The offset we're looking for isn't actually contained 215*eda14cbcSMatt Macy * in the mapping table, thus we need to return the 216*eda14cbcSMatt Macy * closest mapping entry that is greater than the 217*eda14cbcSMatt Macy * offset. We reuse the result of the last comparison, 218*eda14cbcSMatt Macy * comparing the mapping entry at index "mid" and the 219*eda14cbcSMatt Macy * offset. The offset is guaranteed to lie between 220*eda14cbcSMatt Macy * indices one less than "mid", and one greater than 221*eda14cbcSMatt Macy * "mid"; we just need to determine if offset is greater 222*eda14cbcSMatt Macy * than, or less than the mapping entry contained at 223*eda14cbcSMatt Macy * index "mid". 224*eda14cbcSMatt Macy */ 225*eda14cbcSMatt Macy 226*eda14cbcSMatt Macy uint64_t index; 227*eda14cbcSMatt Macy if (result < 0) 228*eda14cbcSMatt Macy index = mid; 229*eda14cbcSMatt Macy else 230*eda14cbcSMatt Macy index = mid + 1; 231*eda14cbcSMatt Macy 232*eda14cbcSMatt Macy ASSERT3U(index, <=, vim->vim_phys->vimp_num_entries); 233*eda14cbcSMatt Macy 234*eda14cbcSMatt Macy if (index == vim->vim_phys->vimp_num_entries) { 235*eda14cbcSMatt Macy /* 236*eda14cbcSMatt Macy * If "index" is past the end of the entries 237*eda14cbcSMatt Macy * array, then not only is the offset not in the 238*eda14cbcSMatt Macy * mapping table, but it's actually greater than 239*eda14cbcSMatt Macy * all entries in the table. In this case, we 240*eda14cbcSMatt Macy * can't return a mapping entry greater than the 241*eda14cbcSMatt Macy * offset (since none exist), so we return NULL. 242*eda14cbcSMatt Macy */ 243*eda14cbcSMatt Macy 244*eda14cbcSMatt Macy ASSERT3S(dva_mapping_overlap_compare(&offset, 245*eda14cbcSMatt Macy &vim->vim_entries[index - 1]), >, 0); 246*eda14cbcSMatt Macy 247*eda14cbcSMatt Macy return (NULL); 248*eda14cbcSMatt Macy } else { 249*eda14cbcSMatt Macy /* 250*eda14cbcSMatt Macy * Just to be safe, we verify the offset falls 251*eda14cbcSMatt Macy * in between the mapping entries at index and 252*eda14cbcSMatt Macy * one less than index. Since we know the offset 253*eda14cbcSMatt Macy * doesn't overlap an entry, and we're supposed 254*eda14cbcSMatt Macy * to return the entry just greater than the 255*eda14cbcSMatt Macy * offset, both of the following tests must be 256*eda14cbcSMatt Macy * true. 257*eda14cbcSMatt Macy */ 258*eda14cbcSMatt Macy ASSERT3S(dva_mapping_overlap_compare(&offset, 259*eda14cbcSMatt Macy &vim->vim_entries[index]), <, 0); 260*eda14cbcSMatt Macy IMPLY(index >= 1, dva_mapping_overlap_compare(&offset, 261*eda14cbcSMatt Macy &vim->vim_entries[index - 1]) > 0); 262*eda14cbcSMatt Macy 263*eda14cbcSMatt Macy return (&vim->vim_entries[index]); 264*eda14cbcSMatt Macy } 265*eda14cbcSMatt Macy } else { 266*eda14cbcSMatt Macy return (entry); 267*eda14cbcSMatt Macy } 268*eda14cbcSMatt Macy } 269*eda14cbcSMatt Macy 270*eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t * 271*eda14cbcSMatt Macy vdev_indirect_mapping_entry_for_offset(vdev_indirect_mapping_t *vim, 272*eda14cbcSMatt Macy uint64_t offset) 273*eda14cbcSMatt Macy { 274*eda14cbcSMatt Macy return (vdev_indirect_mapping_entry_for_offset_impl(vim, offset, 275*eda14cbcSMatt Macy B_FALSE)); 276*eda14cbcSMatt Macy } 277*eda14cbcSMatt Macy 278*eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t * 279*eda14cbcSMatt Macy vdev_indirect_mapping_entry_for_offset_or_next(vdev_indirect_mapping_t *vim, 280*eda14cbcSMatt Macy uint64_t offset) 281*eda14cbcSMatt Macy { 282*eda14cbcSMatt Macy return (vdev_indirect_mapping_entry_for_offset_impl(vim, offset, 283*eda14cbcSMatt Macy B_TRUE)); 284*eda14cbcSMatt Macy } 285*eda14cbcSMatt Macy 286*eda14cbcSMatt Macy void 287*eda14cbcSMatt Macy vdev_indirect_mapping_close(vdev_indirect_mapping_t *vim) 288*eda14cbcSMatt Macy { 289*eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim)); 290*eda14cbcSMatt Macy 291*eda14cbcSMatt Macy if (vim->vim_phys->vimp_num_entries > 0) { 292*eda14cbcSMatt Macy uint64_t map_size = vdev_indirect_mapping_size(vim); 293*eda14cbcSMatt Macy vmem_free(vim->vim_entries, map_size); 294*eda14cbcSMatt Macy vim->vim_entries = NULL; 295*eda14cbcSMatt Macy } 296*eda14cbcSMatt Macy 297*eda14cbcSMatt Macy dmu_buf_rele(vim->vim_dbuf, vim); 298*eda14cbcSMatt Macy 299*eda14cbcSMatt Macy vim->vim_objset = NULL; 300*eda14cbcSMatt Macy vim->vim_object = 0; 301*eda14cbcSMatt Macy vim->vim_dbuf = NULL; 302*eda14cbcSMatt Macy vim->vim_phys = NULL; 303*eda14cbcSMatt Macy 304*eda14cbcSMatt Macy kmem_free(vim, sizeof (*vim)); 305*eda14cbcSMatt Macy } 306*eda14cbcSMatt Macy 307*eda14cbcSMatt Macy uint64_t 308*eda14cbcSMatt Macy vdev_indirect_mapping_alloc(objset_t *os, dmu_tx_t *tx) 309*eda14cbcSMatt Macy { 310*eda14cbcSMatt Macy uint64_t object; 311*eda14cbcSMatt Macy ASSERT(dmu_tx_is_syncing(tx)); 312*eda14cbcSMatt Macy uint64_t bonus_size = VDEV_INDIRECT_MAPPING_SIZE_V0; 313*eda14cbcSMatt Macy 314*eda14cbcSMatt Macy if (spa_feature_is_enabled(os->os_spa, SPA_FEATURE_OBSOLETE_COUNTS)) { 315*eda14cbcSMatt Macy bonus_size = sizeof (vdev_indirect_mapping_phys_t); 316*eda14cbcSMatt Macy } 317*eda14cbcSMatt Macy 318*eda14cbcSMatt Macy object = dmu_object_alloc(os, 319*eda14cbcSMatt Macy DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE, 320*eda14cbcSMatt Macy DMU_OTN_UINT64_METADATA, bonus_size, 321*eda14cbcSMatt Macy tx); 322*eda14cbcSMatt Macy 323*eda14cbcSMatt Macy if (spa_feature_is_enabled(os->os_spa, SPA_FEATURE_OBSOLETE_COUNTS)) { 324*eda14cbcSMatt Macy dmu_buf_t *dbuf; 325*eda14cbcSMatt Macy vdev_indirect_mapping_phys_t *vimp; 326*eda14cbcSMatt Macy 327*eda14cbcSMatt Macy VERIFY0(dmu_bonus_hold(os, object, FTAG, &dbuf)); 328*eda14cbcSMatt Macy dmu_buf_will_dirty(dbuf, tx); 329*eda14cbcSMatt Macy vimp = dbuf->db_data; 330*eda14cbcSMatt Macy vimp->vimp_counts_object = dmu_object_alloc(os, 331*eda14cbcSMatt Macy DMU_OTN_UINT32_METADATA, SPA_OLD_MAXBLOCKSIZE, 332*eda14cbcSMatt Macy DMU_OT_NONE, 0, tx); 333*eda14cbcSMatt Macy spa_feature_incr(os->os_spa, SPA_FEATURE_OBSOLETE_COUNTS, tx); 334*eda14cbcSMatt Macy dmu_buf_rele(dbuf, FTAG); 335*eda14cbcSMatt Macy } 336*eda14cbcSMatt Macy 337*eda14cbcSMatt Macy return (object); 338*eda14cbcSMatt Macy } 339*eda14cbcSMatt Macy 340*eda14cbcSMatt Macy 341*eda14cbcSMatt Macy vdev_indirect_mapping_t * 342*eda14cbcSMatt Macy vdev_indirect_mapping_open(objset_t *os, uint64_t mapping_object) 343*eda14cbcSMatt Macy { 344*eda14cbcSMatt Macy vdev_indirect_mapping_t *vim = kmem_zalloc(sizeof (*vim), KM_SLEEP); 345*eda14cbcSMatt Macy dmu_object_info_t doi; 346*eda14cbcSMatt Macy VERIFY0(dmu_object_info(os, mapping_object, &doi)); 347*eda14cbcSMatt Macy 348*eda14cbcSMatt Macy vim->vim_objset = os; 349*eda14cbcSMatt Macy vim->vim_object = mapping_object; 350*eda14cbcSMatt Macy 351*eda14cbcSMatt Macy VERIFY0(dmu_bonus_hold(os, vim->vim_object, vim, 352*eda14cbcSMatt Macy &vim->vim_dbuf)); 353*eda14cbcSMatt Macy vim->vim_phys = vim->vim_dbuf->db_data; 354*eda14cbcSMatt Macy 355*eda14cbcSMatt Macy vim->vim_havecounts = 356*eda14cbcSMatt Macy (doi.doi_bonus_size > VDEV_INDIRECT_MAPPING_SIZE_V0); 357*eda14cbcSMatt Macy 358*eda14cbcSMatt Macy if (vim->vim_phys->vimp_num_entries > 0) { 359*eda14cbcSMatt Macy uint64_t map_size = vdev_indirect_mapping_size(vim); 360*eda14cbcSMatt Macy vim->vim_entries = vmem_alloc(map_size, KM_SLEEP); 361*eda14cbcSMatt Macy VERIFY0(dmu_read(os, vim->vim_object, 0, map_size, 362*eda14cbcSMatt Macy vim->vim_entries, DMU_READ_PREFETCH)); 363*eda14cbcSMatt Macy } 364*eda14cbcSMatt Macy 365*eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim)); 366*eda14cbcSMatt Macy 367*eda14cbcSMatt Macy return (vim); 368*eda14cbcSMatt Macy } 369*eda14cbcSMatt Macy 370*eda14cbcSMatt Macy void 371*eda14cbcSMatt Macy vdev_indirect_mapping_free(objset_t *os, uint64_t object, dmu_tx_t *tx) 372*eda14cbcSMatt Macy { 373*eda14cbcSMatt Macy vdev_indirect_mapping_t *vim = vdev_indirect_mapping_open(os, object); 374*eda14cbcSMatt Macy if (vim->vim_havecounts) { 375*eda14cbcSMatt Macy VERIFY0(dmu_object_free(os, vim->vim_phys->vimp_counts_object, 376*eda14cbcSMatt Macy tx)); 377*eda14cbcSMatt Macy spa_feature_decr(os->os_spa, SPA_FEATURE_OBSOLETE_COUNTS, tx); 378*eda14cbcSMatt Macy } 379*eda14cbcSMatt Macy vdev_indirect_mapping_close(vim); 380*eda14cbcSMatt Macy 381*eda14cbcSMatt Macy VERIFY0(dmu_object_free(os, object, tx)); 382*eda14cbcSMatt Macy } 383*eda14cbcSMatt Macy 384*eda14cbcSMatt Macy /* 385*eda14cbcSMatt Macy * Append the list of vdev_indirect_mapping_entry_t's to the on-disk 386*eda14cbcSMatt Macy * mapping object. Also remove the entries from the list and free them. 387*eda14cbcSMatt Macy * This also implicitly extends the max_offset of the mapping (to the end 388*eda14cbcSMatt Macy * of the last entry). 389*eda14cbcSMatt Macy */ 390*eda14cbcSMatt Macy void 391*eda14cbcSMatt Macy vdev_indirect_mapping_add_entries(vdev_indirect_mapping_t *vim, 392*eda14cbcSMatt Macy list_t *list, dmu_tx_t *tx) 393*eda14cbcSMatt Macy { 394*eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t *mapbuf; 395*eda14cbcSMatt Macy uint64_t old_size; 396*eda14cbcSMatt Macy uint32_t *countbuf = NULL; 397*eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t *old_entries; 398*eda14cbcSMatt Macy uint64_t old_count; 399*eda14cbcSMatt Macy uint64_t entries_written = 0; 400*eda14cbcSMatt Macy 401*eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim)); 402*eda14cbcSMatt Macy ASSERT(dmu_tx_is_syncing(tx)); 403*eda14cbcSMatt Macy ASSERT(dsl_pool_sync_context(dmu_tx_pool(tx))); 404*eda14cbcSMatt Macy ASSERT(!list_is_empty(list)); 405*eda14cbcSMatt Macy 406*eda14cbcSMatt Macy old_size = vdev_indirect_mapping_size(vim); 407*eda14cbcSMatt Macy old_entries = vim->vim_entries; 408*eda14cbcSMatt Macy old_count = vim->vim_phys->vimp_num_entries; 409*eda14cbcSMatt Macy 410*eda14cbcSMatt Macy dmu_buf_will_dirty(vim->vim_dbuf, tx); 411*eda14cbcSMatt Macy 412*eda14cbcSMatt Macy mapbuf = vmem_alloc(SPA_OLD_MAXBLOCKSIZE, KM_SLEEP); 413*eda14cbcSMatt Macy if (vim->vim_havecounts) { 414*eda14cbcSMatt Macy countbuf = vmem_alloc(SPA_OLD_MAXBLOCKSIZE, KM_SLEEP); 415*eda14cbcSMatt Macy ASSERT(spa_feature_is_active(vim->vim_objset->os_spa, 416*eda14cbcSMatt Macy SPA_FEATURE_OBSOLETE_COUNTS)); 417*eda14cbcSMatt Macy } 418*eda14cbcSMatt Macy while (!list_is_empty(list)) { 419*eda14cbcSMatt Macy uint64_t i; 420*eda14cbcSMatt Macy /* 421*eda14cbcSMatt Macy * Write entries from the list to the 422*eda14cbcSMatt Macy * vdev_im_object in batches of size SPA_OLD_MAXBLOCKSIZE. 423*eda14cbcSMatt Macy */ 424*eda14cbcSMatt Macy for (i = 0; i < SPA_OLD_MAXBLOCKSIZE / sizeof (*mapbuf); i++) { 425*eda14cbcSMatt Macy vdev_indirect_mapping_entry_t *entry = 426*eda14cbcSMatt Macy list_remove_head(list); 427*eda14cbcSMatt Macy if (entry == NULL) 428*eda14cbcSMatt Macy break; 429*eda14cbcSMatt Macy 430*eda14cbcSMatt Macy uint64_t size = 431*eda14cbcSMatt Macy DVA_GET_ASIZE(&entry->vime_mapping.vimep_dst); 432*eda14cbcSMatt Macy uint64_t src_offset = 433*eda14cbcSMatt Macy DVA_MAPPING_GET_SRC_OFFSET(&entry->vime_mapping); 434*eda14cbcSMatt Macy 435*eda14cbcSMatt Macy /* 436*eda14cbcSMatt Macy * We shouldn't be adding an entry which is fully 437*eda14cbcSMatt Macy * obsolete. 438*eda14cbcSMatt Macy */ 439*eda14cbcSMatt Macy ASSERT3U(entry->vime_obsolete_count, <, size); 440*eda14cbcSMatt Macy IMPLY(entry->vime_obsolete_count != 0, 441*eda14cbcSMatt Macy vim->vim_havecounts); 442*eda14cbcSMatt Macy 443*eda14cbcSMatt Macy mapbuf[i] = entry->vime_mapping; 444*eda14cbcSMatt Macy if (vim->vim_havecounts) 445*eda14cbcSMatt Macy countbuf[i] = entry->vime_obsolete_count; 446*eda14cbcSMatt Macy 447*eda14cbcSMatt Macy vim->vim_phys->vimp_bytes_mapped += size; 448*eda14cbcSMatt Macy ASSERT3U(src_offset, >=, 449*eda14cbcSMatt Macy vim->vim_phys->vimp_max_offset); 450*eda14cbcSMatt Macy vim->vim_phys->vimp_max_offset = src_offset + size; 451*eda14cbcSMatt Macy 452*eda14cbcSMatt Macy entries_written++; 453*eda14cbcSMatt Macy 454*eda14cbcSMatt Macy vmem_free(entry, sizeof (*entry)); 455*eda14cbcSMatt Macy } 456*eda14cbcSMatt Macy dmu_write(vim->vim_objset, vim->vim_object, 457*eda14cbcSMatt Macy vim->vim_phys->vimp_num_entries * sizeof (*mapbuf), 458*eda14cbcSMatt Macy i * sizeof (*mapbuf), 459*eda14cbcSMatt Macy mapbuf, tx); 460*eda14cbcSMatt Macy if (vim->vim_havecounts) { 461*eda14cbcSMatt Macy dmu_write(vim->vim_objset, 462*eda14cbcSMatt Macy vim->vim_phys->vimp_counts_object, 463*eda14cbcSMatt Macy vim->vim_phys->vimp_num_entries * 464*eda14cbcSMatt Macy sizeof (*countbuf), 465*eda14cbcSMatt Macy i * sizeof (*countbuf), countbuf, tx); 466*eda14cbcSMatt Macy } 467*eda14cbcSMatt Macy vim->vim_phys->vimp_num_entries += i; 468*eda14cbcSMatt Macy } 469*eda14cbcSMatt Macy vmem_free(mapbuf, SPA_OLD_MAXBLOCKSIZE); 470*eda14cbcSMatt Macy if (vim->vim_havecounts) 471*eda14cbcSMatt Macy vmem_free(countbuf, SPA_OLD_MAXBLOCKSIZE); 472*eda14cbcSMatt Macy 473*eda14cbcSMatt Macy /* 474*eda14cbcSMatt Macy * Update the entry array to reflect the new entries. First, copy 475*eda14cbcSMatt Macy * over any old entries then read back the new entries we just wrote. 476*eda14cbcSMatt Macy */ 477*eda14cbcSMatt Macy uint64_t new_size = vdev_indirect_mapping_size(vim); 478*eda14cbcSMatt Macy ASSERT3U(new_size, >, old_size); 479*eda14cbcSMatt Macy ASSERT3U(new_size - old_size, ==, 480*eda14cbcSMatt Macy entries_written * sizeof (vdev_indirect_mapping_entry_phys_t)); 481*eda14cbcSMatt Macy vim->vim_entries = vmem_alloc(new_size, KM_SLEEP); 482*eda14cbcSMatt Macy if (old_size > 0) { 483*eda14cbcSMatt Macy bcopy(old_entries, vim->vim_entries, old_size); 484*eda14cbcSMatt Macy vmem_free(old_entries, old_size); 485*eda14cbcSMatt Macy } 486*eda14cbcSMatt Macy VERIFY0(dmu_read(vim->vim_objset, vim->vim_object, old_size, 487*eda14cbcSMatt Macy new_size - old_size, &vim->vim_entries[old_count], 488*eda14cbcSMatt Macy DMU_READ_PREFETCH)); 489*eda14cbcSMatt Macy 490*eda14cbcSMatt Macy zfs_dbgmsg("txg %llu: wrote %llu entries to " 491*eda14cbcSMatt Macy "indirect mapping obj %llu; max offset=0x%llx", 492*eda14cbcSMatt Macy (u_longlong_t)dmu_tx_get_txg(tx), 493*eda14cbcSMatt Macy (u_longlong_t)entries_written, 494*eda14cbcSMatt Macy (u_longlong_t)vim->vim_object, 495*eda14cbcSMatt Macy (u_longlong_t)vim->vim_phys->vimp_max_offset); 496*eda14cbcSMatt Macy } 497*eda14cbcSMatt Macy 498*eda14cbcSMatt Macy /* 499*eda14cbcSMatt Macy * Increment the relevant counts for the specified offset and length. 500*eda14cbcSMatt Macy * The counts array must be obtained from 501*eda14cbcSMatt Macy * vdev_indirect_mapping_load_obsolete_counts(). 502*eda14cbcSMatt Macy */ 503*eda14cbcSMatt Macy void 504*eda14cbcSMatt Macy vdev_indirect_mapping_increment_obsolete_count(vdev_indirect_mapping_t *vim, 505*eda14cbcSMatt Macy uint64_t offset, uint64_t length, uint32_t *counts) 506*eda14cbcSMatt Macy { 507*eda14cbcSMatt Macy vdev_indirect_mapping_entry_phys_t *mapping; 508*eda14cbcSMatt Macy uint64_t index; 509*eda14cbcSMatt Macy 510*eda14cbcSMatt Macy mapping = vdev_indirect_mapping_entry_for_offset(vim, offset); 511*eda14cbcSMatt Macy 512*eda14cbcSMatt Macy ASSERT(length > 0); 513*eda14cbcSMatt Macy ASSERT3P(mapping, !=, NULL); 514*eda14cbcSMatt Macy 515*eda14cbcSMatt Macy index = mapping - vim->vim_entries; 516*eda14cbcSMatt Macy 517*eda14cbcSMatt Macy while (length > 0) { 518*eda14cbcSMatt Macy ASSERT3U(index, <, vdev_indirect_mapping_num_entries(vim)); 519*eda14cbcSMatt Macy 520*eda14cbcSMatt Macy uint64_t size = DVA_GET_ASIZE(&mapping->vimep_dst); 521*eda14cbcSMatt Macy uint64_t inner_offset = offset - 522*eda14cbcSMatt Macy DVA_MAPPING_GET_SRC_OFFSET(mapping); 523*eda14cbcSMatt Macy VERIFY3U(inner_offset, <, size); 524*eda14cbcSMatt Macy uint64_t inner_size = MIN(length, size - inner_offset); 525*eda14cbcSMatt Macy 526*eda14cbcSMatt Macy VERIFY3U(counts[index] + inner_size, <=, size); 527*eda14cbcSMatt Macy counts[index] += inner_size; 528*eda14cbcSMatt Macy 529*eda14cbcSMatt Macy offset += inner_size; 530*eda14cbcSMatt Macy length -= inner_size; 531*eda14cbcSMatt Macy mapping++; 532*eda14cbcSMatt Macy index++; 533*eda14cbcSMatt Macy } 534*eda14cbcSMatt Macy } 535*eda14cbcSMatt Macy 536*eda14cbcSMatt Macy typedef struct load_obsolete_space_map_arg { 537*eda14cbcSMatt Macy vdev_indirect_mapping_t *losma_vim; 538*eda14cbcSMatt Macy uint32_t *losma_counts; 539*eda14cbcSMatt Macy } load_obsolete_space_map_arg_t; 540*eda14cbcSMatt Macy 541*eda14cbcSMatt Macy static int 542*eda14cbcSMatt Macy load_obsolete_sm_callback(space_map_entry_t *sme, void *arg) 543*eda14cbcSMatt Macy { 544*eda14cbcSMatt Macy load_obsolete_space_map_arg_t *losma = arg; 545*eda14cbcSMatt Macy ASSERT3S(sme->sme_type, ==, SM_ALLOC); 546*eda14cbcSMatt Macy 547*eda14cbcSMatt Macy vdev_indirect_mapping_increment_obsolete_count(losma->losma_vim, 548*eda14cbcSMatt Macy sme->sme_offset, sme->sme_run, losma->losma_counts); 549*eda14cbcSMatt Macy 550*eda14cbcSMatt Macy return (0); 551*eda14cbcSMatt Macy } 552*eda14cbcSMatt Macy 553*eda14cbcSMatt Macy /* 554*eda14cbcSMatt Macy * Modify the counts (increment them) based on the spacemap. 555*eda14cbcSMatt Macy */ 556*eda14cbcSMatt Macy void 557*eda14cbcSMatt Macy vdev_indirect_mapping_load_obsolete_spacemap(vdev_indirect_mapping_t *vim, 558*eda14cbcSMatt Macy uint32_t *counts, space_map_t *obsolete_space_sm) 559*eda14cbcSMatt Macy { 560*eda14cbcSMatt Macy load_obsolete_space_map_arg_t losma; 561*eda14cbcSMatt Macy losma.losma_counts = counts; 562*eda14cbcSMatt Macy losma.losma_vim = vim; 563*eda14cbcSMatt Macy VERIFY0(space_map_iterate(obsolete_space_sm, 564*eda14cbcSMatt Macy space_map_length(obsolete_space_sm), 565*eda14cbcSMatt Macy load_obsolete_sm_callback, &losma)); 566*eda14cbcSMatt Macy } 567*eda14cbcSMatt Macy 568*eda14cbcSMatt Macy /* 569*eda14cbcSMatt Macy * Read the obsolete counts from disk, returning them in an array. 570*eda14cbcSMatt Macy */ 571*eda14cbcSMatt Macy uint32_t * 572*eda14cbcSMatt Macy vdev_indirect_mapping_load_obsolete_counts(vdev_indirect_mapping_t *vim) 573*eda14cbcSMatt Macy { 574*eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim)); 575*eda14cbcSMatt Macy 576*eda14cbcSMatt Macy uint64_t counts_size = 577*eda14cbcSMatt Macy vim->vim_phys->vimp_num_entries * sizeof (uint32_t); 578*eda14cbcSMatt Macy uint32_t *counts = vmem_alloc(counts_size, KM_SLEEP); 579*eda14cbcSMatt Macy if (vim->vim_havecounts) { 580*eda14cbcSMatt Macy VERIFY0(dmu_read(vim->vim_objset, 581*eda14cbcSMatt Macy vim->vim_phys->vimp_counts_object, 582*eda14cbcSMatt Macy 0, counts_size, 583*eda14cbcSMatt Macy counts, DMU_READ_PREFETCH)); 584*eda14cbcSMatt Macy } else { 585*eda14cbcSMatt Macy bzero(counts, counts_size); 586*eda14cbcSMatt Macy } 587*eda14cbcSMatt Macy return (counts); 588*eda14cbcSMatt Macy } 589*eda14cbcSMatt Macy 590*eda14cbcSMatt Macy extern void 591*eda14cbcSMatt Macy vdev_indirect_mapping_free_obsolete_counts(vdev_indirect_mapping_t *vim, 592*eda14cbcSMatt Macy uint32_t *counts) 593*eda14cbcSMatt Macy { 594*eda14cbcSMatt Macy ASSERT(vdev_indirect_mapping_verify(vim)); 595*eda14cbcSMatt Macy 596*eda14cbcSMatt Macy vmem_free(counts, vim->vim_phys->vimp_num_entries * sizeof (uint32_t)); 597*eda14cbcSMatt Macy } 598*eda14cbcSMatt Macy 599*eda14cbcSMatt Macy #if defined(_KERNEL) 600*eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_add_entries); 601*eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_alloc); 602*eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_bytes_mapped); 603*eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_close); 604*eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_entry_for_offset); 605*eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_entry_for_offset_or_next); 606*eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_free); 607*eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_free_obsolete_counts); 608*eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_increment_obsolete_count); 609*eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_load_obsolete_counts); 610*eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_load_obsolete_spacemap); 611*eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_max_offset); 612*eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_num_entries); 613*eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_object); 614*eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_open); 615*eda14cbcSMatt Macy EXPORT_SYMBOL(vdev_indirect_mapping_size); 616*eda14cbcSMatt Macy #endif 617