1eda14cbcSMatt Macy /* 2eda14cbcSMatt Macy * CDDL HEADER START 3eda14cbcSMatt Macy * 4eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7eda14cbcSMatt Macy * 8eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9271171e0SMartin Matuska * or https://opensource.org/licenses/CDDL-1.0. 10eda14cbcSMatt Macy * See the License for the specific language governing permissions 11eda14cbcSMatt Macy * and limitations under the License. 12eda14cbcSMatt Macy * 13eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18eda14cbcSMatt Macy * 19eda14cbcSMatt Macy * CDDL HEADER END 20eda14cbcSMatt Macy */ 21eda14cbcSMatt Macy 22eda14cbcSMatt Macy /* 23eda14cbcSMatt Macy * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24eda14cbcSMatt Macy * Copyright (c) 2018 by Delphix. All rights reserved. 25e2df9bb4SMartin Matuska * Copyright (c) 2023, Klara Inc. 26eda14cbcSMatt Macy */ 27eda14cbcSMatt Macy 28eda14cbcSMatt Macy #include <sys/zfs_context.h> 29eda14cbcSMatt Macy #include <sys/spa.h> 30eda14cbcSMatt Macy #include <sys/zio.h> 31eda14cbcSMatt Macy #include <sys/ddt.h> 324fefe1b7SMartin Matuska #include <sys/ddt_impl.h> 33eda14cbcSMatt Macy #include <sys/zap.h> 34eda14cbcSMatt Macy #include <sys/dmu_tx.h> 354fefe1b7SMartin Matuska #include <sys/zio_compress.h> 36eda14cbcSMatt Macy 370a97523dSMartin Matuska static unsigned int ddt_zap_default_bs = 15; 380a97523dSMartin Matuska static unsigned int ddt_zap_default_ibs = 15; 39eda14cbcSMatt Macy 404fefe1b7SMartin Matuska #define DDT_ZAP_COMPRESS_BYTEORDER_MASK 0x80 414fefe1b7SMartin Matuska #define DDT_ZAP_COMPRESS_FUNCTION_MASK 0x7f 424fefe1b7SMartin Matuska 434fefe1b7SMartin Matuska #define DDT_KEY_WORDS (sizeof (ddt_key_t) / sizeof (uint64_t)) 444fefe1b7SMartin Matuska 454fefe1b7SMartin Matuska static size_t 464fefe1b7SMartin Matuska ddt_zap_compress(const void *src, uchar_t *dst, size_t s_len, size_t d_len) 474fefe1b7SMartin Matuska { 484fefe1b7SMartin Matuska uchar_t *version = dst++; 494fefe1b7SMartin Matuska int cpfunc = ZIO_COMPRESS_ZLE; 504fefe1b7SMartin Matuska zio_compress_info_t *ci = &zio_compress_table[cpfunc]; 514fefe1b7SMartin Matuska size_t c_len; 524fefe1b7SMartin Matuska 534fefe1b7SMartin Matuska ASSERT3U(d_len, >=, s_len + 1); /* no compression plus version byte */ 544fefe1b7SMartin Matuska 55e2df9bb4SMartin Matuska /* Call compress function directly to avoid hole detection. */ 56e2df9bb4SMartin Matuska abd_t sabd, dabd; 57e2df9bb4SMartin Matuska abd_get_from_buf_struct(&sabd, (void *)src, s_len); 58e2df9bb4SMartin Matuska abd_get_from_buf_struct(&dabd, dst, d_len); 59e2df9bb4SMartin Matuska c_len = ci->ci_compress(&sabd, &dabd, s_len, d_len - 1, ci->ci_level); 60e2df9bb4SMartin Matuska abd_free(&dabd); 61e2df9bb4SMartin Matuska abd_free(&sabd); 624fefe1b7SMartin Matuska 634fefe1b7SMartin Matuska if (c_len == s_len) { 644fefe1b7SMartin Matuska cpfunc = ZIO_COMPRESS_OFF; 654fefe1b7SMartin Matuska memcpy(dst, src, s_len); 664fefe1b7SMartin Matuska } 674fefe1b7SMartin Matuska 684fefe1b7SMartin Matuska *version = cpfunc; 694fefe1b7SMartin Matuska if (ZFS_HOST_BYTEORDER) 704fefe1b7SMartin Matuska *version |= DDT_ZAP_COMPRESS_BYTEORDER_MASK; 714fefe1b7SMartin Matuska 724fefe1b7SMartin Matuska return (c_len + 1); 734fefe1b7SMartin Matuska } 744fefe1b7SMartin Matuska 754fefe1b7SMartin Matuska static void 764fefe1b7SMartin Matuska ddt_zap_decompress(uchar_t *src, void *dst, size_t s_len, size_t d_len) 774fefe1b7SMartin Matuska { 784fefe1b7SMartin Matuska uchar_t version = *src++; 794fefe1b7SMartin Matuska int cpfunc = version & DDT_ZAP_COMPRESS_FUNCTION_MASK; 804fefe1b7SMartin Matuska 81e2df9bb4SMartin Matuska if (zio_compress_table[cpfunc].ci_decompress == NULL) { 824fefe1b7SMartin Matuska memcpy(dst, src, d_len); 83e2df9bb4SMartin Matuska return; 84e2df9bb4SMartin Matuska } 85e2df9bb4SMartin Matuska 86e2df9bb4SMartin Matuska abd_t sabd, dabd; 87e2df9bb4SMartin Matuska abd_get_from_buf_struct(&sabd, src, s_len); 88e2df9bb4SMartin Matuska abd_get_from_buf_struct(&dabd, dst, d_len); 89e2df9bb4SMartin Matuska VERIFY0(zio_decompress_data(cpfunc, &sabd, &dabd, s_len, d_len, NULL)); 90e2df9bb4SMartin Matuska abd_free(&dabd); 91e2df9bb4SMartin Matuska abd_free(&sabd); 924fefe1b7SMartin Matuska 934fefe1b7SMartin Matuska if (((version & DDT_ZAP_COMPRESS_BYTEORDER_MASK) != 0) != 944fefe1b7SMartin Matuska (ZFS_HOST_BYTEORDER != 0)) 954fefe1b7SMartin Matuska byteswap_uint64_array(dst, d_len); 964fefe1b7SMartin Matuska } 974fefe1b7SMartin Matuska 98eda14cbcSMatt Macy static int 99eda14cbcSMatt Macy ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash) 100eda14cbcSMatt Macy { 101eda14cbcSMatt Macy zap_flags_t flags = ZAP_FLAG_HASH64 | ZAP_FLAG_UINT64_KEY; 102eda14cbcSMatt Macy 103eda14cbcSMatt Macy if (prehash) 104eda14cbcSMatt Macy flags |= ZAP_FLAG_PRE_HASHED_KEY; 105eda14cbcSMatt Macy 106eda14cbcSMatt Macy *objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP, 1070a97523dSMartin Matuska ddt_zap_default_bs, ddt_zap_default_ibs, 108eda14cbcSMatt Macy DMU_OT_NONE, 0, tx); 1094fefe1b7SMartin Matuska if (*objectp == 0) 1104fefe1b7SMartin Matuska return (SET_ERROR(ENOTSUP)); 111eda14cbcSMatt Macy 1124fefe1b7SMartin Matuska return (0); 113eda14cbcSMatt Macy } 114eda14cbcSMatt Macy 115eda14cbcSMatt Macy static int 116eda14cbcSMatt Macy ddt_zap_destroy(objset_t *os, uint64_t object, dmu_tx_t *tx) 117eda14cbcSMatt Macy { 118eda14cbcSMatt Macy return (zap_destroy(os, object, tx)); 119eda14cbcSMatt Macy } 120eda14cbcSMatt Macy 121eda14cbcSMatt Macy static int 1224fefe1b7SMartin Matuska ddt_zap_lookup(objset_t *os, uint64_t object, 123e2df9bb4SMartin Matuska const ddt_key_t *ddk, void *phys, size_t psize) 124eda14cbcSMatt Macy { 125eda14cbcSMatt Macy uchar_t *cbuf; 126eda14cbcSMatt Macy uint64_t one, csize; 127eda14cbcSMatt Macy int error; 128eda14cbcSMatt Macy 1294fefe1b7SMartin Matuska error = zap_length_uint64(os, object, (uint64_t *)ddk, 130eda14cbcSMatt Macy DDT_KEY_WORDS, &one, &csize); 131eda14cbcSMatt Macy if (error) 1324fefe1b7SMartin Matuska return (error); 133eda14cbcSMatt Macy 1344fefe1b7SMartin Matuska ASSERT3U(one, ==, 1); 1354fefe1b7SMartin Matuska ASSERT3U(csize, <=, psize + 1); 136eda14cbcSMatt Macy 1374fefe1b7SMartin Matuska cbuf = kmem_alloc(csize, KM_SLEEP); 1384fefe1b7SMartin Matuska 1394fefe1b7SMartin Matuska error = zap_lookup_uint64(os, object, (uint64_t *)ddk, 140eda14cbcSMatt Macy DDT_KEY_WORDS, 1, csize, cbuf); 1414fefe1b7SMartin Matuska if (error == 0) 1424fefe1b7SMartin Matuska ddt_zap_decompress(cbuf, phys, csize, psize); 143eda14cbcSMatt Macy 1444fefe1b7SMartin Matuska kmem_free(cbuf, csize); 145eda14cbcSMatt Macy 146eda14cbcSMatt Macy return (error); 147eda14cbcSMatt Macy } 148eda14cbcSMatt Macy 1494fefe1b7SMartin Matuska static int 1504fefe1b7SMartin Matuska ddt_zap_contains(objset_t *os, uint64_t object, const ddt_key_t *ddk) 1514fefe1b7SMartin Matuska { 1524fefe1b7SMartin Matuska return (zap_length_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS, 1534fefe1b7SMartin Matuska NULL, NULL)); 1544fefe1b7SMartin Matuska } 1554fefe1b7SMartin Matuska 156eda14cbcSMatt Macy static void 1574fefe1b7SMartin Matuska ddt_zap_prefetch(objset_t *os, uint64_t object, const ddt_key_t *ddk) 158eda14cbcSMatt Macy { 1594fefe1b7SMartin Matuska (void) zap_prefetch_uint64(os, object, (uint64_t *)ddk, DDT_KEY_WORDS); 160eda14cbcSMatt Macy } 161eda14cbcSMatt Macy 162ce4dcb97SMartin Matuska static void 163ce4dcb97SMartin Matuska ddt_zap_prefetch_all(objset_t *os, uint64_t object) 164ce4dcb97SMartin Matuska { 165ce4dcb97SMartin Matuska (void) zap_prefetch_object(os, object); 166ce4dcb97SMartin Matuska } 167ce4dcb97SMartin Matuska 168eda14cbcSMatt Macy static int 1694fefe1b7SMartin Matuska ddt_zap_update(objset_t *os, uint64_t object, const ddt_key_t *ddk, 170e2df9bb4SMartin Matuska const void *phys, size_t psize, dmu_tx_t *tx) 171eda14cbcSMatt Macy { 1724fefe1b7SMartin Matuska const size_t cbuf_size = psize + 1; 173eda14cbcSMatt Macy 1744fefe1b7SMartin Matuska uchar_t *cbuf = kmem_alloc(cbuf_size, KM_SLEEP); 175eda14cbcSMatt Macy 1764fefe1b7SMartin Matuska uint64_t csize = ddt_zap_compress(phys, cbuf, psize, cbuf_size); 1774fefe1b7SMartin Matuska 1784fefe1b7SMartin Matuska int error = zap_update_uint64(os, object, (uint64_t *)ddk, 1794fefe1b7SMartin Matuska DDT_KEY_WORDS, 1, csize, cbuf, tx); 1804fefe1b7SMartin Matuska 1814fefe1b7SMartin Matuska kmem_free(cbuf, cbuf_size); 1824fefe1b7SMartin Matuska 1834fefe1b7SMartin Matuska return (error); 184eda14cbcSMatt Macy } 185eda14cbcSMatt Macy 186eda14cbcSMatt Macy static int 1874fefe1b7SMartin Matuska ddt_zap_remove(objset_t *os, uint64_t object, const ddt_key_t *ddk, 1884fefe1b7SMartin Matuska dmu_tx_t *tx) 189eda14cbcSMatt Macy { 1904fefe1b7SMartin Matuska return (zap_remove_uint64(os, object, (uint64_t *)ddk, 191eda14cbcSMatt Macy DDT_KEY_WORDS, tx)); 192eda14cbcSMatt Macy } 193eda14cbcSMatt Macy 194eda14cbcSMatt Macy static int 1954fefe1b7SMartin Matuska ddt_zap_walk(objset_t *os, uint64_t object, uint64_t *walk, ddt_key_t *ddk, 196e2df9bb4SMartin Matuska void *phys, size_t psize) 197eda14cbcSMatt Macy { 198eda14cbcSMatt Macy zap_cursor_t zc; 199*7a7741afSMartin Matuska zap_attribute_t *za; 200eda14cbcSMatt Macy int error; 201eda14cbcSMatt Macy 202*7a7741afSMartin Matuska za = zap_attribute_alloc(); 203eda14cbcSMatt Macy if (*walk == 0) { 204eda14cbcSMatt Macy /* 205eda14cbcSMatt Macy * We don't want to prefetch the entire ZAP object, because 206eda14cbcSMatt Macy * it can be enormous. Also the primary use of DDT iteration 207eda14cbcSMatt Macy * is for scrubbing, in which case we will be issuing many 208eda14cbcSMatt Macy * scrub I/Os for each ZAP block that we read in, so 209eda14cbcSMatt Macy * reading the ZAP is unlikely to be the bottleneck. 210eda14cbcSMatt Macy */ 211eda14cbcSMatt Macy zap_cursor_init_noprefetch(&zc, os, object); 212eda14cbcSMatt Macy } else { 213eda14cbcSMatt Macy zap_cursor_init_serialized(&zc, os, object, *walk); 214eda14cbcSMatt Macy } 215*7a7741afSMartin Matuska if ((error = zap_cursor_retrieve(&zc, za)) == 0) { 216*7a7741afSMartin Matuska uint64_t csize = za->za_num_integers; 2174fefe1b7SMartin Matuska 218*7a7741afSMartin Matuska ASSERT3U(za->za_integer_length, ==, 1); 2194fefe1b7SMartin Matuska ASSERT3U(csize, <=, psize + 1); 2204fefe1b7SMartin Matuska 2214fefe1b7SMartin Matuska uchar_t *cbuf = kmem_alloc(csize, KM_SLEEP); 2224fefe1b7SMartin Matuska 223*7a7741afSMartin Matuska error = zap_lookup_uint64(os, object, (uint64_t *)za->za_name, 224eda14cbcSMatt Macy DDT_KEY_WORDS, 1, csize, cbuf); 2254fefe1b7SMartin Matuska ASSERT0(error); 226eda14cbcSMatt Macy if (error == 0) { 2274fefe1b7SMartin Matuska ddt_zap_decompress(cbuf, phys, csize, psize); 228*7a7741afSMartin Matuska *ddk = *(ddt_key_t *)za->za_name; 229eda14cbcSMatt Macy } 2304fefe1b7SMartin Matuska 2314fefe1b7SMartin Matuska kmem_free(cbuf, csize); 2324fefe1b7SMartin Matuska 233eda14cbcSMatt Macy zap_cursor_advance(&zc); 234eda14cbcSMatt Macy *walk = zap_cursor_serialize(&zc); 235eda14cbcSMatt Macy } 236eda14cbcSMatt Macy zap_cursor_fini(&zc); 237*7a7741afSMartin Matuska zap_attribute_free(za); 238eda14cbcSMatt Macy return (error); 239eda14cbcSMatt Macy } 240eda14cbcSMatt Macy 241eda14cbcSMatt Macy static int 242eda14cbcSMatt Macy ddt_zap_count(objset_t *os, uint64_t object, uint64_t *count) 243eda14cbcSMatt Macy { 244eda14cbcSMatt Macy return (zap_count(os, object, count)); 245eda14cbcSMatt Macy } 246eda14cbcSMatt Macy 247eda14cbcSMatt Macy const ddt_ops_t ddt_zap_ops = { 248eda14cbcSMatt Macy "zap", 249eda14cbcSMatt Macy ddt_zap_create, 250eda14cbcSMatt Macy ddt_zap_destroy, 251eda14cbcSMatt Macy ddt_zap_lookup, 2524fefe1b7SMartin Matuska ddt_zap_contains, 253eda14cbcSMatt Macy ddt_zap_prefetch, 254ce4dcb97SMartin Matuska ddt_zap_prefetch_all, 255eda14cbcSMatt Macy ddt_zap_update, 256eda14cbcSMatt Macy ddt_zap_remove, 257eda14cbcSMatt Macy ddt_zap_walk, 258eda14cbcSMatt Macy ddt_zap_count, 259eda14cbcSMatt Macy }; 2600a97523dSMartin Matuska 2610a97523dSMartin Matuska ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_bs, UINT, ZMOD_RW, 2620a97523dSMartin Matuska "DDT ZAP leaf blockshift"); 2630a97523dSMartin Matuska ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_ibs, UINT, ZMOD_RW, 2640a97523dSMartin Matuska "DDT ZAP indirect blockshift"); 265