1eda14cbcSMatt Macy /* 2eda14cbcSMatt Macy * BSD 3-Clause New License (https://spdx.org/licenses/BSD-3-Clause.html) 3eda14cbcSMatt Macy * 4eda14cbcSMatt Macy * Redistribution and use in source and binary forms, with or without 5eda14cbcSMatt Macy * modification, are permitted provided that the following conditions are met: 6eda14cbcSMatt Macy * 7eda14cbcSMatt Macy * 1. Redistributions of source code must retain the above copyright notice, 8eda14cbcSMatt Macy * this list of conditions and the following disclaimer. 9eda14cbcSMatt Macy * 10eda14cbcSMatt Macy * 2. Redistributions in binary form must reproduce the above copyright notice, 11eda14cbcSMatt Macy * this list of conditions and the following disclaimer in the documentation 12eda14cbcSMatt Macy * and/or other materials provided with the distribution. 13eda14cbcSMatt Macy * 14eda14cbcSMatt Macy * 3. Neither the name of the copyright holder nor the names of its 15eda14cbcSMatt Macy * contributors may be used to endorse or promote products derived from this 16eda14cbcSMatt Macy * software without specific prior written permission. 17eda14cbcSMatt Macy * 18eda14cbcSMatt Macy * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19eda14cbcSMatt Macy * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20eda14cbcSMatt Macy * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21eda14cbcSMatt Macy * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22eda14cbcSMatt Macy * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23eda14cbcSMatt Macy * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24eda14cbcSMatt Macy * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25eda14cbcSMatt Macy * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26eda14cbcSMatt Macy * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27eda14cbcSMatt Macy * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28eda14cbcSMatt Macy * POSSIBILITY OF SUCH DAMAGE. 29eda14cbcSMatt Macy */ 30eda14cbcSMatt Macy 31eda14cbcSMatt Macy /* 32eda14cbcSMatt Macy * Copyright (c) 2016-2018, Klara Inc. 33eda14cbcSMatt Macy * Copyright (c) 2016-2018, Allan Jude 34eda14cbcSMatt Macy * Copyright (c) 2018-2020, Sebastian Gottschall 35eda14cbcSMatt Macy * Copyright (c) 2019-2020, Michael Niewöhner 36eda14cbcSMatt Macy * Copyright (c) 2020, The FreeBSD Foundation [1] 37eda14cbcSMatt Macy * 38eda14cbcSMatt Macy * [1] Portions of this software were developed by Allan Jude 39eda14cbcSMatt Macy * under sponsorship from the FreeBSD Foundation. 40eda14cbcSMatt Macy */ 41eda14cbcSMatt Macy 42eda14cbcSMatt Macy #include <sys/param.h> 43eda14cbcSMatt Macy #include <sys/sysmacros.h> 44eda14cbcSMatt Macy #include <sys/zfs_context.h> 45eda14cbcSMatt Macy #include <sys/zio_compress.h> 46eda14cbcSMatt Macy #include <sys/spa.h> 47eda14cbcSMatt Macy #include <sys/zstd/zstd.h> 48eda14cbcSMatt Macy 49eda14cbcSMatt Macy #define ZSTD_STATIC_LINKING_ONLY 50eda14cbcSMatt Macy #include "lib/zstd.h" 51c03c5b1cSMartin Matuska #include "lib/common/zstd_errors.h" 52eda14cbcSMatt Macy 53e3aa18adSMartin Matuska #ifndef IN_LIBSA 54be181ee2SMartin Matuska static uint_t zstd_earlyabort_pass = 1; 55e3aa18adSMartin Matuska static int zstd_cutoff_level = ZIO_ZSTD_LEVEL_3; 56e3aa18adSMartin Matuska static unsigned int zstd_abort_size = (128 * 1024); 57e3aa18adSMartin Matuska #endif 58e3aa18adSMartin Matuska 59*e2df9bb4SMartin Matuska #ifdef IN_BASE 60*e2df9bb4SMartin Matuska int zfs_zstd_decompress_buf(void *, void *, size_t, size_t, int); 61*e2df9bb4SMartin Matuska #endif 62*e2df9bb4SMartin Matuska 63716fd348SMartin Matuska static kstat_t *zstd_ksp = NULL; 64eda14cbcSMatt Macy 65eda14cbcSMatt Macy typedef struct zstd_stats { 66eda14cbcSMatt Macy kstat_named_t zstd_stat_alloc_fail; 67eda14cbcSMatt Macy kstat_named_t zstd_stat_alloc_fallback; 68eda14cbcSMatt Macy kstat_named_t zstd_stat_com_alloc_fail; 69eda14cbcSMatt Macy kstat_named_t zstd_stat_dec_alloc_fail; 70eda14cbcSMatt Macy kstat_named_t zstd_stat_com_inval; 71eda14cbcSMatt Macy kstat_named_t zstd_stat_dec_inval; 72eda14cbcSMatt Macy kstat_named_t zstd_stat_dec_header_inval; 73eda14cbcSMatt Macy kstat_named_t zstd_stat_com_fail; 74eda14cbcSMatt Macy kstat_named_t zstd_stat_dec_fail; 75e3aa18adSMartin Matuska /* 76e3aa18adSMartin Matuska * LZ4 first-pass early abort verdict 77e3aa18adSMartin Matuska */ 78e3aa18adSMartin Matuska kstat_named_t zstd_stat_lz4pass_allowed; 79e3aa18adSMartin Matuska kstat_named_t zstd_stat_lz4pass_rejected; 80e3aa18adSMartin Matuska /* 81e3aa18adSMartin Matuska * zstd-1 second-pass early abort verdict 82e3aa18adSMartin Matuska */ 83e3aa18adSMartin Matuska kstat_named_t zstd_stat_zstdpass_allowed; 84e3aa18adSMartin Matuska kstat_named_t zstd_stat_zstdpass_rejected; 85e3aa18adSMartin Matuska /* 86e3aa18adSMartin Matuska * We excluded this from early abort for some reason 87e3aa18adSMartin Matuska */ 88e3aa18adSMartin Matuska kstat_named_t zstd_stat_passignored; 89e3aa18adSMartin Matuska kstat_named_t zstd_stat_passignored_size; 904a58b4abSMateusz Guzik kstat_named_t zstd_stat_buffers; 914a58b4abSMateusz Guzik kstat_named_t zstd_stat_size; 92eda14cbcSMatt Macy } zstd_stats_t; 93eda14cbcSMatt Macy 94eda14cbcSMatt Macy static zstd_stats_t zstd_stats = { 95eda14cbcSMatt Macy { "alloc_fail", KSTAT_DATA_UINT64 }, 96eda14cbcSMatt Macy { "alloc_fallback", KSTAT_DATA_UINT64 }, 97eda14cbcSMatt Macy { "compress_alloc_fail", KSTAT_DATA_UINT64 }, 98eda14cbcSMatt Macy { "decompress_alloc_fail", KSTAT_DATA_UINT64 }, 99eda14cbcSMatt Macy { "compress_level_invalid", KSTAT_DATA_UINT64 }, 100eda14cbcSMatt Macy { "decompress_level_invalid", KSTAT_DATA_UINT64 }, 101eda14cbcSMatt Macy { "decompress_header_invalid", KSTAT_DATA_UINT64 }, 102eda14cbcSMatt Macy { "compress_failed", KSTAT_DATA_UINT64 }, 103eda14cbcSMatt Macy { "decompress_failed", KSTAT_DATA_UINT64 }, 104e3aa18adSMartin Matuska { "lz4pass_allowed", KSTAT_DATA_UINT64 }, 105e3aa18adSMartin Matuska { "lz4pass_rejected", KSTAT_DATA_UINT64 }, 106e3aa18adSMartin Matuska { "zstdpass_allowed", KSTAT_DATA_UINT64 }, 107e3aa18adSMartin Matuska { "zstdpass_rejected", KSTAT_DATA_UINT64 }, 108e3aa18adSMartin Matuska { "passignored", KSTAT_DATA_UINT64 }, 109e3aa18adSMartin Matuska { "passignored_size", KSTAT_DATA_UINT64 }, 1104a58b4abSMateusz Guzik { "buffers", KSTAT_DATA_UINT64 }, 1114a58b4abSMateusz Guzik { "size", KSTAT_DATA_UINT64 }, 112eda14cbcSMatt Macy }; 113eda14cbcSMatt Macy 114e3aa18adSMartin Matuska #ifdef _KERNEL 115e3aa18adSMartin Matuska static int 116e3aa18adSMartin Matuska kstat_zstd_update(kstat_t *ksp, int rw) 117e3aa18adSMartin Matuska { 118e3aa18adSMartin Matuska ASSERT(ksp != NULL); 119e3aa18adSMartin Matuska 120e3aa18adSMartin Matuska if (rw == KSTAT_WRITE && ksp == zstd_ksp) { 121e3aa18adSMartin Matuska ZSTDSTAT_ZERO(zstd_stat_alloc_fail); 122e3aa18adSMartin Matuska ZSTDSTAT_ZERO(zstd_stat_alloc_fallback); 123e3aa18adSMartin Matuska ZSTDSTAT_ZERO(zstd_stat_com_alloc_fail); 124e3aa18adSMartin Matuska ZSTDSTAT_ZERO(zstd_stat_dec_alloc_fail); 125e3aa18adSMartin Matuska ZSTDSTAT_ZERO(zstd_stat_com_inval); 126e3aa18adSMartin Matuska ZSTDSTAT_ZERO(zstd_stat_dec_inval); 127e3aa18adSMartin Matuska ZSTDSTAT_ZERO(zstd_stat_dec_header_inval); 128e3aa18adSMartin Matuska ZSTDSTAT_ZERO(zstd_stat_com_fail); 129e3aa18adSMartin Matuska ZSTDSTAT_ZERO(zstd_stat_dec_fail); 130e3aa18adSMartin Matuska ZSTDSTAT_ZERO(zstd_stat_lz4pass_allowed); 131e3aa18adSMartin Matuska ZSTDSTAT_ZERO(zstd_stat_lz4pass_rejected); 132e3aa18adSMartin Matuska ZSTDSTAT_ZERO(zstd_stat_zstdpass_allowed); 133e3aa18adSMartin Matuska ZSTDSTAT_ZERO(zstd_stat_zstdpass_rejected); 134e3aa18adSMartin Matuska ZSTDSTAT_ZERO(zstd_stat_passignored); 135e3aa18adSMartin Matuska ZSTDSTAT_ZERO(zstd_stat_passignored_size); 136e3aa18adSMartin Matuska } 137e3aa18adSMartin Matuska 138e3aa18adSMartin Matuska return (0); 139e3aa18adSMartin Matuska } 140e3aa18adSMartin Matuska #endif 141e3aa18adSMartin Matuska 142eda14cbcSMatt Macy /* Enums describing the allocator type specified by kmem_type in zstd_kmem */ 143eda14cbcSMatt Macy enum zstd_kmem_type { 144eda14cbcSMatt Macy ZSTD_KMEM_UNKNOWN = 0, 145eda14cbcSMatt Macy /* Allocation type using kmem_vmalloc */ 146eda14cbcSMatt Macy ZSTD_KMEM_DEFAULT, 147eda14cbcSMatt Macy /* Pool based allocation using mempool_alloc */ 148eda14cbcSMatt Macy ZSTD_KMEM_POOL, 149eda14cbcSMatt Macy /* Reserved fallback memory for decompression only */ 150eda14cbcSMatt Macy ZSTD_KMEM_DCTX, 151eda14cbcSMatt Macy ZSTD_KMEM_COUNT, 152eda14cbcSMatt Macy }; 153eda14cbcSMatt Macy 154eda14cbcSMatt Macy /* Structure for pooled memory objects */ 155eda14cbcSMatt Macy struct zstd_pool { 156eda14cbcSMatt Macy void *mem; 157eda14cbcSMatt Macy size_t size; 158eda14cbcSMatt Macy kmutex_t barrier; 159eda14cbcSMatt Macy hrtime_t timeout; 160eda14cbcSMatt Macy }; 161eda14cbcSMatt Macy 162eda14cbcSMatt Macy /* Global structure for handling memory allocations */ 163eda14cbcSMatt Macy struct zstd_kmem { 164eda14cbcSMatt Macy enum zstd_kmem_type kmem_type; 165eda14cbcSMatt Macy size_t kmem_size; 166eda14cbcSMatt Macy struct zstd_pool *pool; 167eda14cbcSMatt Macy }; 168eda14cbcSMatt Macy 169eda14cbcSMatt Macy /* Fallback memory structure used for decompression only if memory runs out */ 170eda14cbcSMatt Macy struct zstd_fallback_mem { 171eda14cbcSMatt Macy size_t mem_size; 172eda14cbcSMatt Macy void *mem; 173eda14cbcSMatt Macy kmutex_t barrier; 174eda14cbcSMatt Macy }; 175eda14cbcSMatt Macy 176eda14cbcSMatt Macy struct zstd_levelmap { 177eda14cbcSMatt Macy int16_t zstd_level; 178eda14cbcSMatt Macy enum zio_zstd_levels level; 179eda14cbcSMatt Macy }; 180eda14cbcSMatt Macy 181eda14cbcSMatt Macy /* 182eda14cbcSMatt Macy * ZSTD memory handlers 183eda14cbcSMatt Macy * 184eda14cbcSMatt Macy * For decompression we use a different handler which also provides fallback 185eda14cbcSMatt Macy * memory allocation in case memory runs out. 186eda14cbcSMatt Macy * 187eda14cbcSMatt Macy * The ZSTD handlers were split up for the most simplified implementation. 188eda14cbcSMatt Macy */ 1894f0c9b76SWarner Losh #ifndef IN_LIBSA 190eda14cbcSMatt Macy static void *zstd_alloc(void *opaque, size_t size); 1914f0c9b76SWarner Losh #endif 192eda14cbcSMatt Macy static void *zstd_dctx_alloc(void *opaque, size_t size); 193eda14cbcSMatt Macy static void zstd_free(void *opaque, void *ptr); 194eda14cbcSMatt Macy 1954f0c9b76SWarner Losh #ifndef IN_LIBSA 196eda14cbcSMatt Macy /* Compression memory handler */ 197eda14cbcSMatt Macy static const ZSTD_customMem zstd_malloc = { 198eda14cbcSMatt Macy zstd_alloc, 199eda14cbcSMatt Macy zstd_free, 200eda14cbcSMatt Macy NULL, 201eda14cbcSMatt Macy }; 2024f0c9b76SWarner Losh #endif 203eda14cbcSMatt Macy 204eda14cbcSMatt Macy /* Decompression memory handler */ 205eda14cbcSMatt Macy static const ZSTD_customMem zstd_dctx_malloc = { 206eda14cbcSMatt Macy zstd_dctx_alloc, 207eda14cbcSMatt Macy zstd_free, 208eda14cbcSMatt Macy NULL, 209eda14cbcSMatt Macy }; 210eda14cbcSMatt Macy 211eda14cbcSMatt Macy /* Level map for converting ZFS internal levels to ZSTD levels and vice versa */ 212eda14cbcSMatt Macy static struct zstd_levelmap zstd_levels[] = { 213eda14cbcSMatt Macy {ZIO_ZSTD_LEVEL_1, ZIO_ZSTD_LEVEL_1}, 214eda14cbcSMatt Macy {ZIO_ZSTD_LEVEL_2, ZIO_ZSTD_LEVEL_2}, 215eda14cbcSMatt Macy {ZIO_ZSTD_LEVEL_3, ZIO_ZSTD_LEVEL_3}, 216eda14cbcSMatt Macy {ZIO_ZSTD_LEVEL_4, ZIO_ZSTD_LEVEL_4}, 217eda14cbcSMatt Macy {ZIO_ZSTD_LEVEL_5, ZIO_ZSTD_LEVEL_5}, 218eda14cbcSMatt Macy {ZIO_ZSTD_LEVEL_6, ZIO_ZSTD_LEVEL_6}, 219eda14cbcSMatt Macy {ZIO_ZSTD_LEVEL_7, ZIO_ZSTD_LEVEL_7}, 220eda14cbcSMatt Macy {ZIO_ZSTD_LEVEL_8, ZIO_ZSTD_LEVEL_8}, 221eda14cbcSMatt Macy {ZIO_ZSTD_LEVEL_9, ZIO_ZSTD_LEVEL_9}, 222eda14cbcSMatt Macy {ZIO_ZSTD_LEVEL_10, ZIO_ZSTD_LEVEL_10}, 223eda14cbcSMatt Macy {ZIO_ZSTD_LEVEL_11, ZIO_ZSTD_LEVEL_11}, 224eda14cbcSMatt Macy {ZIO_ZSTD_LEVEL_12, ZIO_ZSTD_LEVEL_12}, 225eda14cbcSMatt Macy {ZIO_ZSTD_LEVEL_13, ZIO_ZSTD_LEVEL_13}, 226eda14cbcSMatt Macy {ZIO_ZSTD_LEVEL_14, ZIO_ZSTD_LEVEL_14}, 227eda14cbcSMatt Macy {ZIO_ZSTD_LEVEL_15, ZIO_ZSTD_LEVEL_15}, 228eda14cbcSMatt Macy {ZIO_ZSTD_LEVEL_16, ZIO_ZSTD_LEVEL_16}, 229eda14cbcSMatt Macy {ZIO_ZSTD_LEVEL_17, ZIO_ZSTD_LEVEL_17}, 230eda14cbcSMatt Macy {ZIO_ZSTD_LEVEL_18, ZIO_ZSTD_LEVEL_18}, 231eda14cbcSMatt Macy {ZIO_ZSTD_LEVEL_19, ZIO_ZSTD_LEVEL_19}, 232eda14cbcSMatt Macy {-1, ZIO_ZSTD_LEVEL_FAST_1}, 233eda14cbcSMatt Macy {-2, ZIO_ZSTD_LEVEL_FAST_2}, 234eda14cbcSMatt Macy {-3, ZIO_ZSTD_LEVEL_FAST_3}, 235eda14cbcSMatt Macy {-4, ZIO_ZSTD_LEVEL_FAST_4}, 236eda14cbcSMatt Macy {-5, ZIO_ZSTD_LEVEL_FAST_5}, 237eda14cbcSMatt Macy {-6, ZIO_ZSTD_LEVEL_FAST_6}, 238eda14cbcSMatt Macy {-7, ZIO_ZSTD_LEVEL_FAST_7}, 239eda14cbcSMatt Macy {-8, ZIO_ZSTD_LEVEL_FAST_8}, 240eda14cbcSMatt Macy {-9, ZIO_ZSTD_LEVEL_FAST_9}, 241eda14cbcSMatt Macy {-10, ZIO_ZSTD_LEVEL_FAST_10}, 242eda14cbcSMatt Macy {-20, ZIO_ZSTD_LEVEL_FAST_20}, 243eda14cbcSMatt Macy {-30, ZIO_ZSTD_LEVEL_FAST_30}, 244eda14cbcSMatt Macy {-40, ZIO_ZSTD_LEVEL_FAST_40}, 245eda14cbcSMatt Macy {-50, ZIO_ZSTD_LEVEL_FAST_50}, 246eda14cbcSMatt Macy {-60, ZIO_ZSTD_LEVEL_FAST_60}, 247eda14cbcSMatt Macy {-70, ZIO_ZSTD_LEVEL_FAST_70}, 248eda14cbcSMatt Macy {-80, ZIO_ZSTD_LEVEL_FAST_80}, 249eda14cbcSMatt Macy {-90, ZIO_ZSTD_LEVEL_FAST_90}, 250eda14cbcSMatt Macy {-100, ZIO_ZSTD_LEVEL_FAST_100}, 251eda14cbcSMatt Macy {-500, ZIO_ZSTD_LEVEL_FAST_500}, 252eda14cbcSMatt Macy {-1000, ZIO_ZSTD_LEVEL_FAST_1000}, 253eda14cbcSMatt Macy }; 254eda14cbcSMatt Macy 255eda14cbcSMatt Macy /* 256eda14cbcSMatt Macy * This variable represents the maximum count of the pool based on the number 257eda14cbcSMatt Macy * of CPUs plus some buffer. We default to cpu count * 4, see init_zstd. 258eda14cbcSMatt Macy */ 259eda14cbcSMatt Macy static int pool_count = 16; 260eda14cbcSMatt Macy 261eda14cbcSMatt Macy #define ZSTD_POOL_MAX pool_count 262eda14cbcSMatt Macy #define ZSTD_POOL_TIMEOUT 60 * 2 263eda14cbcSMatt Macy 264eda14cbcSMatt Macy static struct zstd_fallback_mem zstd_dctx_fallback; 265eda14cbcSMatt Macy static struct zstd_pool *zstd_mempool_cctx; 266eda14cbcSMatt Macy static struct zstd_pool *zstd_mempool_dctx; 267eda14cbcSMatt Macy 2682617128aSMartin Matuska /* 2692617128aSMartin Matuska * The library zstd code expects these if ADDRESS_SANITIZER gets defined, 2702617128aSMartin Matuska * and while ASAN does this, KASAN defines that and does not. So to avoid 2712617128aSMartin Matuska * changing the external code, we do this. 2722617128aSMartin Matuska */ 273c03c5b1cSMartin Matuska #if defined(ZFS_ASAN_ENABLED) 2742617128aSMartin Matuska #define ADDRESS_SANITIZER 1 2752617128aSMartin Matuska #endif 2762617128aSMartin Matuska #if defined(_KERNEL) && defined(ADDRESS_SANITIZER) 2772617128aSMartin Matuska void __asan_unpoison_memory_region(void const volatile *addr, size_t size); 2782617128aSMartin Matuska void __asan_poison_memory_region(void const volatile *addr, size_t size); 2792617128aSMartin Matuska void __asan_unpoison_memory_region(void const volatile *addr, size_t size) {}; 2802617128aSMartin Matuska void __asan_poison_memory_region(void const volatile *addr, size_t size) {}; 2812617128aSMartin Matuska #endif 2822617128aSMartin Matuska 2837877fdebSMatt Macy 2847877fdebSMatt Macy static void 2857877fdebSMatt Macy zstd_mempool_reap(struct zstd_pool *zstd_mempool) 2867877fdebSMatt Macy { 2877877fdebSMatt Macy struct zstd_pool *pool; 2887877fdebSMatt Macy 2897877fdebSMatt Macy if (!zstd_mempool || !ZSTDSTAT(zstd_stat_buffers)) { 2907877fdebSMatt Macy return; 2917877fdebSMatt Macy } 2927877fdebSMatt Macy 2937877fdebSMatt Macy /* free obsolete slots */ 2947877fdebSMatt Macy for (int i = 0; i < ZSTD_POOL_MAX; i++) { 2957877fdebSMatt Macy pool = &zstd_mempool[i]; 2967877fdebSMatt Macy if (pool->mem && mutex_tryenter(&pool->barrier)) { 2977877fdebSMatt Macy /* Free memory if unused object older than 2 minutes */ 2987877fdebSMatt Macy if (pool->mem && gethrestime_sec() > pool->timeout) { 2997877fdebSMatt Macy vmem_free(pool->mem, pool->size); 3007877fdebSMatt Macy ZSTDSTAT_SUB(zstd_stat_buffers, 1); 3017877fdebSMatt Macy ZSTDSTAT_SUB(zstd_stat_size, pool->size); 3027877fdebSMatt Macy pool->mem = NULL; 3037877fdebSMatt Macy pool->size = 0; 3047877fdebSMatt Macy pool->timeout = 0; 3057877fdebSMatt Macy } 3067877fdebSMatt Macy mutex_exit(&pool->barrier); 3077877fdebSMatt Macy } 3087877fdebSMatt Macy } 3097877fdebSMatt Macy } 3107877fdebSMatt Macy 311eda14cbcSMatt Macy /* 312eda14cbcSMatt Macy * Try to get a cached allocated buffer from memory pool or allocate a new one 313eda14cbcSMatt Macy * if necessary. If a object is older than 2 minutes and does not fit the 314eda14cbcSMatt Macy * requested size, it will be released and a new cached entry will be allocated. 315eda14cbcSMatt Macy * If other pooled objects are detected without being used for 2 minutes, they 316eda14cbcSMatt Macy * will be released, too. 317eda14cbcSMatt Macy * 318eda14cbcSMatt Macy * The concept is that high frequency memory allocations of bigger objects are 319eda14cbcSMatt Macy * expensive. So if a lot of work is going on, allocations will be kept for a 320eda14cbcSMatt Macy * while and can be reused in that time frame. 321eda14cbcSMatt Macy * 322eda14cbcSMatt Macy * The scheduled release will be updated every time a object is reused. 323eda14cbcSMatt Macy */ 3247877fdebSMatt Macy 325eda14cbcSMatt Macy static void * 326eda14cbcSMatt Macy zstd_mempool_alloc(struct zstd_pool *zstd_mempool, size_t size) 327eda14cbcSMatt Macy { 328eda14cbcSMatt Macy struct zstd_pool *pool; 329eda14cbcSMatt Macy struct zstd_kmem *mem = NULL; 330eda14cbcSMatt Macy 331eda14cbcSMatt Macy if (!zstd_mempool) { 332eda14cbcSMatt Macy return (NULL); 333eda14cbcSMatt Macy } 334eda14cbcSMatt Macy 335eda14cbcSMatt Macy /* Seek for preallocated memory slot and free obsolete slots */ 336eda14cbcSMatt Macy for (int i = 0; i < ZSTD_POOL_MAX; i++) { 337eda14cbcSMatt Macy pool = &zstd_mempool[i]; 338eda14cbcSMatt Macy /* 33916038816SMartin Matuska * This lock is simply a marker for a pool object being in use. 340eda14cbcSMatt Macy * If it's already hold, it will be skipped. 341eda14cbcSMatt Macy * 342eda14cbcSMatt Macy * We need to create it before checking it to avoid race 343eda14cbcSMatt Macy * conditions caused by running in a threaded context. 344eda14cbcSMatt Macy * 345eda14cbcSMatt Macy * The lock is later released by zstd_mempool_free. 346eda14cbcSMatt Macy */ 347eda14cbcSMatt Macy if (mutex_tryenter(&pool->barrier)) { 348eda14cbcSMatt Macy /* 349eda14cbcSMatt Macy * Check if objects fits the size, if so we take it and 350eda14cbcSMatt Macy * update the timestamp. 351eda14cbcSMatt Macy */ 3527877fdebSMatt Macy if (pool->mem && size <= pool->size) { 353eda14cbcSMatt Macy pool->timeout = gethrestime_sec() + 354eda14cbcSMatt Macy ZSTD_POOL_TIMEOUT; 355eda14cbcSMatt Macy mem = pool->mem; 3567877fdebSMatt Macy return (mem); 357eda14cbcSMatt Macy } 358eda14cbcSMatt Macy mutex_exit(&pool->barrier); 359eda14cbcSMatt Macy } 360eda14cbcSMatt Macy } 361eda14cbcSMatt Macy 362eda14cbcSMatt Macy /* 363eda14cbcSMatt Macy * If no preallocated slot was found, try to fill in a new one. 364eda14cbcSMatt Macy * 365eda14cbcSMatt Macy * We run a similar algorithm twice here to avoid pool fragmentation. 366eda14cbcSMatt Macy * The first one may generate holes in the list if objects get released. 367eda14cbcSMatt Macy * We always make sure that these holes get filled instead of adding new 368eda14cbcSMatt Macy * allocations constantly at the end. 369eda14cbcSMatt Macy */ 370eda14cbcSMatt Macy for (int i = 0; i < ZSTD_POOL_MAX; i++) { 371eda14cbcSMatt Macy pool = &zstd_mempool[i]; 372eda14cbcSMatt Macy if (mutex_tryenter(&pool->barrier)) { 373eda14cbcSMatt Macy /* Object is free, try to allocate new one */ 374eda14cbcSMatt Macy if (!pool->mem) { 375eda14cbcSMatt Macy mem = vmem_alloc(size, KM_SLEEP); 3764a58b4abSMateusz Guzik if (mem) { 3774a58b4abSMateusz Guzik ZSTDSTAT_ADD(zstd_stat_buffers, 1); 3784a58b4abSMateusz Guzik ZSTDSTAT_ADD(zstd_stat_size, size); 379eda14cbcSMatt Macy pool->mem = mem; 3804a58b4abSMateusz Guzik pool->size = size; 381eda14cbcSMatt Macy /* Keep track for later release */ 382eda14cbcSMatt Macy mem->pool = pool; 383eda14cbcSMatt Macy mem->kmem_type = ZSTD_KMEM_POOL; 384eda14cbcSMatt Macy mem->kmem_size = size; 385eda14cbcSMatt Macy } 386eda14cbcSMatt Macy } 387eda14cbcSMatt Macy 388eda14cbcSMatt Macy if (size <= pool->size) { 389eda14cbcSMatt Macy /* Update timestamp */ 390eda14cbcSMatt Macy pool->timeout = gethrestime_sec() + 391eda14cbcSMatt Macy ZSTD_POOL_TIMEOUT; 392eda14cbcSMatt Macy 393eda14cbcSMatt Macy return (pool->mem); 394eda14cbcSMatt Macy } 395eda14cbcSMatt Macy 396eda14cbcSMatt Macy mutex_exit(&pool->barrier); 397eda14cbcSMatt Macy } 398eda14cbcSMatt Macy } 399eda14cbcSMatt Macy 400eda14cbcSMatt Macy /* 401eda14cbcSMatt Macy * If the pool is full or the allocation failed, try lazy allocation 402eda14cbcSMatt Macy * instead. 403eda14cbcSMatt Macy */ 404eda14cbcSMatt Macy if (!mem) { 405eda14cbcSMatt Macy mem = vmem_alloc(size, KM_NOSLEEP); 406eda14cbcSMatt Macy if (mem) { 407eda14cbcSMatt Macy mem->pool = NULL; 408eda14cbcSMatt Macy mem->kmem_type = ZSTD_KMEM_DEFAULT; 409eda14cbcSMatt Macy mem->kmem_size = size; 410eda14cbcSMatt Macy } 411eda14cbcSMatt Macy } 412eda14cbcSMatt Macy 413eda14cbcSMatt Macy return (mem); 414eda14cbcSMatt Macy } 415eda14cbcSMatt Macy 416eda14cbcSMatt Macy /* Mark object as released by releasing the barrier mutex */ 417eda14cbcSMatt Macy static void 418eda14cbcSMatt Macy zstd_mempool_free(struct zstd_kmem *z) 419eda14cbcSMatt Macy { 420eda14cbcSMatt Macy mutex_exit(&z->pool->barrier); 421eda14cbcSMatt Macy } 422eda14cbcSMatt Macy 423eda14cbcSMatt Macy /* Convert ZFS internal enum to ZSTD level */ 424eda14cbcSMatt Macy static int 425eda14cbcSMatt Macy zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level) 426eda14cbcSMatt Macy { 427eda14cbcSMatt Macy if (level > 0 && level <= ZIO_ZSTD_LEVEL_19) { 428eda14cbcSMatt Macy *zstd_level = zstd_levels[level - 1].zstd_level; 429eda14cbcSMatt Macy return (0); 430eda14cbcSMatt Macy } 431eda14cbcSMatt Macy if (level >= ZIO_ZSTD_LEVEL_FAST_1 && 432eda14cbcSMatt Macy level <= ZIO_ZSTD_LEVEL_FAST_1000) { 433eda14cbcSMatt Macy *zstd_level = zstd_levels[level - ZIO_ZSTD_LEVEL_FAST_1 434eda14cbcSMatt Macy + ZIO_ZSTD_LEVEL_19].zstd_level; 435eda14cbcSMatt Macy return (0); 436eda14cbcSMatt Macy } 437eda14cbcSMatt Macy 438eda14cbcSMatt Macy /* Invalid/unknown zfs compression enum - this should never happen. */ 439eda14cbcSMatt Macy return (1); 440eda14cbcSMatt Macy } 441eda14cbcSMatt Macy 442e3aa18adSMartin Matuska #ifndef IN_LIBSA 443*e2df9bb4SMartin Matuska static size_t 444e3aa18adSMartin Matuska zfs_zstd_compress_wrap(void *s_start, void *d_start, size_t s_len, size_t d_len, 445e3aa18adSMartin Matuska int level) 446e3aa18adSMartin Matuska { 447e3aa18adSMartin Matuska int16_t zstd_level; 448e3aa18adSMartin Matuska if (zstd_enum_to_level(level, &zstd_level)) { 449e3aa18adSMartin Matuska ZSTDSTAT_BUMP(zstd_stat_com_inval); 450e3aa18adSMartin Matuska return (s_len); 451e3aa18adSMartin Matuska } 452e3aa18adSMartin Matuska /* 453e3aa18adSMartin Matuska * A zstd early abort heuristic. 454e3aa18adSMartin Matuska * 455e3aa18adSMartin Matuska * - Zeroth, if this is <= zstd-3, or < zstd_abort_size (currently 456e3aa18adSMartin Matuska * 128k), don't try any of this, just go. 457e3aa18adSMartin Matuska * (because experimentally that was a reasonable cutoff for a perf win 458e3aa18adSMartin Matuska * with tiny ratio change) 459e3aa18adSMartin Matuska * - First, we try LZ4 compression, and if it doesn't early abort, we 460e3aa18adSMartin Matuska * jump directly to whatever compression level we intended to try. 461e3aa18adSMartin Matuska * - Second, we try zstd-1 - if that errors out (usually, but not 462e3aa18adSMartin Matuska * exclusively, if it would overflow), we give up early. 463e3aa18adSMartin Matuska * 464e3aa18adSMartin Matuska * If it works, instead we go on and compress anyway. 465e3aa18adSMartin Matuska * 466e3aa18adSMartin Matuska * Why two passes? LZ4 alone gets you a lot of the way, but on highly 467e3aa18adSMartin Matuska * compressible data, it was losing up to 8.5% of the compressed 468e3aa18adSMartin Matuska * savings versus no early abort, and all the zstd-fast levels are 469e3aa18adSMartin Matuska * worse indications on their own than LZ4, and don't improve the LZ4 470e3aa18adSMartin Matuska * pass noticably if stacked like this. 471e3aa18adSMartin Matuska */ 472e3aa18adSMartin Matuska size_t actual_abort_size = zstd_abort_size; 473e3aa18adSMartin Matuska if (zstd_earlyabort_pass > 0 && zstd_level >= zstd_cutoff_level && 474e3aa18adSMartin Matuska s_len >= actual_abort_size) { 475e3aa18adSMartin Matuska int pass_len = 1; 476*e2df9bb4SMartin Matuska pass_len = zfs_lz4_compress(s_start, d_start, s_len, d_len, 0); 477e3aa18adSMartin Matuska if (pass_len < d_len) { 478e3aa18adSMartin Matuska ZSTDSTAT_BUMP(zstd_stat_lz4pass_allowed); 479e3aa18adSMartin Matuska goto keep_trying; 480e3aa18adSMartin Matuska } 481e3aa18adSMartin Matuska ZSTDSTAT_BUMP(zstd_stat_lz4pass_rejected); 482e3aa18adSMartin Matuska 483e3aa18adSMartin Matuska pass_len = zfs_zstd_compress(s_start, d_start, s_len, d_len, 484e3aa18adSMartin Matuska ZIO_ZSTD_LEVEL_1); 485e3aa18adSMartin Matuska if (pass_len == s_len || pass_len <= 0 || pass_len > d_len) { 486e3aa18adSMartin Matuska ZSTDSTAT_BUMP(zstd_stat_zstdpass_rejected); 487e3aa18adSMartin Matuska return (s_len); 488e3aa18adSMartin Matuska } 489e3aa18adSMartin Matuska ZSTDSTAT_BUMP(zstd_stat_zstdpass_allowed); 490e3aa18adSMartin Matuska } else { 491e3aa18adSMartin Matuska ZSTDSTAT_BUMP(zstd_stat_passignored); 492e3aa18adSMartin Matuska if (s_len < actual_abort_size) { 493e3aa18adSMartin Matuska ZSTDSTAT_BUMP(zstd_stat_passignored_size); 494e3aa18adSMartin Matuska } 495e3aa18adSMartin Matuska } 496e3aa18adSMartin Matuska keep_trying: 497e3aa18adSMartin Matuska return (zfs_zstd_compress(s_start, d_start, s_len, d_len, level)); 498e3aa18adSMartin Matuska 499e3aa18adSMartin Matuska } 50021b492edSMartin Matuska 501eda14cbcSMatt Macy /* Compress block using zstd */ 502*e2df9bb4SMartin Matuska static size_t 503*e2df9bb4SMartin Matuska zfs_zstd_compress_impl(void *s_start, void *d_start, size_t s_len, size_t d_len, 504eda14cbcSMatt Macy int level) 505eda14cbcSMatt Macy { 506eda14cbcSMatt Macy size_t c_len; 507eda14cbcSMatt Macy int16_t zstd_level; 508eda14cbcSMatt Macy zfs_zstdhdr_t *hdr; 509eda14cbcSMatt Macy ZSTD_CCtx *cctx; 510eda14cbcSMatt Macy 511eda14cbcSMatt Macy hdr = (zfs_zstdhdr_t *)d_start; 512eda14cbcSMatt Macy 513eda14cbcSMatt Macy /* Skip compression if the specified level is invalid */ 514eda14cbcSMatt Macy if (zstd_enum_to_level(level, &zstd_level)) { 515eda14cbcSMatt Macy ZSTDSTAT_BUMP(zstd_stat_com_inval); 516eda14cbcSMatt Macy return (s_len); 517eda14cbcSMatt Macy } 518eda14cbcSMatt Macy 519eda14cbcSMatt Macy ASSERT3U(d_len, >=, sizeof (*hdr)); 520eda14cbcSMatt Macy ASSERT3U(d_len, <=, s_len); 521eda14cbcSMatt Macy ASSERT3U(zstd_level, !=, 0); 522eda14cbcSMatt Macy 523eda14cbcSMatt Macy cctx = ZSTD_createCCtx_advanced(zstd_malloc); 524eda14cbcSMatt Macy 525eda14cbcSMatt Macy /* 526eda14cbcSMatt Macy * Out of kernel memory, gently fall through - this will disable 527eda14cbcSMatt Macy * compression in zio_compress_data 528eda14cbcSMatt Macy */ 529eda14cbcSMatt Macy if (!cctx) { 530eda14cbcSMatt Macy ZSTDSTAT_BUMP(zstd_stat_com_alloc_fail); 531eda14cbcSMatt Macy return (s_len); 532eda14cbcSMatt Macy } 533eda14cbcSMatt Macy 534eda14cbcSMatt Macy /* Set the compression level */ 535eda14cbcSMatt Macy ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, zstd_level); 536eda14cbcSMatt Macy 537eda14cbcSMatt Macy /* Use the "magicless" zstd header which saves us 4 header bytes */ 538eda14cbcSMatt Macy ZSTD_CCtx_setParameter(cctx, ZSTD_c_format, ZSTD_f_zstd1_magicless); 539eda14cbcSMatt Macy 540eda14cbcSMatt Macy /* 541eda14cbcSMatt Macy * Disable redundant checksum calculation and content size storage since 542eda14cbcSMatt Macy * this is already done by ZFS itself. 543eda14cbcSMatt Macy */ 544eda14cbcSMatt Macy ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 0); 545eda14cbcSMatt Macy ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0); 546eda14cbcSMatt Macy 547eda14cbcSMatt Macy c_len = ZSTD_compress2(cctx, 548eda14cbcSMatt Macy hdr->data, 549eda14cbcSMatt Macy d_len - sizeof (*hdr), 550eda14cbcSMatt Macy s_start, s_len); 551eda14cbcSMatt Macy 552eda14cbcSMatt Macy ZSTD_freeCCtx(cctx); 553eda14cbcSMatt Macy 554eda14cbcSMatt Macy /* Error in the compression routine, disable compression. */ 555eda14cbcSMatt Macy if (ZSTD_isError(c_len)) { 556eda14cbcSMatt Macy /* 557eda14cbcSMatt Macy * If we are aborting the compression because the saves are 558eda14cbcSMatt Macy * too small, that is not a failure. Everything else is a 559eda14cbcSMatt Macy * failure, so increment the compression failure counter. 560eda14cbcSMatt Macy */ 561e3aa18adSMartin Matuska int err = ZSTD_getErrorCode(c_len); 562e3aa18adSMartin Matuska if (err != ZSTD_error_dstSize_tooSmall) { 563eda14cbcSMatt Macy ZSTDSTAT_BUMP(zstd_stat_com_fail); 564e3aa18adSMartin Matuska dprintf("Error: %s", ZSTD_getErrorString(err)); 565eda14cbcSMatt Macy } 566eda14cbcSMatt Macy return (s_len); 567eda14cbcSMatt Macy } 568eda14cbcSMatt Macy 569eda14cbcSMatt Macy /* 570eda14cbcSMatt Macy * Encode the compressed buffer size at the start. We'll need this in 571eda14cbcSMatt Macy * decompression to counter the effects of padding which might be added 572eda14cbcSMatt Macy * to the compressed buffer and which, if unhandled, would confuse the 573eda14cbcSMatt Macy * hell out of our decompression function. 574eda14cbcSMatt Macy */ 575eda14cbcSMatt Macy hdr->c_len = BE_32(c_len); 576eda14cbcSMatt Macy 577eda14cbcSMatt Macy /* 578eda14cbcSMatt Macy * Check version for overflow. 579eda14cbcSMatt Macy * The limit of 24 bits must not be exceeded. This allows a maximum 580eda14cbcSMatt Macy * version 1677.72.15 which we don't expect to be ever reached. 581eda14cbcSMatt Macy */ 582eda14cbcSMatt Macy ASSERT3U(ZSTD_VERSION_NUMBER, <=, 0xFFFFFF); 583eda14cbcSMatt Macy 584eda14cbcSMatt Macy /* 585eda14cbcSMatt Macy * Encode the compression level as well. We may need to know the 586eda14cbcSMatt Macy * original compression level if compressed_arc is disabled, to match 587eda14cbcSMatt Macy * the compression settings to write this block to the L2ARC. 588eda14cbcSMatt Macy * 589eda14cbcSMatt Macy * Encode the actual level, so if the enum changes in the future, we 590eda14cbcSMatt Macy * will be compatible. 591eda14cbcSMatt Macy * 592eda14cbcSMatt Macy * The upper 24 bits store the ZSTD version to be able to provide 593eda14cbcSMatt Macy * future compatibility, since new versions might enhance the 594eda14cbcSMatt Macy * compression algorithm in a way, where the compressed data will 595eda14cbcSMatt Macy * change. 596eda14cbcSMatt Macy * 597eda14cbcSMatt Macy * As soon as such incompatibility occurs, handling code needs to be 598eda14cbcSMatt Macy * added, differentiating between the versions. 599eda14cbcSMatt Macy */ 60021b492edSMartin Matuska zfs_set_hdrversion(hdr, ZSTD_VERSION_NUMBER); 60121b492edSMartin Matuska zfs_set_hdrlevel(hdr, level); 602eda14cbcSMatt Macy hdr->raw_version_level = BE_32(hdr->raw_version_level); 603eda14cbcSMatt Macy 604eda14cbcSMatt Macy return (c_len + sizeof (*hdr)); 605eda14cbcSMatt Macy } 606*e2df9bb4SMartin Matuska 607*e2df9bb4SMartin Matuska static size_t 608*e2df9bb4SMartin Matuska zfs_zstd_compress_buf(void *s_start, void *d_start, size_t s_len, size_t d_len, 609*e2df9bb4SMartin Matuska int level) 610*e2df9bb4SMartin Matuska { 611*e2df9bb4SMartin Matuska int16_t zstd_level; 612*e2df9bb4SMartin Matuska if (zstd_enum_to_level(level, &zstd_level)) { 613*e2df9bb4SMartin Matuska ZSTDSTAT_BUMP(zstd_stat_com_inval); 614*e2df9bb4SMartin Matuska return (s_len); 615*e2df9bb4SMartin Matuska } 616*e2df9bb4SMartin Matuska /* 617*e2df9bb4SMartin Matuska * A zstd early abort heuristic. 618*e2df9bb4SMartin Matuska * 619*e2df9bb4SMartin Matuska * - Zeroth, if this is <= zstd-3, or < zstd_abort_size (currently 620*e2df9bb4SMartin Matuska * 128k), don't try any of this, just go. 621*e2df9bb4SMartin Matuska * (because experimentally that was a reasonable cutoff for a perf win 622*e2df9bb4SMartin Matuska * with tiny ratio change) 623*e2df9bb4SMartin Matuska * - First, we try LZ4 compression, and if it doesn't early abort, we 624*e2df9bb4SMartin Matuska * jump directly to whatever compression level we intended to try. 625*e2df9bb4SMartin Matuska * - Second, we try zstd-1 - if that errors out (usually, but not 626*e2df9bb4SMartin Matuska * exclusively, if it would overflow), we give up early. 627*e2df9bb4SMartin Matuska * 628*e2df9bb4SMartin Matuska * If it works, instead we go on and compress anyway. 629*e2df9bb4SMartin Matuska * 630*e2df9bb4SMartin Matuska * Why two passes? LZ4 alone gets you a lot of the way, but on highly 631*e2df9bb4SMartin Matuska * compressible data, it was losing up to 8.5% of the compressed 632*e2df9bb4SMartin Matuska * savings versus no early abort, and all the zstd-fast levels are 633*e2df9bb4SMartin Matuska * worse indications on their own than LZ4, and don't improve the LZ4 634*e2df9bb4SMartin Matuska * pass noticably if stacked like this. 635*e2df9bb4SMartin Matuska */ 636*e2df9bb4SMartin Matuska size_t actual_abort_size = zstd_abort_size; 637*e2df9bb4SMartin Matuska if (zstd_earlyabort_pass > 0 && zstd_level >= zstd_cutoff_level && 638*e2df9bb4SMartin Matuska s_len >= actual_abort_size) { 639*e2df9bb4SMartin Matuska int pass_len = 1; 640*e2df9bb4SMartin Matuska abd_t sabd, dabd; 641*e2df9bb4SMartin Matuska abd_get_from_buf_struct(&sabd, s_start, s_len); 642*e2df9bb4SMartin Matuska abd_get_from_buf_struct(&dabd, d_start, d_len); 643*e2df9bb4SMartin Matuska pass_len = zfs_lz4_compress(&sabd, &dabd, s_len, d_len, 0); 644*e2df9bb4SMartin Matuska abd_free(&dabd); 645*e2df9bb4SMartin Matuska abd_free(&sabd); 646*e2df9bb4SMartin Matuska if (pass_len < d_len) { 647*e2df9bb4SMartin Matuska ZSTDSTAT_BUMP(zstd_stat_lz4pass_allowed); 648*e2df9bb4SMartin Matuska goto keep_trying; 649*e2df9bb4SMartin Matuska } 650*e2df9bb4SMartin Matuska ZSTDSTAT_BUMP(zstd_stat_lz4pass_rejected); 651*e2df9bb4SMartin Matuska 652*e2df9bb4SMartin Matuska pass_len = zfs_zstd_compress_impl(s_start, d_start, s_len, 653*e2df9bb4SMartin Matuska d_len, ZIO_ZSTD_LEVEL_1); 654*e2df9bb4SMartin Matuska if (pass_len == s_len || pass_len <= 0 || pass_len > d_len) { 655*e2df9bb4SMartin Matuska ZSTDSTAT_BUMP(zstd_stat_zstdpass_rejected); 656*e2df9bb4SMartin Matuska return (s_len); 657*e2df9bb4SMartin Matuska } 658*e2df9bb4SMartin Matuska ZSTDSTAT_BUMP(zstd_stat_zstdpass_allowed); 659*e2df9bb4SMartin Matuska } else { 660*e2df9bb4SMartin Matuska ZSTDSTAT_BUMP(zstd_stat_passignored); 661*e2df9bb4SMartin Matuska if (s_len < actual_abort_size) { 662*e2df9bb4SMartin Matuska ZSTDSTAT_BUMP(zstd_stat_passignored_size); 663*e2df9bb4SMartin Matuska } 664*e2df9bb4SMartin Matuska } 665*e2df9bb4SMartin Matuska keep_trying: 666*e2df9bb4SMartin Matuska return (zfs_zstd_compress_impl(s_start, d_start, s_len, d_len, level)); 667*e2df9bb4SMartin Matuska 668*e2df9bb4SMartin Matuska } 6694f0c9b76SWarner Losh #endif 670eda14cbcSMatt Macy 671eda14cbcSMatt Macy /* Decompress block using zstd and return its stored level */ 672*e2df9bb4SMartin Matuska static int 673*e2df9bb4SMartin Matuska zfs_zstd_decompress_level_buf(void *s_start, void *d_start, size_t s_len, 674eda14cbcSMatt Macy size_t d_len, uint8_t *level) 675eda14cbcSMatt Macy { 676eda14cbcSMatt Macy ZSTD_DCtx *dctx; 677eda14cbcSMatt Macy size_t result; 678eda14cbcSMatt Macy int16_t zstd_level; 679eda14cbcSMatt Macy uint32_t c_len; 680eda14cbcSMatt Macy const zfs_zstdhdr_t *hdr; 681eda14cbcSMatt Macy zfs_zstdhdr_t hdr_copy; 682eda14cbcSMatt Macy 683eda14cbcSMatt Macy hdr = (const zfs_zstdhdr_t *)s_start; 684eda14cbcSMatt Macy c_len = BE_32(hdr->c_len); 685eda14cbcSMatt Macy 686eda14cbcSMatt Macy /* 687eda14cbcSMatt Macy * Make a copy instead of directly converting the header, since we must 688eda14cbcSMatt Macy * not modify the original data that may be used again later. 689eda14cbcSMatt Macy */ 690eda14cbcSMatt Macy hdr_copy.raw_version_level = BE_32(hdr->raw_version_level); 69121b492edSMartin Matuska uint8_t curlevel = zfs_get_hdrlevel(&hdr_copy); 692eda14cbcSMatt Macy 693eda14cbcSMatt Macy /* 694eda14cbcSMatt Macy * NOTE: We ignore the ZSTD version for now. As soon as any 69516038816SMartin Matuska * incompatibility occurs, it has to be handled accordingly. 696eda14cbcSMatt Macy * The version can be accessed via `hdr_copy.version`. 697eda14cbcSMatt Macy */ 698eda14cbcSMatt Macy 699eda14cbcSMatt Macy /* 700eda14cbcSMatt Macy * Convert and check the level 701eda14cbcSMatt Macy * An invalid level is a strong indicator for data corruption! In such 702eda14cbcSMatt Macy * case return an error so the upper layers can try to fix it. 703eda14cbcSMatt Macy */ 70421b492edSMartin Matuska if (zstd_enum_to_level(curlevel, &zstd_level)) { 705eda14cbcSMatt Macy ZSTDSTAT_BUMP(zstd_stat_dec_inval); 706eda14cbcSMatt Macy return (1); 707eda14cbcSMatt Macy } 708eda14cbcSMatt Macy 709eda14cbcSMatt Macy ASSERT3U(d_len, >=, s_len); 71021b492edSMartin Matuska ASSERT3U(curlevel, !=, ZIO_COMPLEVEL_INHERIT); 711eda14cbcSMatt Macy 712eda14cbcSMatt Macy /* Invalid compressed buffer size encoded at start */ 713eda14cbcSMatt Macy if (c_len + sizeof (*hdr) > s_len) { 714eda14cbcSMatt Macy ZSTDSTAT_BUMP(zstd_stat_dec_header_inval); 715eda14cbcSMatt Macy return (1); 716eda14cbcSMatt Macy } 717eda14cbcSMatt Macy 718eda14cbcSMatt Macy dctx = ZSTD_createDCtx_advanced(zstd_dctx_malloc); 719eda14cbcSMatt Macy if (!dctx) { 720eda14cbcSMatt Macy ZSTDSTAT_BUMP(zstd_stat_dec_alloc_fail); 721eda14cbcSMatt Macy return (1); 722eda14cbcSMatt Macy } 723eda14cbcSMatt Macy 724eda14cbcSMatt Macy /* Set header type to "magicless" */ 725eda14cbcSMatt Macy ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless); 726eda14cbcSMatt Macy 727eda14cbcSMatt Macy /* Decompress the data and release the context */ 728eda14cbcSMatt Macy result = ZSTD_decompressDCtx(dctx, d_start, d_len, hdr->data, c_len); 729eda14cbcSMatt Macy ZSTD_freeDCtx(dctx); 730eda14cbcSMatt Macy 731eda14cbcSMatt Macy /* 732eda14cbcSMatt Macy * Returns 0 on success (decompression function returned non-negative) 733eda14cbcSMatt Macy * and non-zero on failure (decompression function returned negative. 734eda14cbcSMatt Macy */ 735eda14cbcSMatt Macy if (ZSTD_isError(result)) { 736eda14cbcSMatt Macy ZSTDSTAT_BUMP(zstd_stat_dec_fail); 737eda14cbcSMatt Macy return (1); 738eda14cbcSMatt Macy } 739eda14cbcSMatt Macy 740eda14cbcSMatt Macy if (level) { 74121b492edSMartin Matuska *level = curlevel; 742eda14cbcSMatt Macy } 743eda14cbcSMatt Macy 744eda14cbcSMatt Macy return (0); 745eda14cbcSMatt Macy } 746eda14cbcSMatt Macy 747eda14cbcSMatt Macy /* Decompress datablock using zstd */ 748*e2df9bb4SMartin Matuska #ifdef IN_BASE 749eda14cbcSMatt Macy int 750*e2df9bb4SMartin Matuska zfs_zstd_decompress_buf(void *s_start, void *d_start, size_t s_len, 751*e2df9bb4SMartin Matuska size_t d_len, int level __maybe_unused) 752eda14cbcSMatt Macy { 753eda14cbcSMatt Macy 754*e2df9bb4SMartin Matuska return (zfs_zstd_decompress_level_buf(s_start, d_start, s_len, d_len, 755eda14cbcSMatt Macy NULL)); 756eda14cbcSMatt Macy } 757*e2df9bb4SMartin Matuska #else 758*e2df9bb4SMartin Matuska static int 759*e2df9bb4SMartin Matuska zfs_zstd_decompress_buf(void *s_start, void *d_start, size_t s_len, 760*e2df9bb4SMartin Matuska size_t d_len, int level __maybe_unused) 761*e2df9bb4SMartin Matuska { 762*e2df9bb4SMartin Matuska 763*e2df9bb4SMartin Matuska return (zfs_zstd_decompress_level_buf(s_start, d_start, s_len, d_len, 764*e2df9bb4SMartin Matuska NULL)); 765*e2df9bb4SMartin Matuska } 766*e2df9bb4SMartin Matuska #endif 767eda14cbcSMatt Macy 7684f0c9b76SWarner Losh #ifndef IN_LIBSA 769*e2df9bb4SMartin Matuska ZFS_COMPRESS_WRAP_DECL(zfs_zstd_compress) 770*e2df9bb4SMartin Matuska ZFS_DECOMPRESS_WRAP_DECL(zfs_zstd_decompress) 771*e2df9bb4SMartin Matuska ZFS_DECOMPRESS_LEVEL_WRAP_DECL(zfs_zstd_decompress_level) 772*e2df9bb4SMartin Matuska 773eda14cbcSMatt Macy /* Allocator for zstd compression context using mempool_allocator */ 774eda14cbcSMatt Macy static void * 775eda14cbcSMatt Macy zstd_alloc(void *opaque __maybe_unused, size_t size) 776eda14cbcSMatt Macy { 777eda14cbcSMatt Macy size_t nbytes = sizeof (struct zstd_kmem) + size; 778eda14cbcSMatt Macy struct zstd_kmem *z = NULL; 779eda14cbcSMatt Macy 780eda14cbcSMatt Macy z = (struct zstd_kmem *)zstd_mempool_alloc(zstd_mempool_cctx, nbytes); 781eda14cbcSMatt Macy 782eda14cbcSMatt Macy if (!z) { 783eda14cbcSMatt Macy ZSTDSTAT_BUMP(zstd_stat_alloc_fail); 784eda14cbcSMatt Macy return (NULL); 785eda14cbcSMatt Macy } 786eda14cbcSMatt Macy 787eda14cbcSMatt Macy return ((void*)z + (sizeof (struct zstd_kmem))); 788eda14cbcSMatt Macy } 789eda14cbcSMatt Macy 790*e2df9bb4SMartin Matuska #endif 791eda14cbcSMatt Macy /* 792eda14cbcSMatt Macy * Allocator for zstd decompression context using mempool_allocator with 793eda14cbcSMatt Macy * fallback to reserved memory if allocation fails 794eda14cbcSMatt Macy */ 795eda14cbcSMatt Macy static void * 796eda14cbcSMatt Macy zstd_dctx_alloc(void *opaque __maybe_unused, size_t size) 797eda14cbcSMatt Macy { 798eda14cbcSMatt Macy size_t nbytes = sizeof (struct zstd_kmem) + size; 799eda14cbcSMatt Macy struct zstd_kmem *z = NULL; 800eda14cbcSMatt Macy enum zstd_kmem_type type = ZSTD_KMEM_DEFAULT; 801eda14cbcSMatt Macy 802eda14cbcSMatt Macy z = (struct zstd_kmem *)zstd_mempool_alloc(zstd_mempool_dctx, nbytes); 803eda14cbcSMatt Macy if (!z) { 804eda14cbcSMatt Macy /* Try harder, decompression shall not fail */ 805eda14cbcSMatt Macy z = vmem_alloc(nbytes, KM_SLEEP); 806eda14cbcSMatt Macy if (z) { 807eda14cbcSMatt Macy z->pool = NULL; 808eda14cbcSMatt Macy } 809eda14cbcSMatt Macy ZSTDSTAT_BUMP(zstd_stat_alloc_fail); 810eda14cbcSMatt Macy } else { 811eda14cbcSMatt Macy return ((void*)z + (sizeof (struct zstd_kmem))); 812eda14cbcSMatt Macy } 813eda14cbcSMatt Macy 814eda14cbcSMatt Macy /* Fallback if everything fails */ 815eda14cbcSMatt Macy if (!z) { 816eda14cbcSMatt Macy /* 817eda14cbcSMatt Macy * Barrier since we only can handle it in a single thread. All 818eda14cbcSMatt Macy * other following threads need to wait here until decompression 819eda14cbcSMatt Macy * is completed. zstd_free will release this barrier later. 820eda14cbcSMatt Macy */ 821eda14cbcSMatt Macy mutex_enter(&zstd_dctx_fallback.barrier); 822eda14cbcSMatt Macy 823eda14cbcSMatt Macy z = zstd_dctx_fallback.mem; 824eda14cbcSMatt Macy type = ZSTD_KMEM_DCTX; 825eda14cbcSMatt Macy ZSTDSTAT_BUMP(zstd_stat_alloc_fallback); 826eda14cbcSMatt Macy } 827eda14cbcSMatt Macy 828eda14cbcSMatt Macy /* Allocation should always be successful */ 829eda14cbcSMatt Macy if (!z) { 830eda14cbcSMatt Macy return (NULL); 831eda14cbcSMatt Macy } 832eda14cbcSMatt Macy 833eda14cbcSMatt Macy z->kmem_type = type; 834eda14cbcSMatt Macy z->kmem_size = nbytes; 835eda14cbcSMatt Macy 836eda14cbcSMatt Macy return ((void*)z + (sizeof (struct zstd_kmem))); 837eda14cbcSMatt Macy } 838eda14cbcSMatt Macy 839eda14cbcSMatt Macy /* Free allocated memory by its specific type */ 840eda14cbcSMatt Macy static void 841eda14cbcSMatt Macy zstd_free(void *opaque __maybe_unused, void *ptr) 842eda14cbcSMatt Macy { 843eda14cbcSMatt Macy struct zstd_kmem *z = (ptr - sizeof (struct zstd_kmem)); 844eda14cbcSMatt Macy enum zstd_kmem_type type; 845eda14cbcSMatt Macy 846eda14cbcSMatt Macy ASSERT3U(z->kmem_type, <, ZSTD_KMEM_COUNT); 847eda14cbcSMatt Macy ASSERT3U(z->kmem_type, >, ZSTD_KMEM_UNKNOWN); 848eda14cbcSMatt Macy 849eda14cbcSMatt Macy type = z->kmem_type; 850eda14cbcSMatt Macy switch (type) { 851eda14cbcSMatt Macy case ZSTD_KMEM_DEFAULT: 852eda14cbcSMatt Macy vmem_free(z, z->kmem_size); 853eda14cbcSMatt Macy break; 854eda14cbcSMatt Macy case ZSTD_KMEM_POOL: 855eda14cbcSMatt Macy zstd_mempool_free(z); 856eda14cbcSMatt Macy break; 857eda14cbcSMatt Macy case ZSTD_KMEM_DCTX: 858eda14cbcSMatt Macy mutex_exit(&zstd_dctx_fallback.barrier); 859eda14cbcSMatt Macy break; 860eda14cbcSMatt Macy default: 861eda14cbcSMatt Macy break; 862eda14cbcSMatt Macy } 863eda14cbcSMatt Macy } 864eda14cbcSMatt Macy 865eda14cbcSMatt Macy /* Allocate fallback memory to ensure safe decompression */ 866eda14cbcSMatt Macy static void __init 867eda14cbcSMatt Macy create_fallback_mem(struct zstd_fallback_mem *mem, size_t size) 868eda14cbcSMatt Macy { 869eda14cbcSMatt Macy mem->mem_size = size; 870eda14cbcSMatt Macy mem->mem = vmem_zalloc(mem->mem_size, KM_SLEEP); 871eda14cbcSMatt Macy mutex_init(&mem->barrier, NULL, MUTEX_DEFAULT, NULL); 872eda14cbcSMatt Macy } 873eda14cbcSMatt Macy 874eda14cbcSMatt Macy /* Initialize memory pool barrier mutexes */ 875eda14cbcSMatt Macy static void __init 876eda14cbcSMatt Macy zstd_mempool_init(void) 877eda14cbcSMatt Macy { 87815f0b8c3SMartin Matuska zstd_mempool_cctx = 879eda14cbcSMatt Macy kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP); 88015f0b8c3SMartin Matuska zstd_mempool_dctx = 881eda14cbcSMatt Macy kmem_zalloc(ZSTD_POOL_MAX * sizeof (struct zstd_pool), KM_SLEEP); 882eda14cbcSMatt Macy 883eda14cbcSMatt Macy for (int i = 0; i < ZSTD_POOL_MAX; i++) { 884eda14cbcSMatt Macy mutex_init(&zstd_mempool_cctx[i].barrier, NULL, 885eda14cbcSMatt Macy MUTEX_DEFAULT, NULL); 886eda14cbcSMatt Macy mutex_init(&zstd_mempool_dctx[i].barrier, NULL, 887eda14cbcSMatt Macy MUTEX_DEFAULT, NULL); 888eda14cbcSMatt Macy } 889eda14cbcSMatt Macy } 890eda14cbcSMatt Macy 891eda14cbcSMatt Macy /* Initialize zstd-related memory handling */ 892eda14cbcSMatt Macy static int __init 893eda14cbcSMatt Macy zstd_meminit(void) 894eda14cbcSMatt Macy { 895eda14cbcSMatt Macy zstd_mempool_init(); 896eda14cbcSMatt Macy 897eda14cbcSMatt Macy /* 898eda14cbcSMatt Macy * Estimate the size of the fallback decompression context. 899eda14cbcSMatt Macy * The expected size on x64 with current ZSTD should be about 160 KB. 900eda14cbcSMatt Macy */ 901eda14cbcSMatt Macy create_fallback_mem(&zstd_dctx_fallback, 902eda14cbcSMatt Macy P2ROUNDUP(ZSTD_estimateDCtxSize() + sizeof (struct zstd_kmem), 903eda14cbcSMatt Macy PAGESIZE)); 904eda14cbcSMatt Macy 905eda14cbcSMatt Macy return (0); 906eda14cbcSMatt Macy } 907eda14cbcSMatt Macy 908eda14cbcSMatt Macy /* Release object from pool and free memory */ 909716fd348SMartin Matuska static void 910eda14cbcSMatt Macy release_pool(struct zstd_pool *pool) 911eda14cbcSMatt Macy { 912eda14cbcSMatt Macy mutex_destroy(&pool->barrier); 913eda14cbcSMatt Macy vmem_free(pool->mem, pool->size); 914eda14cbcSMatt Macy pool->mem = NULL; 915eda14cbcSMatt Macy pool->size = 0; 916eda14cbcSMatt Macy } 917eda14cbcSMatt Macy 918eda14cbcSMatt Macy /* Release memory pool objects */ 919716fd348SMartin Matuska static void 920eda14cbcSMatt Macy zstd_mempool_deinit(void) 921eda14cbcSMatt Macy { 922eda14cbcSMatt Macy for (int i = 0; i < ZSTD_POOL_MAX; i++) { 923eda14cbcSMatt Macy release_pool(&zstd_mempool_cctx[i]); 924eda14cbcSMatt Macy release_pool(&zstd_mempool_dctx[i]); 925eda14cbcSMatt Macy } 926eda14cbcSMatt Macy 927eda14cbcSMatt Macy kmem_free(zstd_mempool_dctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool)); 928eda14cbcSMatt Macy kmem_free(zstd_mempool_cctx, ZSTD_POOL_MAX * sizeof (struct zstd_pool)); 929eda14cbcSMatt Macy zstd_mempool_dctx = NULL; 930eda14cbcSMatt Macy zstd_mempool_cctx = NULL; 931eda14cbcSMatt Macy } 932eda14cbcSMatt Macy 933c40487d4SMatt Macy /* release unused memory from pool */ 934c40487d4SMatt Macy 935c40487d4SMatt Macy void 936c40487d4SMatt Macy zfs_zstd_cache_reap_now(void) 937c40487d4SMatt Macy { 93836639c39SMateusz Guzik 93936639c39SMateusz Guzik /* 94036639c39SMateusz Guzik * Short-circuit if there are no buffers to begin with. 94136639c39SMateusz Guzik */ 94236639c39SMateusz Guzik if (ZSTDSTAT(zstd_stat_buffers) == 0) 94336639c39SMateusz Guzik return; 94436639c39SMateusz Guzik 945c40487d4SMatt Macy /* 946c40487d4SMatt Macy * calling alloc with zero size seeks 947c40487d4SMatt Macy * and releases old unused objects 948c40487d4SMatt Macy */ 9497877fdebSMatt Macy zstd_mempool_reap(zstd_mempool_cctx); 9507877fdebSMatt Macy zstd_mempool_reap(zstd_mempool_dctx); 951c40487d4SMatt Macy } 952c40487d4SMatt Macy 953eda14cbcSMatt Macy extern int __init 954eda14cbcSMatt Macy zstd_init(void) 955eda14cbcSMatt Macy { 956eda14cbcSMatt Macy /* Set pool size by using maximum sane thread count * 4 */ 957eda14cbcSMatt Macy pool_count = (boot_ncpus * 4); 958eda14cbcSMatt Macy zstd_meminit(); 959eda14cbcSMatt Macy 960eda14cbcSMatt Macy /* Initialize kstat */ 961eda14cbcSMatt Macy zstd_ksp = kstat_create("zfs", 0, "zstd", "misc", 962eda14cbcSMatt Macy KSTAT_TYPE_NAMED, sizeof (zstd_stats) / sizeof (kstat_named_t), 963eda14cbcSMatt Macy KSTAT_FLAG_VIRTUAL); 964eda14cbcSMatt Macy if (zstd_ksp != NULL) { 965eda14cbcSMatt Macy zstd_ksp->ks_data = &zstd_stats; 966eda14cbcSMatt Macy kstat_install(zstd_ksp); 967e3aa18adSMartin Matuska #ifdef _KERNEL 968e3aa18adSMartin Matuska zstd_ksp->ks_update = kstat_zstd_update; 969e3aa18adSMartin Matuska #endif 970eda14cbcSMatt Macy } 971eda14cbcSMatt Macy 972eda14cbcSMatt Macy return (0); 973eda14cbcSMatt Macy } 974eda14cbcSMatt Macy 975716fd348SMartin Matuska extern void 976eda14cbcSMatt Macy zstd_fini(void) 977eda14cbcSMatt Macy { 978eda14cbcSMatt Macy /* Deinitialize kstat */ 979eda14cbcSMatt Macy if (zstd_ksp != NULL) { 980eda14cbcSMatt Macy kstat_delete(zstd_ksp); 981eda14cbcSMatt Macy zstd_ksp = NULL; 982eda14cbcSMatt Macy } 983eda14cbcSMatt Macy 984eda14cbcSMatt Macy /* Release fallback memory */ 985eda14cbcSMatt Macy vmem_free(zstd_dctx_fallback.mem, zstd_dctx_fallback.mem_size); 986eda14cbcSMatt Macy mutex_destroy(&zstd_dctx_fallback.barrier); 987eda14cbcSMatt Macy 988eda14cbcSMatt Macy /* Deinit memory pool */ 989eda14cbcSMatt Macy zstd_mempool_deinit(); 990eda14cbcSMatt Macy } 991eda14cbcSMatt Macy 992eda14cbcSMatt Macy #if defined(_KERNEL) 993716fd348SMartin Matuska #ifdef __FreeBSD__ 994eda14cbcSMatt Macy module_init(zstd_init); 995eda14cbcSMatt Macy module_exit(zstd_fini); 996716fd348SMartin Matuska #endif 997eda14cbcSMatt Macy 998be181ee2SMartin Matuska ZFS_MODULE_PARAM(zfs, zstd_, earlyabort_pass, UINT, ZMOD_RW, 999e3aa18adSMartin Matuska "Enable early abort attempts when using zstd"); 1000e3aa18adSMartin Matuska ZFS_MODULE_PARAM(zfs, zstd_, abort_size, UINT, ZMOD_RW, 1001e3aa18adSMartin Matuska "Minimal size of block to attempt early abort"); 1002eda14cbcSMatt Macy #endif 1003