1eda14cbcSMatt Macy /* 2eda14cbcSMatt Macy * CDDL HEADER START 3eda14cbcSMatt Macy * 4eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7eda14cbcSMatt Macy * 8eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9eda14cbcSMatt Macy * or http://www.opensolaris.org/os/licensing. 10eda14cbcSMatt Macy * See the License for the specific language governing permissions 11eda14cbcSMatt Macy * and limitations under the License. 12eda14cbcSMatt Macy * 13eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18eda14cbcSMatt Macy * 19eda14cbcSMatt Macy * CDDL HEADER END 20eda14cbcSMatt Macy */ 21eda14cbcSMatt Macy /* 22eda14cbcSMatt Macy * Copyright (C) 2016 Gvozden Nešković. All rights reserved. 23eda14cbcSMatt Macy */ 24eda14cbcSMatt Macy 25eda14cbcSMatt Macy #include <sys/zfs_context.h> 26eda14cbcSMatt Macy #include <sys/types.h> 27eda14cbcSMatt Macy #include <sys/zio.h> 28eda14cbcSMatt Macy #include <sys/debug.h> 29eda14cbcSMatt Macy #include <sys/zfs_debug.h> 30eda14cbcSMatt Macy #include <sys/vdev_raidz.h> 31eda14cbcSMatt Macy #include <sys/vdev_raidz_impl.h> 32eda14cbcSMatt Macy #include <sys/simd.h> 33eda14cbcSMatt Macy 34eda14cbcSMatt Macy /* Opaque implementation with NULL methods to represent original methods */ 35eda14cbcSMatt Macy static const raidz_impl_ops_t vdev_raidz_original_impl = { 36eda14cbcSMatt Macy .name = "original", 37eda14cbcSMatt Macy .is_supported = raidz_will_scalar_work, 38eda14cbcSMatt Macy }; 39eda14cbcSMatt Macy 40eda14cbcSMatt Macy /* RAIDZ parity op that contain the fastest methods */ 41eda14cbcSMatt Macy static raidz_impl_ops_t vdev_raidz_fastest_impl = { 42eda14cbcSMatt Macy .name = "fastest" 43eda14cbcSMatt Macy }; 44eda14cbcSMatt Macy 45eda14cbcSMatt Macy /* All compiled in implementations */ 46*e92ffd9bSMartin Matuska static const raidz_impl_ops_t *const raidz_all_maths[] = { 47eda14cbcSMatt Macy &vdev_raidz_original_impl, 48eda14cbcSMatt Macy &vdev_raidz_scalar_impl, 49eda14cbcSMatt Macy #if defined(__x86_64) && defined(HAVE_SSE2) /* only x86_64 for now */ 50eda14cbcSMatt Macy &vdev_raidz_sse2_impl, 51eda14cbcSMatt Macy #endif 52eda14cbcSMatt Macy #if defined(__x86_64) && defined(HAVE_SSSE3) /* only x86_64 for now */ 53eda14cbcSMatt Macy &vdev_raidz_ssse3_impl, 54eda14cbcSMatt Macy #endif 55eda14cbcSMatt Macy #if defined(__x86_64) && defined(HAVE_AVX2) /* only x86_64 for now */ 56eda14cbcSMatt Macy &vdev_raidz_avx2_impl, 57eda14cbcSMatt Macy #endif 58eda14cbcSMatt Macy #if defined(__x86_64) && defined(HAVE_AVX512F) /* only x86_64 for now */ 59eda14cbcSMatt Macy &vdev_raidz_avx512f_impl, 60eda14cbcSMatt Macy #endif 61eda14cbcSMatt Macy #if defined(__x86_64) && defined(HAVE_AVX512BW) /* only x86_64 for now */ 62eda14cbcSMatt Macy &vdev_raidz_avx512bw_impl, 63eda14cbcSMatt Macy #endif 64ac0bf12eSMatt Macy #if defined(__aarch64__) && !defined(__FreeBSD__) 65eda14cbcSMatt Macy &vdev_raidz_aarch64_neon_impl, 66eda14cbcSMatt Macy &vdev_raidz_aarch64_neonx2_impl, 67eda14cbcSMatt Macy #endif 68eda14cbcSMatt Macy #if defined(__powerpc__) && defined(__altivec__) 69eda14cbcSMatt Macy &vdev_raidz_powerpc_altivec_impl, 70eda14cbcSMatt Macy #endif 71eda14cbcSMatt Macy }; 72eda14cbcSMatt Macy 73eda14cbcSMatt Macy /* Indicate that benchmark has been completed */ 74eda14cbcSMatt Macy static boolean_t raidz_math_initialized = B_FALSE; 75eda14cbcSMatt Macy 76eda14cbcSMatt Macy /* Select raidz implementation */ 77eda14cbcSMatt Macy #define IMPL_FASTEST (UINT32_MAX) 78eda14cbcSMatt Macy #define IMPL_CYCLE (UINT32_MAX - 1) 79eda14cbcSMatt Macy #define IMPL_ORIGINAL (0) 80eda14cbcSMatt Macy #define IMPL_SCALAR (1) 81eda14cbcSMatt Macy 82eda14cbcSMatt Macy #define RAIDZ_IMPL_READ(i) (*(volatile uint32_t *) &(i)) 83eda14cbcSMatt Macy 84eda14cbcSMatt Macy static uint32_t zfs_vdev_raidz_impl = IMPL_SCALAR; 85eda14cbcSMatt Macy static uint32_t user_sel_impl = IMPL_FASTEST; 86eda14cbcSMatt Macy 87eda14cbcSMatt Macy /* Hold all supported implementations */ 88eda14cbcSMatt Macy static size_t raidz_supp_impl_cnt = 0; 89eda14cbcSMatt Macy static raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths)]; 90eda14cbcSMatt Macy 91eda14cbcSMatt Macy #if defined(_KERNEL) 92eda14cbcSMatt Macy /* 93eda14cbcSMatt Macy * kstats values for supported implementations 94eda14cbcSMatt Macy * Values represent per disk throughput of 8 disk+parity raidz vdev [B/s] 95eda14cbcSMatt Macy */ 96eda14cbcSMatt Macy static raidz_impl_kstat_t raidz_impl_kstats[ARRAY_SIZE(raidz_all_maths) + 1]; 97eda14cbcSMatt Macy 98eda14cbcSMatt Macy /* kstat for benchmarked implementations */ 99eda14cbcSMatt Macy static kstat_t *raidz_math_kstat = NULL; 100eda14cbcSMatt Macy #endif 101eda14cbcSMatt Macy 102eda14cbcSMatt Macy /* 103eda14cbcSMatt Macy * Returns the RAIDZ operations for raidz_map() parity calculations. When 104eda14cbcSMatt Macy * a SIMD implementation is not allowed in the current context, then fallback 105eda14cbcSMatt Macy * to the fastest generic implementation. 106eda14cbcSMatt Macy */ 107eda14cbcSMatt Macy const raidz_impl_ops_t * 108eda14cbcSMatt Macy vdev_raidz_math_get_ops(void) 109eda14cbcSMatt Macy { 110eda14cbcSMatt Macy if (!kfpu_allowed()) 111eda14cbcSMatt Macy return (&vdev_raidz_scalar_impl); 112eda14cbcSMatt Macy 113eda14cbcSMatt Macy raidz_impl_ops_t *ops = NULL; 114eda14cbcSMatt Macy const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl); 115eda14cbcSMatt Macy 116eda14cbcSMatt Macy switch (impl) { 117eda14cbcSMatt Macy case IMPL_FASTEST: 118eda14cbcSMatt Macy ASSERT(raidz_math_initialized); 119eda14cbcSMatt Macy ops = &vdev_raidz_fastest_impl; 120eda14cbcSMatt Macy break; 121eda14cbcSMatt Macy case IMPL_CYCLE: 122eda14cbcSMatt Macy /* Cycle through all supported implementations */ 123eda14cbcSMatt Macy ASSERT(raidz_math_initialized); 124eda14cbcSMatt Macy ASSERT3U(raidz_supp_impl_cnt, >, 0); 125eda14cbcSMatt Macy static size_t cycle_impl_idx = 0; 126eda14cbcSMatt Macy size_t idx = (++cycle_impl_idx) % raidz_supp_impl_cnt; 127eda14cbcSMatt Macy ops = raidz_supp_impl[idx]; 128eda14cbcSMatt Macy break; 129eda14cbcSMatt Macy case IMPL_ORIGINAL: 130eda14cbcSMatt Macy ops = (raidz_impl_ops_t *)&vdev_raidz_original_impl; 131eda14cbcSMatt Macy break; 132eda14cbcSMatt Macy case IMPL_SCALAR: 133eda14cbcSMatt Macy ops = (raidz_impl_ops_t *)&vdev_raidz_scalar_impl; 134eda14cbcSMatt Macy break; 135eda14cbcSMatt Macy default: 136eda14cbcSMatt Macy ASSERT3U(impl, <, raidz_supp_impl_cnt); 137eda14cbcSMatt Macy ASSERT3U(raidz_supp_impl_cnt, >, 0); 138eda14cbcSMatt Macy if (impl < ARRAY_SIZE(raidz_all_maths)) 139eda14cbcSMatt Macy ops = raidz_supp_impl[impl]; 140eda14cbcSMatt Macy break; 141eda14cbcSMatt Macy } 142eda14cbcSMatt Macy 143eda14cbcSMatt Macy ASSERT3P(ops, !=, NULL); 144eda14cbcSMatt Macy 145eda14cbcSMatt Macy return (ops); 146eda14cbcSMatt Macy } 147eda14cbcSMatt Macy 148eda14cbcSMatt Macy /* 149eda14cbcSMatt Macy * Select parity generation method for raidz_map 150eda14cbcSMatt Macy */ 151eda14cbcSMatt Macy int 1527877fdebSMatt Macy vdev_raidz_math_generate(raidz_map_t *rm, raidz_row_t *rr) 153eda14cbcSMatt Macy { 154eda14cbcSMatt Macy raidz_gen_f gen_parity = NULL; 155eda14cbcSMatt Macy 156eda14cbcSMatt Macy switch (raidz_parity(rm)) { 157eda14cbcSMatt Macy case 1: 158eda14cbcSMatt Macy gen_parity = rm->rm_ops->gen[RAIDZ_GEN_P]; 159eda14cbcSMatt Macy break; 160eda14cbcSMatt Macy case 2: 161eda14cbcSMatt Macy gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQ]; 162eda14cbcSMatt Macy break; 163eda14cbcSMatt Macy case 3: 164eda14cbcSMatt Macy gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQR]; 165eda14cbcSMatt Macy break; 166eda14cbcSMatt Macy default: 167eda14cbcSMatt Macy gen_parity = NULL; 1681f88aa09SMartin Matuska cmn_err(CE_PANIC, "invalid RAID-Z configuration %llu", 1691f88aa09SMartin Matuska (u_longlong_t)raidz_parity(rm)); 170eda14cbcSMatt Macy break; 171eda14cbcSMatt Macy } 172eda14cbcSMatt Macy 173eda14cbcSMatt Macy /* if method is NULL execute the original implementation */ 174eda14cbcSMatt Macy if (gen_parity == NULL) 175eda14cbcSMatt Macy return (RAIDZ_ORIGINAL_IMPL); 176eda14cbcSMatt Macy 1777877fdebSMatt Macy gen_parity(rr); 178eda14cbcSMatt Macy 179eda14cbcSMatt Macy return (0); 180eda14cbcSMatt Macy } 181eda14cbcSMatt Macy 182eda14cbcSMatt Macy static raidz_rec_f 183eda14cbcSMatt Macy reconstruct_fun_p_sel(raidz_map_t *rm, const int *parity_valid, 184eda14cbcSMatt Macy const int nbaddata) 185eda14cbcSMatt Macy { 186eda14cbcSMatt Macy if (nbaddata == 1 && parity_valid[CODE_P]) { 187eda14cbcSMatt Macy return (rm->rm_ops->rec[RAIDZ_REC_P]); 188eda14cbcSMatt Macy } 189eda14cbcSMatt Macy return ((raidz_rec_f) NULL); 190eda14cbcSMatt Macy } 191eda14cbcSMatt Macy 192eda14cbcSMatt Macy static raidz_rec_f 193eda14cbcSMatt Macy reconstruct_fun_pq_sel(raidz_map_t *rm, const int *parity_valid, 194eda14cbcSMatt Macy const int nbaddata) 195eda14cbcSMatt Macy { 196eda14cbcSMatt Macy if (nbaddata == 1) { 197eda14cbcSMatt Macy if (parity_valid[CODE_P]) { 198eda14cbcSMatt Macy return (rm->rm_ops->rec[RAIDZ_REC_P]); 199eda14cbcSMatt Macy } else if (parity_valid[CODE_Q]) { 200eda14cbcSMatt Macy return (rm->rm_ops->rec[RAIDZ_REC_Q]); 201eda14cbcSMatt Macy } 202eda14cbcSMatt Macy } else if (nbaddata == 2 && 203eda14cbcSMatt Macy parity_valid[CODE_P] && parity_valid[CODE_Q]) { 204eda14cbcSMatt Macy return (rm->rm_ops->rec[RAIDZ_REC_PQ]); 205eda14cbcSMatt Macy } 206eda14cbcSMatt Macy return ((raidz_rec_f) NULL); 207eda14cbcSMatt Macy } 208eda14cbcSMatt Macy 209eda14cbcSMatt Macy static raidz_rec_f 210eda14cbcSMatt Macy reconstruct_fun_pqr_sel(raidz_map_t *rm, const int *parity_valid, 211eda14cbcSMatt Macy const int nbaddata) 212eda14cbcSMatt Macy { 213eda14cbcSMatt Macy if (nbaddata == 1) { 214eda14cbcSMatt Macy if (parity_valid[CODE_P]) { 215eda14cbcSMatt Macy return (rm->rm_ops->rec[RAIDZ_REC_P]); 216eda14cbcSMatt Macy } else if (parity_valid[CODE_Q]) { 217eda14cbcSMatt Macy return (rm->rm_ops->rec[RAIDZ_REC_Q]); 218eda14cbcSMatt Macy } else if (parity_valid[CODE_R]) { 219eda14cbcSMatt Macy return (rm->rm_ops->rec[RAIDZ_REC_R]); 220eda14cbcSMatt Macy } 221eda14cbcSMatt Macy } else if (nbaddata == 2) { 222eda14cbcSMatt Macy if (parity_valid[CODE_P] && parity_valid[CODE_Q]) { 223eda14cbcSMatt Macy return (rm->rm_ops->rec[RAIDZ_REC_PQ]); 224eda14cbcSMatt Macy } else if (parity_valid[CODE_P] && parity_valid[CODE_R]) { 225eda14cbcSMatt Macy return (rm->rm_ops->rec[RAIDZ_REC_PR]); 226eda14cbcSMatt Macy } else if (parity_valid[CODE_Q] && parity_valid[CODE_R]) { 227eda14cbcSMatt Macy return (rm->rm_ops->rec[RAIDZ_REC_QR]); 228eda14cbcSMatt Macy } 229eda14cbcSMatt Macy } else if (nbaddata == 3 && 230eda14cbcSMatt Macy parity_valid[CODE_P] && parity_valid[CODE_Q] && 231eda14cbcSMatt Macy parity_valid[CODE_R]) { 232eda14cbcSMatt Macy return (rm->rm_ops->rec[RAIDZ_REC_PQR]); 233eda14cbcSMatt Macy } 234eda14cbcSMatt Macy return ((raidz_rec_f) NULL); 235eda14cbcSMatt Macy } 236eda14cbcSMatt Macy 237eda14cbcSMatt Macy /* 238eda14cbcSMatt Macy * Select data reconstruction method for raidz_map 239eda14cbcSMatt Macy * @parity_valid - Parity validity flag 240eda14cbcSMatt Macy * @dt - Failed data index array 241eda14cbcSMatt Macy * @nbaddata - Number of failed data columns 242eda14cbcSMatt Macy */ 243eda14cbcSMatt Macy int 2447877fdebSMatt Macy vdev_raidz_math_reconstruct(raidz_map_t *rm, raidz_row_t *rr, 2457877fdebSMatt Macy const int *parity_valid, const int *dt, const int nbaddata) 246eda14cbcSMatt Macy { 247eda14cbcSMatt Macy raidz_rec_f rec_fn = NULL; 248eda14cbcSMatt Macy 249eda14cbcSMatt Macy switch (raidz_parity(rm)) { 250eda14cbcSMatt Macy case PARITY_P: 251eda14cbcSMatt Macy rec_fn = reconstruct_fun_p_sel(rm, parity_valid, nbaddata); 252eda14cbcSMatt Macy break; 253eda14cbcSMatt Macy case PARITY_PQ: 254eda14cbcSMatt Macy rec_fn = reconstruct_fun_pq_sel(rm, parity_valid, nbaddata); 255eda14cbcSMatt Macy break; 256eda14cbcSMatt Macy case PARITY_PQR: 257eda14cbcSMatt Macy rec_fn = reconstruct_fun_pqr_sel(rm, parity_valid, nbaddata); 258eda14cbcSMatt Macy break; 259eda14cbcSMatt Macy default: 2601f88aa09SMartin Matuska cmn_err(CE_PANIC, "invalid RAID-Z configuration %llu", 2611f88aa09SMartin Matuska (u_longlong_t)raidz_parity(rm)); 262eda14cbcSMatt Macy break; 263eda14cbcSMatt Macy } 264eda14cbcSMatt Macy 265eda14cbcSMatt Macy if (rec_fn == NULL) 266eda14cbcSMatt Macy return (RAIDZ_ORIGINAL_IMPL); 267eda14cbcSMatt Macy else 2687877fdebSMatt Macy return (rec_fn(rr, dt)); 269eda14cbcSMatt Macy } 270eda14cbcSMatt Macy 271*e92ffd9bSMartin Matuska const char *const raidz_gen_name[] = { 272eda14cbcSMatt Macy "gen_p", "gen_pq", "gen_pqr" 273eda14cbcSMatt Macy }; 274*e92ffd9bSMartin Matuska const char *const raidz_rec_name[] = { 275eda14cbcSMatt Macy "rec_p", "rec_q", "rec_r", 276eda14cbcSMatt Macy "rec_pq", "rec_pr", "rec_qr", "rec_pqr" 277eda14cbcSMatt Macy }; 278eda14cbcSMatt Macy 279eda14cbcSMatt Macy #if defined(_KERNEL) 280eda14cbcSMatt Macy 281eda14cbcSMatt Macy #define RAIDZ_KSTAT_LINE_LEN (17 + 10*12 + 1) 282eda14cbcSMatt Macy 283eda14cbcSMatt Macy static int 284eda14cbcSMatt Macy raidz_math_kstat_headers(char *buf, size_t size) 285eda14cbcSMatt Macy { 286eda14cbcSMatt Macy ASSERT3U(size, >=, RAIDZ_KSTAT_LINE_LEN); 287eda14cbcSMatt Macy 288*e92ffd9bSMartin Matuska ssize_t off = snprintf(buf, size, "%-17s", "implementation"); 289eda14cbcSMatt Macy 290*e92ffd9bSMartin Matuska for (int i = 0; i < ARRAY_SIZE(raidz_gen_name); i++) 291eda14cbcSMatt Macy off += snprintf(buf + off, size - off, "%-16s", 292eda14cbcSMatt Macy raidz_gen_name[i]); 293eda14cbcSMatt Macy 294*e92ffd9bSMartin Matuska for (int i = 0; i < ARRAY_SIZE(raidz_rec_name); i++) 295eda14cbcSMatt Macy off += snprintf(buf + off, size - off, "%-16s", 296eda14cbcSMatt Macy raidz_rec_name[i]); 297eda14cbcSMatt Macy 298eda14cbcSMatt Macy (void) snprintf(buf + off, size - off, "\n"); 299eda14cbcSMatt Macy 300eda14cbcSMatt Macy return (0); 301eda14cbcSMatt Macy } 302eda14cbcSMatt Macy 303eda14cbcSMatt Macy static int 304eda14cbcSMatt Macy raidz_math_kstat_data(char *buf, size_t size, void *data) 305eda14cbcSMatt Macy { 306eda14cbcSMatt Macy raidz_impl_kstat_t *fstat = &raidz_impl_kstats[raidz_supp_impl_cnt]; 307eda14cbcSMatt Macy raidz_impl_kstat_t *cstat = (raidz_impl_kstat_t *)data; 308eda14cbcSMatt Macy ssize_t off = 0; 309eda14cbcSMatt Macy int i; 310eda14cbcSMatt Macy 311eda14cbcSMatt Macy ASSERT3U(size, >=, RAIDZ_KSTAT_LINE_LEN); 312eda14cbcSMatt Macy 313eda14cbcSMatt Macy if (cstat == fstat) { 314eda14cbcSMatt Macy off += snprintf(buf + off, size - off, "%-17s", "fastest"); 315eda14cbcSMatt Macy 316eda14cbcSMatt Macy for (i = 0; i < ARRAY_SIZE(raidz_gen_name); i++) { 317eda14cbcSMatt Macy int id = fstat->gen[i]; 318eda14cbcSMatt Macy off += snprintf(buf + off, size - off, "%-16s", 319eda14cbcSMatt Macy raidz_supp_impl[id]->name); 320eda14cbcSMatt Macy } 321eda14cbcSMatt Macy for (i = 0; i < ARRAY_SIZE(raidz_rec_name); i++) { 322eda14cbcSMatt Macy int id = fstat->rec[i]; 323eda14cbcSMatt Macy off += snprintf(buf + off, size - off, "%-16s", 324eda14cbcSMatt Macy raidz_supp_impl[id]->name); 325eda14cbcSMatt Macy } 326eda14cbcSMatt Macy } else { 327eda14cbcSMatt Macy ptrdiff_t id = cstat - raidz_impl_kstats; 328eda14cbcSMatt Macy 329eda14cbcSMatt Macy off += snprintf(buf + off, size - off, "%-17s", 330eda14cbcSMatt Macy raidz_supp_impl[id]->name); 331eda14cbcSMatt Macy 332eda14cbcSMatt Macy for (i = 0; i < ARRAY_SIZE(raidz_gen_name); i++) 333eda14cbcSMatt Macy off += snprintf(buf + off, size - off, "%-16llu", 334eda14cbcSMatt Macy (u_longlong_t)cstat->gen[i]); 335eda14cbcSMatt Macy 336eda14cbcSMatt Macy for (i = 0; i < ARRAY_SIZE(raidz_rec_name); i++) 337eda14cbcSMatt Macy off += snprintf(buf + off, size - off, "%-16llu", 338eda14cbcSMatt Macy (u_longlong_t)cstat->rec[i]); 339eda14cbcSMatt Macy } 340eda14cbcSMatt Macy 341eda14cbcSMatt Macy (void) snprintf(buf + off, size - off, "\n"); 342eda14cbcSMatt Macy 343eda14cbcSMatt Macy return (0); 344eda14cbcSMatt Macy } 345eda14cbcSMatt Macy 346eda14cbcSMatt Macy static void * 347eda14cbcSMatt Macy raidz_math_kstat_addr(kstat_t *ksp, loff_t n) 348eda14cbcSMatt Macy { 349eda14cbcSMatt Macy if (n <= raidz_supp_impl_cnt) 350eda14cbcSMatt Macy ksp->ks_private = (void *) (raidz_impl_kstats + n); 351eda14cbcSMatt Macy else 352eda14cbcSMatt Macy ksp->ks_private = NULL; 353eda14cbcSMatt Macy 354eda14cbcSMatt Macy return (ksp->ks_private); 355eda14cbcSMatt Macy } 356eda14cbcSMatt Macy 357eda14cbcSMatt Macy #define BENCH_D_COLS (8ULL) 358eda14cbcSMatt Macy #define BENCH_COLS (BENCH_D_COLS + PARITY_PQR) 359eda14cbcSMatt Macy #define BENCH_ZIO_SIZE (1ULL << SPA_OLD_MAXBLOCKSHIFT) /* 128 kiB */ 3607877fdebSMatt Macy #define BENCH_NS MSEC2NSEC(1) /* 1ms */ 361eda14cbcSMatt Macy 362eda14cbcSMatt Macy typedef void (*benchmark_fn)(raidz_map_t *rm, const int fn); 363eda14cbcSMatt Macy 364eda14cbcSMatt Macy static void 365eda14cbcSMatt Macy benchmark_gen_impl(raidz_map_t *rm, const int fn) 366eda14cbcSMatt Macy { 367eda14cbcSMatt Macy (void) fn; 368eda14cbcSMatt Macy vdev_raidz_generate_parity(rm); 369eda14cbcSMatt Macy } 370eda14cbcSMatt Macy 371eda14cbcSMatt Macy static void 372eda14cbcSMatt Macy benchmark_rec_impl(raidz_map_t *rm, const int fn) 373eda14cbcSMatt Macy { 374eda14cbcSMatt Macy static const int rec_tgt[7][3] = { 375eda14cbcSMatt Macy {1, 2, 3}, /* rec_p: bad QR & D[0] */ 376eda14cbcSMatt Macy {0, 2, 3}, /* rec_q: bad PR & D[0] */ 377eda14cbcSMatt Macy {0, 1, 3}, /* rec_r: bad PQ & D[0] */ 378eda14cbcSMatt Macy {2, 3, 4}, /* rec_pq: bad R & D[0][1] */ 379eda14cbcSMatt Macy {1, 3, 4}, /* rec_pr: bad Q & D[0][1] */ 380eda14cbcSMatt Macy {0, 3, 4}, /* rec_qr: bad P & D[0][1] */ 381eda14cbcSMatt Macy {3, 4, 5} /* rec_pqr: bad & D[0][1][2] */ 382eda14cbcSMatt Macy }; 383eda14cbcSMatt Macy 384eda14cbcSMatt Macy vdev_raidz_reconstruct(rm, rec_tgt[fn], 3); 385eda14cbcSMatt Macy } 386eda14cbcSMatt Macy 387eda14cbcSMatt Macy /* 388eda14cbcSMatt Macy * Benchmarking of all supported implementations (raidz_supp_impl_cnt) 389eda14cbcSMatt Macy * is performed by setting the rm_ops pointer and calling the top level 390eda14cbcSMatt Macy * generate/reconstruct methods of bench_rm. 391eda14cbcSMatt Macy */ 392eda14cbcSMatt Macy static void 393eda14cbcSMatt Macy benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn) 394eda14cbcSMatt Macy { 395eda14cbcSMatt Macy uint64_t run_cnt, speed, best_speed = 0; 396eda14cbcSMatt Macy hrtime_t t_start, t_diff; 397eda14cbcSMatt Macy raidz_impl_ops_t *curr_impl; 398eda14cbcSMatt Macy raidz_impl_kstat_t *fstat = &raidz_impl_kstats[raidz_supp_impl_cnt]; 399eda14cbcSMatt Macy int impl, i; 400eda14cbcSMatt Macy 401eda14cbcSMatt Macy for (impl = 0; impl < raidz_supp_impl_cnt; impl++) { 402eda14cbcSMatt Macy /* set an implementation to benchmark */ 403eda14cbcSMatt Macy curr_impl = raidz_supp_impl[impl]; 404eda14cbcSMatt Macy bench_rm->rm_ops = curr_impl; 405eda14cbcSMatt Macy 406eda14cbcSMatt Macy run_cnt = 0; 407eda14cbcSMatt Macy t_start = gethrtime(); 408eda14cbcSMatt Macy 409eda14cbcSMatt Macy do { 4107877fdebSMatt Macy for (i = 0; i < 5; i++, run_cnt++) 411eda14cbcSMatt Macy bench_fn(bench_rm, fn); 412eda14cbcSMatt Macy 413eda14cbcSMatt Macy t_diff = gethrtime() - t_start; 414eda14cbcSMatt Macy } while (t_diff < BENCH_NS); 415eda14cbcSMatt Macy 416eda14cbcSMatt Macy speed = run_cnt * BENCH_ZIO_SIZE * NANOSEC; 417eda14cbcSMatt Macy speed /= (t_diff * BENCH_COLS); 418eda14cbcSMatt Macy 419eda14cbcSMatt Macy if (bench_fn == benchmark_gen_impl) 420eda14cbcSMatt Macy raidz_impl_kstats[impl].gen[fn] = speed; 421eda14cbcSMatt Macy else 422eda14cbcSMatt Macy raidz_impl_kstats[impl].rec[fn] = speed; 423eda14cbcSMatt Macy 424eda14cbcSMatt Macy /* Update fastest implementation method */ 425eda14cbcSMatt Macy if (speed > best_speed) { 426eda14cbcSMatt Macy best_speed = speed; 427eda14cbcSMatt Macy 428eda14cbcSMatt Macy if (bench_fn == benchmark_gen_impl) { 429eda14cbcSMatt Macy fstat->gen[fn] = impl; 430eda14cbcSMatt Macy vdev_raidz_fastest_impl.gen[fn] = 431eda14cbcSMatt Macy curr_impl->gen[fn]; 432eda14cbcSMatt Macy } else { 433eda14cbcSMatt Macy fstat->rec[fn] = impl; 434eda14cbcSMatt Macy vdev_raidz_fastest_impl.rec[fn] = 435eda14cbcSMatt Macy curr_impl->rec[fn]; 436eda14cbcSMatt Macy } 437eda14cbcSMatt Macy } 438eda14cbcSMatt Macy } 439eda14cbcSMatt Macy } 440eda14cbcSMatt Macy #endif 441eda14cbcSMatt Macy 442eda14cbcSMatt Macy /* 443eda14cbcSMatt Macy * Initialize and benchmark all supported implementations. 444eda14cbcSMatt Macy */ 445eda14cbcSMatt Macy static void 446eda14cbcSMatt Macy benchmark_raidz(void) 447eda14cbcSMatt Macy { 448eda14cbcSMatt Macy raidz_impl_ops_t *curr_impl; 449eda14cbcSMatt Macy int i, c; 450eda14cbcSMatt Macy 451eda14cbcSMatt Macy /* Move supported impl into raidz_supp_impl */ 452eda14cbcSMatt Macy for (i = 0, c = 0; i < ARRAY_SIZE(raidz_all_maths); i++) { 453eda14cbcSMatt Macy curr_impl = (raidz_impl_ops_t *)raidz_all_maths[i]; 454eda14cbcSMatt Macy 455eda14cbcSMatt Macy if (curr_impl->init) 456eda14cbcSMatt Macy curr_impl->init(); 457eda14cbcSMatt Macy 458eda14cbcSMatt Macy if (curr_impl->is_supported()) 459eda14cbcSMatt Macy raidz_supp_impl[c++] = (raidz_impl_ops_t *)curr_impl; 460eda14cbcSMatt Macy } 461eda14cbcSMatt Macy membar_producer(); /* complete raidz_supp_impl[] init */ 462eda14cbcSMatt Macy raidz_supp_impl_cnt = c; /* number of supported impl */ 463eda14cbcSMatt Macy 464eda14cbcSMatt Macy #if defined(_KERNEL) 46521b492edSMartin Matuska abd_t *pabd; 466eda14cbcSMatt Macy zio_t *bench_zio = NULL; 467eda14cbcSMatt Macy raidz_map_t *bench_rm = NULL; 468eda14cbcSMatt Macy uint64_t bench_parity; 469eda14cbcSMatt Macy 470eda14cbcSMatt Macy /* Fake a zio and run the benchmark on a warmed up buffer */ 471eda14cbcSMatt Macy bench_zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP); 472eda14cbcSMatt Macy bench_zio->io_offset = 0; 473eda14cbcSMatt Macy bench_zio->io_size = BENCH_ZIO_SIZE; /* only data columns */ 474eda14cbcSMatt Macy bench_zio->io_abd = abd_alloc_linear(BENCH_ZIO_SIZE, B_TRUE); 475eda14cbcSMatt Macy memset(abd_to_buf(bench_zio->io_abd), 0xAA, BENCH_ZIO_SIZE); 476eda14cbcSMatt Macy 477eda14cbcSMatt Macy /* Benchmark parity generation methods */ 478eda14cbcSMatt Macy for (int fn = 0; fn < RAIDZ_GEN_NUM; fn++) { 479eda14cbcSMatt Macy bench_parity = fn + 1; 480eda14cbcSMatt Macy /* New raidz_map is needed for each generate_p/q/r */ 481eda14cbcSMatt Macy bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT, 482eda14cbcSMatt Macy BENCH_D_COLS + bench_parity, bench_parity); 483eda14cbcSMatt Macy 484eda14cbcSMatt Macy benchmark_raidz_impl(bench_rm, fn, benchmark_gen_impl); 485eda14cbcSMatt Macy 486eda14cbcSMatt Macy vdev_raidz_map_free(bench_rm); 487eda14cbcSMatt Macy } 488eda14cbcSMatt Macy 489eda14cbcSMatt Macy /* Benchmark data reconstruction methods */ 490eda14cbcSMatt Macy bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT, 491eda14cbcSMatt Macy BENCH_COLS, PARITY_PQR); 492eda14cbcSMatt Macy 49321b492edSMartin Matuska /* Ensure that fake parity blocks are initialized */ 49421b492edSMartin Matuska for (c = 0; c < bench_rm->rm_row[0]->rr_firstdatacol; c++) { 49521b492edSMartin Matuska pabd = bench_rm->rm_row[0]->rr_col[c].rc_abd; 49621b492edSMartin Matuska memset(abd_to_buf(pabd), 0xAA, abd_get_size(pabd)); 49721b492edSMartin Matuska } 49821b492edSMartin Matuska 499eda14cbcSMatt Macy for (int fn = 0; fn < RAIDZ_REC_NUM; fn++) 500eda14cbcSMatt Macy benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl); 501eda14cbcSMatt Macy 502eda14cbcSMatt Macy vdev_raidz_map_free(bench_rm); 503eda14cbcSMatt Macy 504eda14cbcSMatt Macy /* cleanup the bench zio */ 505eda14cbcSMatt Macy abd_free(bench_zio->io_abd); 506eda14cbcSMatt Macy kmem_free(bench_zio, sizeof (zio_t)); 507eda14cbcSMatt Macy #else 508eda14cbcSMatt Macy /* 509eda14cbcSMatt Macy * Skip the benchmark in user space to avoid impacting libzpool 510eda14cbcSMatt Macy * consumers (zdb, zhack, zinject, ztest). The last implementation 511eda14cbcSMatt Macy * is assumed to be the fastest and used by default. 512eda14cbcSMatt Macy */ 513eda14cbcSMatt Macy memcpy(&vdev_raidz_fastest_impl, 514eda14cbcSMatt Macy raidz_supp_impl[raidz_supp_impl_cnt - 1], 515eda14cbcSMatt Macy sizeof (vdev_raidz_fastest_impl)); 516eda14cbcSMatt Macy strcpy(vdev_raidz_fastest_impl.name, "fastest"); 517eda14cbcSMatt Macy #endif /* _KERNEL */ 518eda14cbcSMatt Macy } 519eda14cbcSMatt Macy 520eda14cbcSMatt Macy void 521eda14cbcSMatt Macy vdev_raidz_math_init(void) 522eda14cbcSMatt Macy { 523eda14cbcSMatt Macy /* Determine the fastest available implementation. */ 524eda14cbcSMatt Macy benchmark_raidz(); 525eda14cbcSMatt Macy 526eda14cbcSMatt Macy #if defined(_KERNEL) 527eda14cbcSMatt Macy /* Install kstats for all implementations */ 528eda14cbcSMatt Macy raidz_math_kstat = kstat_create("zfs", 0, "vdev_raidz_bench", "misc", 529eda14cbcSMatt Macy KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL); 530eda14cbcSMatt Macy if (raidz_math_kstat != NULL) { 531eda14cbcSMatt Macy raidz_math_kstat->ks_data = NULL; 532eda14cbcSMatt Macy raidz_math_kstat->ks_ndata = UINT32_MAX; 533eda14cbcSMatt Macy kstat_set_raw_ops(raidz_math_kstat, 534eda14cbcSMatt Macy raidz_math_kstat_headers, 535eda14cbcSMatt Macy raidz_math_kstat_data, 536eda14cbcSMatt Macy raidz_math_kstat_addr); 537eda14cbcSMatt Macy kstat_install(raidz_math_kstat); 538eda14cbcSMatt Macy } 539eda14cbcSMatt Macy #endif 540eda14cbcSMatt Macy 541eda14cbcSMatt Macy /* Finish initialization */ 542eda14cbcSMatt Macy atomic_swap_32(&zfs_vdev_raidz_impl, user_sel_impl); 543eda14cbcSMatt Macy raidz_math_initialized = B_TRUE; 544eda14cbcSMatt Macy } 545eda14cbcSMatt Macy 546eda14cbcSMatt Macy void 547eda14cbcSMatt Macy vdev_raidz_math_fini(void) 548eda14cbcSMatt Macy { 549eda14cbcSMatt Macy raidz_impl_ops_t const *curr_impl; 550eda14cbcSMatt Macy 551eda14cbcSMatt Macy #if defined(_KERNEL) 552eda14cbcSMatt Macy if (raidz_math_kstat != NULL) { 553eda14cbcSMatt Macy kstat_delete(raidz_math_kstat); 554eda14cbcSMatt Macy raidz_math_kstat = NULL; 555eda14cbcSMatt Macy } 556eda14cbcSMatt Macy #endif 557eda14cbcSMatt Macy 558eda14cbcSMatt Macy for (int i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) { 559eda14cbcSMatt Macy curr_impl = raidz_all_maths[i]; 560eda14cbcSMatt Macy if (curr_impl->fini) 561eda14cbcSMatt Macy curr_impl->fini(); 562eda14cbcSMatt Macy } 563eda14cbcSMatt Macy } 564eda14cbcSMatt Macy 565eda14cbcSMatt Macy static const struct { 566eda14cbcSMatt Macy char *name; 567eda14cbcSMatt Macy uint32_t sel; 568eda14cbcSMatt Macy } math_impl_opts[] = { 569eda14cbcSMatt Macy { "cycle", IMPL_CYCLE }, 570eda14cbcSMatt Macy { "fastest", IMPL_FASTEST }, 571eda14cbcSMatt Macy { "original", IMPL_ORIGINAL }, 572eda14cbcSMatt Macy { "scalar", IMPL_SCALAR } 573eda14cbcSMatt Macy }; 574eda14cbcSMatt Macy 575eda14cbcSMatt Macy /* 576eda14cbcSMatt Macy * Function sets desired raidz implementation. 577eda14cbcSMatt Macy * 578eda14cbcSMatt Macy * If we are called before init(), user preference will be saved in 579eda14cbcSMatt Macy * user_sel_impl, and applied in later init() call. This occurs when module 580eda14cbcSMatt Macy * parameter is specified on module load. Otherwise, directly update 581eda14cbcSMatt Macy * zfs_vdev_raidz_impl. 582eda14cbcSMatt Macy * 583eda14cbcSMatt Macy * @val Name of raidz implementation to use 584eda14cbcSMatt Macy * @param Unused. 585eda14cbcSMatt Macy */ 586eda14cbcSMatt Macy int 587eda14cbcSMatt Macy vdev_raidz_impl_set(const char *val) 588eda14cbcSMatt Macy { 589eda14cbcSMatt Macy int err = -EINVAL; 590eda14cbcSMatt Macy char req_name[RAIDZ_IMPL_NAME_MAX]; 591eda14cbcSMatt Macy uint32_t impl = RAIDZ_IMPL_READ(user_sel_impl); 592eda14cbcSMatt Macy size_t i; 593eda14cbcSMatt Macy 594eda14cbcSMatt Macy /* sanitize input */ 595eda14cbcSMatt Macy i = strnlen(val, RAIDZ_IMPL_NAME_MAX); 596eda14cbcSMatt Macy if (i == 0 || i == RAIDZ_IMPL_NAME_MAX) 597eda14cbcSMatt Macy return (err); 598eda14cbcSMatt Macy 599eda14cbcSMatt Macy strlcpy(req_name, val, RAIDZ_IMPL_NAME_MAX); 600eda14cbcSMatt Macy while (i > 0 && !!isspace(req_name[i-1])) 601eda14cbcSMatt Macy i--; 602eda14cbcSMatt Macy req_name[i] = '\0'; 603eda14cbcSMatt Macy 604eda14cbcSMatt Macy /* Check mandatory options */ 605eda14cbcSMatt Macy for (i = 0; i < ARRAY_SIZE(math_impl_opts); i++) { 606eda14cbcSMatt Macy if (strcmp(req_name, math_impl_opts[i].name) == 0) { 607eda14cbcSMatt Macy impl = math_impl_opts[i].sel; 608eda14cbcSMatt Macy err = 0; 609eda14cbcSMatt Macy break; 610eda14cbcSMatt Macy } 611eda14cbcSMatt Macy } 612eda14cbcSMatt Macy 613eda14cbcSMatt Macy /* check all supported impl if init() was already called */ 614eda14cbcSMatt Macy if (err != 0 && raidz_math_initialized) { 615eda14cbcSMatt Macy /* check all supported implementations */ 616eda14cbcSMatt Macy for (i = 0; i < raidz_supp_impl_cnt; i++) { 617eda14cbcSMatt Macy if (strcmp(req_name, raidz_supp_impl[i]->name) == 0) { 618eda14cbcSMatt Macy impl = i; 619eda14cbcSMatt Macy err = 0; 620eda14cbcSMatt Macy break; 621eda14cbcSMatt Macy } 622eda14cbcSMatt Macy } 623eda14cbcSMatt Macy } 624eda14cbcSMatt Macy 625eda14cbcSMatt Macy if (err == 0) { 626eda14cbcSMatt Macy if (raidz_math_initialized) 627eda14cbcSMatt Macy atomic_swap_32(&zfs_vdev_raidz_impl, impl); 628eda14cbcSMatt Macy else 629eda14cbcSMatt Macy atomic_swap_32(&user_sel_impl, impl); 630eda14cbcSMatt Macy } 631eda14cbcSMatt Macy 632eda14cbcSMatt Macy return (err); 633eda14cbcSMatt Macy } 634eda14cbcSMatt Macy 635eda14cbcSMatt Macy #if defined(_KERNEL) && defined(__linux__) 636eda14cbcSMatt Macy 637eda14cbcSMatt Macy static int 638eda14cbcSMatt Macy zfs_vdev_raidz_impl_set(const char *val, zfs_kernel_param_t *kp) 639eda14cbcSMatt Macy { 640eda14cbcSMatt Macy return (vdev_raidz_impl_set(val)); 641eda14cbcSMatt Macy } 642eda14cbcSMatt Macy 643eda14cbcSMatt Macy static int 644eda14cbcSMatt Macy zfs_vdev_raidz_impl_get(char *buffer, zfs_kernel_param_t *kp) 645eda14cbcSMatt Macy { 646eda14cbcSMatt Macy int i, cnt = 0; 647eda14cbcSMatt Macy char *fmt; 648eda14cbcSMatt Macy const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl); 649eda14cbcSMatt Macy 650eda14cbcSMatt Macy ASSERT(raidz_math_initialized); 651eda14cbcSMatt Macy 652eda14cbcSMatt Macy /* list mandatory options */ 653eda14cbcSMatt Macy for (i = 0; i < ARRAY_SIZE(math_impl_opts) - 2; i++) { 654eda14cbcSMatt Macy fmt = (impl == math_impl_opts[i].sel) ? "[%s] " : "%s "; 655eda14cbcSMatt Macy cnt += sprintf(buffer + cnt, fmt, math_impl_opts[i].name); 656eda14cbcSMatt Macy } 657eda14cbcSMatt Macy 658eda14cbcSMatt Macy /* list all supported implementations */ 659eda14cbcSMatt Macy for (i = 0; i < raidz_supp_impl_cnt; i++) { 660eda14cbcSMatt Macy fmt = (i == impl) ? "[%s] " : "%s "; 661eda14cbcSMatt Macy cnt += sprintf(buffer + cnt, fmt, raidz_supp_impl[i]->name); 662eda14cbcSMatt Macy } 663eda14cbcSMatt Macy 664eda14cbcSMatt Macy return (cnt); 665eda14cbcSMatt Macy } 666eda14cbcSMatt Macy 667eda14cbcSMatt Macy module_param_call(zfs_vdev_raidz_impl, zfs_vdev_raidz_impl_set, 668eda14cbcSMatt Macy zfs_vdev_raidz_impl_get, NULL, 0644); 669eda14cbcSMatt Macy MODULE_PARM_DESC(zfs_vdev_raidz_impl, "Select raidz implementation."); 670eda14cbcSMatt Macy #endif 671