xref: /freebsd-src/sys/contrib/openzfs/module/zfs/vdev_raidz_math.c (revision e92ffd9b626833ebdbf2742c8ffddc6cd94b963e)
1eda14cbcSMatt Macy /*
2eda14cbcSMatt Macy  * CDDL HEADER START
3eda14cbcSMatt Macy  *
4eda14cbcSMatt Macy  * The contents of this file are subject to the terms of the
5eda14cbcSMatt Macy  * Common Development and Distribution License (the "License").
6eda14cbcSMatt Macy  * You may not use this file except in compliance with the License.
7eda14cbcSMatt Macy  *
8eda14cbcSMatt Macy  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9eda14cbcSMatt Macy  * or http://www.opensolaris.org/os/licensing.
10eda14cbcSMatt Macy  * See the License for the specific language governing permissions
11eda14cbcSMatt Macy  * and limitations under the License.
12eda14cbcSMatt Macy  *
13eda14cbcSMatt Macy  * When distributing Covered Code, include this CDDL HEADER in each
14eda14cbcSMatt Macy  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15eda14cbcSMatt Macy  * If applicable, add the following below this CDDL HEADER, with the
16eda14cbcSMatt Macy  * fields enclosed by brackets "[]" replaced with your own identifying
17eda14cbcSMatt Macy  * information: Portions Copyright [yyyy] [name of copyright owner]
18eda14cbcSMatt Macy  *
19eda14cbcSMatt Macy  * CDDL HEADER END
20eda14cbcSMatt Macy  */
21eda14cbcSMatt Macy /*
22eda14cbcSMatt Macy  * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
23eda14cbcSMatt Macy  */
24eda14cbcSMatt Macy 
25eda14cbcSMatt Macy #include <sys/zfs_context.h>
26eda14cbcSMatt Macy #include <sys/types.h>
27eda14cbcSMatt Macy #include <sys/zio.h>
28eda14cbcSMatt Macy #include <sys/debug.h>
29eda14cbcSMatt Macy #include <sys/zfs_debug.h>
30eda14cbcSMatt Macy #include <sys/vdev_raidz.h>
31eda14cbcSMatt Macy #include <sys/vdev_raidz_impl.h>
32eda14cbcSMatt Macy #include <sys/simd.h>
33eda14cbcSMatt Macy 
34eda14cbcSMatt Macy /* Opaque implementation with NULL methods to represent original methods */
35eda14cbcSMatt Macy static const raidz_impl_ops_t vdev_raidz_original_impl = {
36eda14cbcSMatt Macy 	.name = "original",
37eda14cbcSMatt Macy 	.is_supported = raidz_will_scalar_work,
38eda14cbcSMatt Macy };
39eda14cbcSMatt Macy 
40eda14cbcSMatt Macy /* RAIDZ parity op that contain the fastest methods */
41eda14cbcSMatt Macy static raidz_impl_ops_t vdev_raidz_fastest_impl = {
42eda14cbcSMatt Macy 	.name = "fastest"
43eda14cbcSMatt Macy };
44eda14cbcSMatt Macy 
45eda14cbcSMatt Macy /* All compiled in implementations */
46*e92ffd9bSMartin Matuska static const raidz_impl_ops_t *const raidz_all_maths[] = {
47eda14cbcSMatt Macy 	&vdev_raidz_original_impl,
48eda14cbcSMatt Macy 	&vdev_raidz_scalar_impl,
49eda14cbcSMatt Macy #if defined(__x86_64) && defined(HAVE_SSE2)	/* only x86_64 for now */
50eda14cbcSMatt Macy 	&vdev_raidz_sse2_impl,
51eda14cbcSMatt Macy #endif
52eda14cbcSMatt Macy #if defined(__x86_64) && defined(HAVE_SSSE3)	/* only x86_64 for now */
53eda14cbcSMatt Macy 	&vdev_raidz_ssse3_impl,
54eda14cbcSMatt Macy #endif
55eda14cbcSMatt Macy #if defined(__x86_64) && defined(HAVE_AVX2)	/* only x86_64 for now */
56eda14cbcSMatt Macy 	&vdev_raidz_avx2_impl,
57eda14cbcSMatt Macy #endif
58eda14cbcSMatt Macy #if defined(__x86_64) && defined(HAVE_AVX512F)	/* only x86_64 for now */
59eda14cbcSMatt Macy 	&vdev_raidz_avx512f_impl,
60eda14cbcSMatt Macy #endif
61eda14cbcSMatt Macy #if defined(__x86_64) && defined(HAVE_AVX512BW)	/* only x86_64 for now */
62eda14cbcSMatt Macy 	&vdev_raidz_avx512bw_impl,
63eda14cbcSMatt Macy #endif
64ac0bf12eSMatt Macy #if defined(__aarch64__) && !defined(__FreeBSD__)
65eda14cbcSMatt Macy 	&vdev_raidz_aarch64_neon_impl,
66eda14cbcSMatt Macy 	&vdev_raidz_aarch64_neonx2_impl,
67eda14cbcSMatt Macy #endif
68eda14cbcSMatt Macy #if defined(__powerpc__) && defined(__altivec__)
69eda14cbcSMatt Macy 	&vdev_raidz_powerpc_altivec_impl,
70eda14cbcSMatt Macy #endif
71eda14cbcSMatt Macy };
72eda14cbcSMatt Macy 
73eda14cbcSMatt Macy /* Indicate that benchmark has been completed */
74eda14cbcSMatt Macy static boolean_t raidz_math_initialized = B_FALSE;
75eda14cbcSMatt Macy 
76eda14cbcSMatt Macy /* Select raidz implementation */
77eda14cbcSMatt Macy #define	IMPL_FASTEST	(UINT32_MAX)
78eda14cbcSMatt Macy #define	IMPL_CYCLE	(UINT32_MAX - 1)
79eda14cbcSMatt Macy #define	IMPL_ORIGINAL	(0)
80eda14cbcSMatt Macy #define	IMPL_SCALAR	(1)
81eda14cbcSMatt Macy 
82eda14cbcSMatt Macy #define	RAIDZ_IMPL_READ(i)	(*(volatile uint32_t *) &(i))
83eda14cbcSMatt Macy 
84eda14cbcSMatt Macy static uint32_t zfs_vdev_raidz_impl = IMPL_SCALAR;
85eda14cbcSMatt Macy static uint32_t user_sel_impl = IMPL_FASTEST;
86eda14cbcSMatt Macy 
87eda14cbcSMatt Macy /* Hold all supported implementations */
88eda14cbcSMatt Macy static size_t raidz_supp_impl_cnt = 0;
89eda14cbcSMatt Macy static raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths)];
90eda14cbcSMatt Macy 
91eda14cbcSMatt Macy #if defined(_KERNEL)
92eda14cbcSMatt Macy /*
93eda14cbcSMatt Macy  * kstats values for supported implementations
94eda14cbcSMatt Macy  * Values represent per disk throughput of 8 disk+parity raidz vdev [B/s]
95eda14cbcSMatt Macy  */
96eda14cbcSMatt Macy static raidz_impl_kstat_t raidz_impl_kstats[ARRAY_SIZE(raidz_all_maths) + 1];
97eda14cbcSMatt Macy 
98eda14cbcSMatt Macy /* kstat for benchmarked implementations */
99eda14cbcSMatt Macy static kstat_t *raidz_math_kstat = NULL;
100eda14cbcSMatt Macy #endif
101eda14cbcSMatt Macy 
102eda14cbcSMatt Macy /*
103eda14cbcSMatt Macy  * Returns the RAIDZ operations for raidz_map() parity calculations.   When
104eda14cbcSMatt Macy  * a SIMD implementation is not allowed in the current context, then fallback
105eda14cbcSMatt Macy  * to the fastest generic implementation.
106eda14cbcSMatt Macy  */
107eda14cbcSMatt Macy const raidz_impl_ops_t *
108eda14cbcSMatt Macy vdev_raidz_math_get_ops(void)
109eda14cbcSMatt Macy {
110eda14cbcSMatt Macy 	if (!kfpu_allowed())
111eda14cbcSMatt Macy 		return (&vdev_raidz_scalar_impl);
112eda14cbcSMatt Macy 
113eda14cbcSMatt Macy 	raidz_impl_ops_t *ops = NULL;
114eda14cbcSMatt Macy 	const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
115eda14cbcSMatt Macy 
116eda14cbcSMatt Macy 	switch (impl) {
117eda14cbcSMatt Macy 	case IMPL_FASTEST:
118eda14cbcSMatt Macy 		ASSERT(raidz_math_initialized);
119eda14cbcSMatt Macy 		ops = &vdev_raidz_fastest_impl;
120eda14cbcSMatt Macy 		break;
121eda14cbcSMatt Macy 	case IMPL_CYCLE:
122eda14cbcSMatt Macy 		/* Cycle through all supported implementations */
123eda14cbcSMatt Macy 		ASSERT(raidz_math_initialized);
124eda14cbcSMatt Macy 		ASSERT3U(raidz_supp_impl_cnt, >, 0);
125eda14cbcSMatt Macy 		static size_t cycle_impl_idx = 0;
126eda14cbcSMatt Macy 		size_t idx = (++cycle_impl_idx) % raidz_supp_impl_cnt;
127eda14cbcSMatt Macy 		ops = raidz_supp_impl[idx];
128eda14cbcSMatt Macy 		break;
129eda14cbcSMatt Macy 	case IMPL_ORIGINAL:
130eda14cbcSMatt Macy 		ops = (raidz_impl_ops_t *)&vdev_raidz_original_impl;
131eda14cbcSMatt Macy 		break;
132eda14cbcSMatt Macy 	case IMPL_SCALAR:
133eda14cbcSMatt Macy 		ops = (raidz_impl_ops_t *)&vdev_raidz_scalar_impl;
134eda14cbcSMatt Macy 		break;
135eda14cbcSMatt Macy 	default:
136eda14cbcSMatt Macy 		ASSERT3U(impl, <, raidz_supp_impl_cnt);
137eda14cbcSMatt Macy 		ASSERT3U(raidz_supp_impl_cnt, >, 0);
138eda14cbcSMatt Macy 		if (impl < ARRAY_SIZE(raidz_all_maths))
139eda14cbcSMatt Macy 			ops = raidz_supp_impl[impl];
140eda14cbcSMatt Macy 		break;
141eda14cbcSMatt Macy 	}
142eda14cbcSMatt Macy 
143eda14cbcSMatt Macy 	ASSERT3P(ops, !=, NULL);
144eda14cbcSMatt Macy 
145eda14cbcSMatt Macy 	return (ops);
146eda14cbcSMatt Macy }
147eda14cbcSMatt Macy 
148eda14cbcSMatt Macy /*
149eda14cbcSMatt Macy  * Select parity generation method for raidz_map
150eda14cbcSMatt Macy  */
151eda14cbcSMatt Macy int
1527877fdebSMatt Macy vdev_raidz_math_generate(raidz_map_t *rm, raidz_row_t *rr)
153eda14cbcSMatt Macy {
154eda14cbcSMatt Macy 	raidz_gen_f gen_parity = NULL;
155eda14cbcSMatt Macy 
156eda14cbcSMatt Macy 	switch (raidz_parity(rm)) {
157eda14cbcSMatt Macy 		case 1:
158eda14cbcSMatt Macy 			gen_parity = rm->rm_ops->gen[RAIDZ_GEN_P];
159eda14cbcSMatt Macy 			break;
160eda14cbcSMatt Macy 		case 2:
161eda14cbcSMatt Macy 			gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQ];
162eda14cbcSMatt Macy 			break;
163eda14cbcSMatt Macy 		case 3:
164eda14cbcSMatt Macy 			gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQR];
165eda14cbcSMatt Macy 			break;
166eda14cbcSMatt Macy 		default:
167eda14cbcSMatt Macy 			gen_parity = NULL;
1681f88aa09SMartin Matuska 			cmn_err(CE_PANIC, "invalid RAID-Z configuration %llu",
1691f88aa09SMartin Matuska 			    (u_longlong_t)raidz_parity(rm));
170eda14cbcSMatt Macy 			break;
171eda14cbcSMatt Macy 	}
172eda14cbcSMatt Macy 
173eda14cbcSMatt Macy 	/* if method is NULL execute the original implementation */
174eda14cbcSMatt Macy 	if (gen_parity == NULL)
175eda14cbcSMatt Macy 		return (RAIDZ_ORIGINAL_IMPL);
176eda14cbcSMatt Macy 
1777877fdebSMatt Macy 	gen_parity(rr);
178eda14cbcSMatt Macy 
179eda14cbcSMatt Macy 	return (0);
180eda14cbcSMatt Macy }
181eda14cbcSMatt Macy 
182eda14cbcSMatt Macy static raidz_rec_f
183eda14cbcSMatt Macy reconstruct_fun_p_sel(raidz_map_t *rm, const int *parity_valid,
184eda14cbcSMatt Macy     const int nbaddata)
185eda14cbcSMatt Macy {
186eda14cbcSMatt Macy 	if (nbaddata == 1 && parity_valid[CODE_P]) {
187eda14cbcSMatt Macy 		return (rm->rm_ops->rec[RAIDZ_REC_P]);
188eda14cbcSMatt Macy 	}
189eda14cbcSMatt Macy 	return ((raidz_rec_f) NULL);
190eda14cbcSMatt Macy }
191eda14cbcSMatt Macy 
192eda14cbcSMatt Macy static raidz_rec_f
193eda14cbcSMatt Macy reconstruct_fun_pq_sel(raidz_map_t *rm, const int *parity_valid,
194eda14cbcSMatt Macy     const int nbaddata)
195eda14cbcSMatt Macy {
196eda14cbcSMatt Macy 	if (nbaddata == 1) {
197eda14cbcSMatt Macy 		if (parity_valid[CODE_P]) {
198eda14cbcSMatt Macy 			return (rm->rm_ops->rec[RAIDZ_REC_P]);
199eda14cbcSMatt Macy 		} else if (parity_valid[CODE_Q]) {
200eda14cbcSMatt Macy 			return (rm->rm_ops->rec[RAIDZ_REC_Q]);
201eda14cbcSMatt Macy 		}
202eda14cbcSMatt Macy 	} else if (nbaddata == 2 &&
203eda14cbcSMatt Macy 	    parity_valid[CODE_P] && parity_valid[CODE_Q]) {
204eda14cbcSMatt Macy 		return (rm->rm_ops->rec[RAIDZ_REC_PQ]);
205eda14cbcSMatt Macy 	}
206eda14cbcSMatt Macy 	return ((raidz_rec_f) NULL);
207eda14cbcSMatt Macy }
208eda14cbcSMatt Macy 
209eda14cbcSMatt Macy static raidz_rec_f
210eda14cbcSMatt Macy reconstruct_fun_pqr_sel(raidz_map_t *rm, const int *parity_valid,
211eda14cbcSMatt Macy     const int nbaddata)
212eda14cbcSMatt Macy {
213eda14cbcSMatt Macy 	if (nbaddata == 1) {
214eda14cbcSMatt Macy 		if (parity_valid[CODE_P]) {
215eda14cbcSMatt Macy 			return (rm->rm_ops->rec[RAIDZ_REC_P]);
216eda14cbcSMatt Macy 		} else if (parity_valid[CODE_Q]) {
217eda14cbcSMatt Macy 			return (rm->rm_ops->rec[RAIDZ_REC_Q]);
218eda14cbcSMatt Macy 		} else if (parity_valid[CODE_R]) {
219eda14cbcSMatt Macy 			return (rm->rm_ops->rec[RAIDZ_REC_R]);
220eda14cbcSMatt Macy 		}
221eda14cbcSMatt Macy 	} else if (nbaddata == 2) {
222eda14cbcSMatt Macy 		if (parity_valid[CODE_P] && parity_valid[CODE_Q]) {
223eda14cbcSMatt Macy 			return (rm->rm_ops->rec[RAIDZ_REC_PQ]);
224eda14cbcSMatt Macy 		} else if (parity_valid[CODE_P] && parity_valid[CODE_R]) {
225eda14cbcSMatt Macy 			return (rm->rm_ops->rec[RAIDZ_REC_PR]);
226eda14cbcSMatt Macy 		} else if (parity_valid[CODE_Q] && parity_valid[CODE_R]) {
227eda14cbcSMatt Macy 			return (rm->rm_ops->rec[RAIDZ_REC_QR]);
228eda14cbcSMatt Macy 		}
229eda14cbcSMatt Macy 	} else if (nbaddata == 3 &&
230eda14cbcSMatt Macy 	    parity_valid[CODE_P] && parity_valid[CODE_Q] &&
231eda14cbcSMatt Macy 	    parity_valid[CODE_R]) {
232eda14cbcSMatt Macy 		return (rm->rm_ops->rec[RAIDZ_REC_PQR]);
233eda14cbcSMatt Macy 	}
234eda14cbcSMatt Macy 	return ((raidz_rec_f) NULL);
235eda14cbcSMatt Macy }
236eda14cbcSMatt Macy 
237eda14cbcSMatt Macy /*
238eda14cbcSMatt Macy  * Select data reconstruction method for raidz_map
239eda14cbcSMatt Macy  * @parity_valid - Parity validity flag
240eda14cbcSMatt Macy  * @dt           - Failed data index array
241eda14cbcSMatt Macy  * @nbaddata     - Number of failed data columns
242eda14cbcSMatt Macy  */
243eda14cbcSMatt Macy int
2447877fdebSMatt Macy vdev_raidz_math_reconstruct(raidz_map_t *rm, raidz_row_t *rr,
2457877fdebSMatt Macy     const int *parity_valid, const int *dt, const int nbaddata)
246eda14cbcSMatt Macy {
247eda14cbcSMatt Macy 	raidz_rec_f rec_fn = NULL;
248eda14cbcSMatt Macy 
249eda14cbcSMatt Macy 	switch (raidz_parity(rm)) {
250eda14cbcSMatt Macy 	case PARITY_P:
251eda14cbcSMatt Macy 		rec_fn = reconstruct_fun_p_sel(rm, parity_valid, nbaddata);
252eda14cbcSMatt Macy 		break;
253eda14cbcSMatt Macy 	case PARITY_PQ:
254eda14cbcSMatt Macy 		rec_fn = reconstruct_fun_pq_sel(rm, parity_valid, nbaddata);
255eda14cbcSMatt Macy 		break;
256eda14cbcSMatt Macy 	case PARITY_PQR:
257eda14cbcSMatt Macy 		rec_fn = reconstruct_fun_pqr_sel(rm, parity_valid, nbaddata);
258eda14cbcSMatt Macy 		break;
259eda14cbcSMatt Macy 	default:
2601f88aa09SMartin Matuska 		cmn_err(CE_PANIC, "invalid RAID-Z configuration %llu",
2611f88aa09SMartin Matuska 		    (u_longlong_t)raidz_parity(rm));
262eda14cbcSMatt Macy 		break;
263eda14cbcSMatt Macy 	}
264eda14cbcSMatt Macy 
265eda14cbcSMatt Macy 	if (rec_fn == NULL)
266eda14cbcSMatt Macy 		return (RAIDZ_ORIGINAL_IMPL);
267eda14cbcSMatt Macy 	else
2687877fdebSMatt Macy 		return (rec_fn(rr, dt));
269eda14cbcSMatt Macy }
270eda14cbcSMatt Macy 
/* Printable names of the parity generation methods */
const char *const raidz_gen_name[] = {
	"gen_p",
	"gen_pq",
	"gen_pqr"
};

/* Printable names of the data reconstruction methods */
const char *const raidz_rec_name[] = {
	"rec_p",
	"rec_q",
	"rec_r",
	"rec_pq",
	"rec_pr",
	"rec_qr",
	"rec_pqr"
};
278eda14cbcSMatt Macy 
279eda14cbcSMatt Macy #if defined(_KERNEL)
280eda14cbcSMatt Macy 
281eda14cbcSMatt Macy #define	RAIDZ_KSTAT_LINE_LEN	(17 + 10*12 + 1)
282eda14cbcSMatt Macy 
283eda14cbcSMatt Macy static int
284eda14cbcSMatt Macy raidz_math_kstat_headers(char *buf, size_t size)
285eda14cbcSMatt Macy {
286eda14cbcSMatt Macy 	ASSERT3U(size, >=, RAIDZ_KSTAT_LINE_LEN);
287eda14cbcSMatt Macy 
288*e92ffd9bSMartin Matuska 	ssize_t off = snprintf(buf, size, "%-17s", "implementation");
289eda14cbcSMatt Macy 
290*e92ffd9bSMartin Matuska 	for (int i = 0; i < ARRAY_SIZE(raidz_gen_name); i++)
291eda14cbcSMatt Macy 		off += snprintf(buf + off, size - off, "%-16s",
292eda14cbcSMatt Macy 		    raidz_gen_name[i]);
293eda14cbcSMatt Macy 
294*e92ffd9bSMartin Matuska 	for (int i = 0; i < ARRAY_SIZE(raidz_rec_name); i++)
295eda14cbcSMatt Macy 		off += snprintf(buf + off, size - off, "%-16s",
296eda14cbcSMatt Macy 		    raidz_rec_name[i]);
297eda14cbcSMatt Macy 
298eda14cbcSMatt Macy 	(void) snprintf(buf + off, size - off, "\n");
299eda14cbcSMatt Macy 
300eda14cbcSMatt Macy 	return (0);
301eda14cbcSMatt Macy }
302eda14cbcSMatt Macy 
303eda14cbcSMatt Macy static int
304eda14cbcSMatt Macy raidz_math_kstat_data(char *buf, size_t size, void *data)
305eda14cbcSMatt Macy {
306eda14cbcSMatt Macy 	raidz_impl_kstat_t *fstat = &raidz_impl_kstats[raidz_supp_impl_cnt];
307eda14cbcSMatt Macy 	raidz_impl_kstat_t *cstat = (raidz_impl_kstat_t *)data;
308eda14cbcSMatt Macy 	ssize_t off = 0;
309eda14cbcSMatt Macy 	int i;
310eda14cbcSMatt Macy 
311eda14cbcSMatt Macy 	ASSERT3U(size, >=, RAIDZ_KSTAT_LINE_LEN);
312eda14cbcSMatt Macy 
313eda14cbcSMatt Macy 	if (cstat == fstat) {
314eda14cbcSMatt Macy 		off += snprintf(buf + off, size - off, "%-17s", "fastest");
315eda14cbcSMatt Macy 
316eda14cbcSMatt Macy 		for (i = 0; i < ARRAY_SIZE(raidz_gen_name); i++) {
317eda14cbcSMatt Macy 			int id = fstat->gen[i];
318eda14cbcSMatt Macy 			off += snprintf(buf + off, size - off, "%-16s",
319eda14cbcSMatt Macy 			    raidz_supp_impl[id]->name);
320eda14cbcSMatt Macy 		}
321eda14cbcSMatt Macy 		for (i = 0; i < ARRAY_SIZE(raidz_rec_name); i++) {
322eda14cbcSMatt Macy 			int id = fstat->rec[i];
323eda14cbcSMatt Macy 			off += snprintf(buf + off, size - off, "%-16s",
324eda14cbcSMatt Macy 			    raidz_supp_impl[id]->name);
325eda14cbcSMatt Macy 		}
326eda14cbcSMatt Macy 	} else {
327eda14cbcSMatt Macy 		ptrdiff_t id = cstat - raidz_impl_kstats;
328eda14cbcSMatt Macy 
329eda14cbcSMatt Macy 		off += snprintf(buf + off, size - off, "%-17s",
330eda14cbcSMatt Macy 		    raidz_supp_impl[id]->name);
331eda14cbcSMatt Macy 
332eda14cbcSMatt Macy 		for (i = 0; i < ARRAY_SIZE(raidz_gen_name); i++)
333eda14cbcSMatt Macy 			off += snprintf(buf + off, size - off, "%-16llu",
334eda14cbcSMatt Macy 			    (u_longlong_t)cstat->gen[i]);
335eda14cbcSMatt Macy 
336eda14cbcSMatt Macy 		for (i = 0; i < ARRAY_SIZE(raidz_rec_name); i++)
337eda14cbcSMatt Macy 			off += snprintf(buf + off, size - off, "%-16llu",
338eda14cbcSMatt Macy 			    (u_longlong_t)cstat->rec[i]);
339eda14cbcSMatt Macy 	}
340eda14cbcSMatt Macy 
341eda14cbcSMatt Macy 	(void) snprintf(buf + off, size - off, "\n");
342eda14cbcSMatt Macy 
343eda14cbcSMatt Macy 	return (0);
344eda14cbcSMatt Macy }
345eda14cbcSMatt Macy 
346eda14cbcSMatt Macy static void *
347eda14cbcSMatt Macy raidz_math_kstat_addr(kstat_t *ksp, loff_t n)
348eda14cbcSMatt Macy {
349eda14cbcSMatt Macy 	if (n <= raidz_supp_impl_cnt)
350eda14cbcSMatt Macy 		ksp->ks_private = (void *) (raidz_impl_kstats + n);
351eda14cbcSMatt Macy 	else
352eda14cbcSMatt Macy 		ksp->ks_private = NULL;
353eda14cbcSMatt Macy 
354eda14cbcSMatt Macy 	return (ksp->ks_private);
355eda14cbcSMatt Macy }
356eda14cbcSMatt Macy 
357eda14cbcSMatt Macy #define	BENCH_D_COLS	(8ULL)
358eda14cbcSMatt Macy #define	BENCH_COLS	(BENCH_D_COLS + PARITY_PQR)
359eda14cbcSMatt Macy #define	BENCH_ZIO_SIZE	(1ULL << SPA_OLD_MAXBLOCKSHIFT)	/* 128 kiB */
3607877fdebSMatt Macy #define	BENCH_NS	MSEC2NSEC(1)			/* 1ms */
361eda14cbcSMatt Macy 
362eda14cbcSMatt Macy typedef void (*benchmark_fn)(raidz_map_t *rm, const int fn);
363eda14cbcSMatt Macy 
364eda14cbcSMatt Macy static void
365eda14cbcSMatt Macy benchmark_gen_impl(raidz_map_t *rm, const int fn)
366eda14cbcSMatt Macy {
367eda14cbcSMatt Macy 	(void) fn;
368eda14cbcSMatt Macy 	vdev_raidz_generate_parity(rm);
369eda14cbcSMatt Macy }
370eda14cbcSMatt Macy 
371eda14cbcSMatt Macy static void
372eda14cbcSMatt Macy benchmark_rec_impl(raidz_map_t *rm, const int fn)
373eda14cbcSMatt Macy {
374eda14cbcSMatt Macy 	static const int rec_tgt[7][3] = {
375eda14cbcSMatt Macy 		{1, 2, 3},	/* rec_p:   bad QR & D[0]	*/
376eda14cbcSMatt Macy 		{0, 2, 3},	/* rec_q:   bad PR & D[0]	*/
377eda14cbcSMatt Macy 		{0, 1, 3},	/* rec_r:   bad PQ & D[0]	*/
378eda14cbcSMatt Macy 		{2, 3, 4},	/* rec_pq:  bad R  & D[0][1]	*/
379eda14cbcSMatt Macy 		{1, 3, 4},	/* rec_pr:  bad Q  & D[0][1]	*/
380eda14cbcSMatt Macy 		{0, 3, 4},	/* rec_qr:  bad P  & D[0][1]	*/
381eda14cbcSMatt Macy 		{3, 4, 5}	/* rec_pqr: bad    & D[0][1][2] */
382eda14cbcSMatt Macy 	};
383eda14cbcSMatt Macy 
384eda14cbcSMatt Macy 	vdev_raidz_reconstruct(rm, rec_tgt[fn], 3);
385eda14cbcSMatt Macy }
386eda14cbcSMatt Macy 
387eda14cbcSMatt Macy /*
388eda14cbcSMatt Macy  * Benchmarking of all supported implementations (raidz_supp_impl_cnt)
389eda14cbcSMatt Macy  * is performed by setting the rm_ops pointer and calling the top level
390eda14cbcSMatt Macy  * generate/reconstruct methods of bench_rm.
391eda14cbcSMatt Macy  */
392eda14cbcSMatt Macy static void
393eda14cbcSMatt Macy benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn)
394eda14cbcSMatt Macy {
395eda14cbcSMatt Macy 	uint64_t run_cnt, speed, best_speed = 0;
396eda14cbcSMatt Macy 	hrtime_t t_start, t_diff;
397eda14cbcSMatt Macy 	raidz_impl_ops_t *curr_impl;
398eda14cbcSMatt Macy 	raidz_impl_kstat_t *fstat = &raidz_impl_kstats[raidz_supp_impl_cnt];
399eda14cbcSMatt Macy 	int impl, i;
400eda14cbcSMatt Macy 
401eda14cbcSMatt Macy 	for (impl = 0; impl < raidz_supp_impl_cnt; impl++) {
402eda14cbcSMatt Macy 		/* set an implementation to benchmark */
403eda14cbcSMatt Macy 		curr_impl = raidz_supp_impl[impl];
404eda14cbcSMatt Macy 		bench_rm->rm_ops = curr_impl;
405eda14cbcSMatt Macy 
406eda14cbcSMatt Macy 		run_cnt = 0;
407eda14cbcSMatt Macy 		t_start = gethrtime();
408eda14cbcSMatt Macy 
409eda14cbcSMatt Macy 		do {
4107877fdebSMatt Macy 			for (i = 0; i < 5; i++, run_cnt++)
411eda14cbcSMatt Macy 				bench_fn(bench_rm, fn);
412eda14cbcSMatt Macy 
413eda14cbcSMatt Macy 			t_diff = gethrtime() - t_start;
414eda14cbcSMatt Macy 		} while (t_diff < BENCH_NS);
415eda14cbcSMatt Macy 
416eda14cbcSMatt Macy 		speed = run_cnt * BENCH_ZIO_SIZE * NANOSEC;
417eda14cbcSMatt Macy 		speed /= (t_diff * BENCH_COLS);
418eda14cbcSMatt Macy 
419eda14cbcSMatt Macy 		if (bench_fn == benchmark_gen_impl)
420eda14cbcSMatt Macy 			raidz_impl_kstats[impl].gen[fn] = speed;
421eda14cbcSMatt Macy 		else
422eda14cbcSMatt Macy 			raidz_impl_kstats[impl].rec[fn] = speed;
423eda14cbcSMatt Macy 
424eda14cbcSMatt Macy 		/* Update fastest implementation method */
425eda14cbcSMatt Macy 		if (speed > best_speed) {
426eda14cbcSMatt Macy 			best_speed = speed;
427eda14cbcSMatt Macy 
428eda14cbcSMatt Macy 			if (bench_fn == benchmark_gen_impl) {
429eda14cbcSMatt Macy 				fstat->gen[fn] = impl;
430eda14cbcSMatt Macy 				vdev_raidz_fastest_impl.gen[fn] =
431eda14cbcSMatt Macy 				    curr_impl->gen[fn];
432eda14cbcSMatt Macy 			} else {
433eda14cbcSMatt Macy 				fstat->rec[fn] = impl;
434eda14cbcSMatt Macy 				vdev_raidz_fastest_impl.rec[fn] =
435eda14cbcSMatt Macy 				    curr_impl->rec[fn];
436eda14cbcSMatt Macy 			}
437eda14cbcSMatt Macy 		}
438eda14cbcSMatt Macy 	}
439eda14cbcSMatt Macy }
440eda14cbcSMatt Macy #endif
441eda14cbcSMatt Macy 
442eda14cbcSMatt Macy /*
443eda14cbcSMatt Macy  * Initialize and benchmark all supported implementations.
444eda14cbcSMatt Macy  */
445eda14cbcSMatt Macy static void
446eda14cbcSMatt Macy benchmark_raidz(void)
447eda14cbcSMatt Macy {
448eda14cbcSMatt Macy 	raidz_impl_ops_t *curr_impl;
449eda14cbcSMatt Macy 	int i, c;
450eda14cbcSMatt Macy 
451eda14cbcSMatt Macy 	/* Move supported impl into raidz_supp_impl */
452eda14cbcSMatt Macy 	for (i = 0, c = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
453eda14cbcSMatt Macy 		curr_impl = (raidz_impl_ops_t *)raidz_all_maths[i];
454eda14cbcSMatt Macy 
455eda14cbcSMatt Macy 		if (curr_impl->init)
456eda14cbcSMatt Macy 			curr_impl->init();
457eda14cbcSMatt Macy 
458eda14cbcSMatt Macy 		if (curr_impl->is_supported())
459eda14cbcSMatt Macy 			raidz_supp_impl[c++] = (raidz_impl_ops_t *)curr_impl;
460eda14cbcSMatt Macy 	}
461eda14cbcSMatt Macy 	membar_producer();		/* complete raidz_supp_impl[] init */
462eda14cbcSMatt Macy 	raidz_supp_impl_cnt = c;	/* number of supported impl */
463eda14cbcSMatt Macy 
464eda14cbcSMatt Macy #if defined(_KERNEL)
46521b492edSMartin Matuska 	abd_t *pabd;
466eda14cbcSMatt Macy 	zio_t *bench_zio = NULL;
467eda14cbcSMatt Macy 	raidz_map_t *bench_rm = NULL;
468eda14cbcSMatt Macy 	uint64_t bench_parity;
469eda14cbcSMatt Macy 
470eda14cbcSMatt Macy 	/* Fake a zio and run the benchmark on a warmed up buffer */
471eda14cbcSMatt Macy 	bench_zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP);
472eda14cbcSMatt Macy 	bench_zio->io_offset = 0;
473eda14cbcSMatt Macy 	bench_zio->io_size = BENCH_ZIO_SIZE; /* only data columns */
474eda14cbcSMatt Macy 	bench_zio->io_abd = abd_alloc_linear(BENCH_ZIO_SIZE, B_TRUE);
475eda14cbcSMatt Macy 	memset(abd_to_buf(bench_zio->io_abd), 0xAA, BENCH_ZIO_SIZE);
476eda14cbcSMatt Macy 
477eda14cbcSMatt Macy 	/* Benchmark parity generation methods */
478eda14cbcSMatt Macy 	for (int fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
479eda14cbcSMatt Macy 		bench_parity = fn + 1;
480eda14cbcSMatt Macy 		/* New raidz_map is needed for each generate_p/q/r */
481eda14cbcSMatt Macy 		bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
482eda14cbcSMatt Macy 		    BENCH_D_COLS + bench_parity, bench_parity);
483eda14cbcSMatt Macy 
484eda14cbcSMatt Macy 		benchmark_raidz_impl(bench_rm, fn, benchmark_gen_impl);
485eda14cbcSMatt Macy 
486eda14cbcSMatt Macy 		vdev_raidz_map_free(bench_rm);
487eda14cbcSMatt Macy 	}
488eda14cbcSMatt Macy 
489eda14cbcSMatt Macy 	/* Benchmark data reconstruction methods */
490eda14cbcSMatt Macy 	bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
491eda14cbcSMatt Macy 	    BENCH_COLS, PARITY_PQR);
492eda14cbcSMatt Macy 
49321b492edSMartin Matuska 	/* Ensure that fake parity blocks are initialized */
49421b492edSMartin Matuska 	for (c = 0; c < bench_rm->rm_row[0]->rr_firstdatacol; c++) {
49521b492edSMartin Matuska 		pabd = bench_rm->rm_row[0]->rr_col[c].rc_abd;
49621b492edSMartin Matuska 		memset(abd_to_buf(pabd), 0xAA, abd_get_size(pabd));
49721b492edSMartin Matuska 	}
49821b492edSMartin Matuska 
499eda14cbcSMatt Macy 	for (int fn = 0; fn < RAIDZ_REC_NUM; fn++)
500eda14cbcSMatt Macy 		benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl);
501eda14cbcSMatt Macy 
502eda14cbcSMatt Macy 	vdev_raidz_map_free(bench_rm);
503eda14cbcSMatt Macy 
504eda14cbcSMatt Macy 	/* cleanup the bench zio */
505eda14cbcSMatt Macy 	abd_free(bench_zio->io_abd);
506eda14cbcSMatt Macy 	kmem_free(bench_zio, sizeof (zio_t));
507eda14cbcSMatt Macy #else
508eda14cbcSMatt Macy 	/*
509eda14cbcSMatt Macy 	 * Skip the benchmark in user space to avoid impacting libzpool
510eda14cbcSMatt Macy 	 * consumers (zdb, zhack, zinject, ztest).  The last implementation
511eda14cbcSMatt Macy 	 * is assumed to be the fastest and used by default.
512eda14cbcSMatt Macy 	 */
513eda14cbcSMatt Macy 	memcpy(&vdev_raidz_fastest_impl,
514eda14cbcSMatt Macy 	    raidz_supp_impl[raidz_supp_impl_cnt - 1],
515eda14cbcSMatt Macy 	    sizeof (vdev_raidz_fastest_impl));
516eda14cbcSMatt Macy 	strcpy(vdev_raidz_fastest_impl.name, "fastest");
517eda14cbcSMatt Macy #endif /* _KERNEL */
518eda14cbcSMatt Macy }
519eda14cbcSMatt Macy 
520eda14cbcSMatt Macy void
521eda14cbcSMatt Macy vdev_raidz_math_init(void)
522eda14cbcSMatt Macy {
523eda14cbcSMatt Macy 	/* Determine the fastest available implementation. */
524eda14cbcSMatt Macy 	benchmark_raidz();
525eda14cbcSMatt Macy 
526eda14cbcSMatt Macy #if defined(_KERNEL)
527eda14cbcSMatt Macy 	/* Install kstats for all implementations */
528eda14cbcSMatt Macy 	raidz_math_kstat = kstat_create("zfs", 0, "vdev_raidz_bench", "misc",
529eda14cbcSMatt Macy 	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
530eda14cbcSMatt Macy 	if (raidz_math_kstat != NULL) {
531eda14cbcSMatt Macy 		raidz_math_kstat->ks_data = NULL;
532eda14cbcSMatt Macy 		raidz_math_kstat->ks_ndata = UINT32_MAX;
533eda14cbcSMatt Macy 		kstat_set_raw_ops(raidz_math_kstat,
534eda14cbcSMatt Macy 		    raidz_math_kstat_headers,
535eda14cbcSMatt Macy 		    raidz_math_kstat_data,
536eda14cbcSMatt Macy 		    raidz_math_kstat_addr);
537eda14cbcSMatt Macy 		kstat_install(raidz_math_kstat);
538eda14cbcSMatt Macy 	}
539eda14cbcSMatt Macy #endif
540eda14cbcSMatt Macy 
541eda14cbcSMatt Macy 	/* Finish initialization */
542eda14cbcSMatt Macy 	atomic_swap_32(&zfs_vdev_raidz_impl, user_sel_impl);
543eda14cbcSMatt Macy 	raidz_math_initialized = B_TRUE;
544eda14cbcSMatt Macy }
545eda14cbcSMatt Macy 
546eda14cbcSMatt Macy void
547eda14cbcSMatt Macy vdev_raidz_math_fini(void)
548eda14cbcSMatt Macy {
549eda14cbcSMatt Macy 	raidz_impl_ops_t const *curr_impl;
550eda14cbcSMatt Macy 
551eda14cbcSMatt Macy #if defined(_KERNEL)
552eda14cbcSMatt Macy 	if (raidz_math_kstat != NULL) {
553eda14cbcSMatt Macy 		kstat_delete(raidz_math_kstat);
554eda14cbcSMatt Macy 		raidz_math_kstat = NULL;
555eda14cbcSMatt Macy 	}
556eda14cbcSMatt Macy #endif
557eda14cbcSMatt Macy 
558eda14cbcSMatt Macy 	for (int i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
559eda14cbcSMatt Macy 		curr_impl = raidz_all_maths[i];
560eda14cbcSMatt Macy 		if (curr_impl->fini)
561eda14cbcSMatt Macy 			curr_impl->fini();
562eda14cbcSMatt Macy 	}
563eda14cbcSMatt Macy }
564eda14cbcSMatt Macy 
565eda14cbcSMatt Macy static const struct {
566eda14cbcSMatt Macy 	char *name;
567eda14cbcSMatt Macy 	uint32_t sel;
568eda14cbcSMatt Macy } math_impl_opts[] = {
569eda14cbcSMatt Macy 		{ "cycle",	IMPL_CYCLE },
570eda14cbcSMatt Macy 		{ "fastest",	IMPL_FASTEST },
571eda14cbcSMatt Macy 		{ "original",	IMPL_ORIGINAL },
572eda14cbcSMatt Macy 		{ "scalar",	IMPL_SCALAR }
573eda14cbcSMatt Macy };
574eda14cbcSMatt Macy 
575eda14cbcSMatt Macy /*
576eda14cbcSMatt Macy  * Function sets desired raidz implementation.
577eda14cbcSMatt Macy  *
578eda14cbcSMatt Macy  * If we are called before init(), user preference will be saved in
579eda14cbcSMatt Macy  * user_sel_impl, and applied in later init() call. This occurs when module
580eda14cbcSMatt Macy  * parameter is specified on module load. Otherwise, directly update
581eda14cbcSMatt Macy  * zfs_vdev_raidz_impl.
582eda14cbcSMatt Macy  *
583eda14cbcSMatt Macy  * @val		Name of raidz implementation to use
584eda14cbcSMatt Macy  * @param	Unused.
585eda14cbcSMatt Macy  */
586eda14cbcSMatt Macy int
587eda14cbcSMatt Macy vdev_raidz_impl_set(const char *val)
588eda14cbcSMatt Macy {
589eda14cbcSMatt Macy 	int err = -EINVAL;
590eda14cbcSMatt Macy 	char req_name[RAIDZ_IMPL_NAME_MAX];
591eda14cbcSMatt Macy 	uint32_t impl = RAIDZ_IMPL_READ(user_sel_impl);
592eda14cbcSMatt Macy 	size_t i;
593eda14cbcSMatt Macy 
594eda14cbcSMatt Macy 	/* sanitize input */
595eda14cbcSMatt Macy 	i = strnlen(val, RAIDZ_IMPL_NAME_MAX);
596eda14cbcSMatt Macy 	if (i == 0 || i == RAIDZ_IMPL_NAME_MAX)
597eda14cbcSMatt Macy 		return (err);
598eda14cbcSMatt Macy 
599eda14cbcSMatt Macy 	strlcpy(req_name, val, RAIDZ_IMPL_NAME_MAX);
600eda14cbcSMatt Macy 	while (i > 0 && !!isspace(req_name[i-1]))
601eda14cbcSMatt Macy 		i--;
602eda14cbcSMatt Macy 	req_name[i] = '\0';
603eda14cbcSMatt Macy 
604eda14cbcSMatt Macy 	/* Check mandatory options */
605eda14cbcSMatt Macy 	for (i = 0; i < ARRAY_SIZE(math_impl_opts); i++) {
606eda14cbcSMatt Macy 		if (strcmp(req_name, math_impl_opts[i].name) == 0) {
607eda14cbcSMatt Macy 			impl = math_impl_opts[i].sel;
608eda14cbcSMatt Macy 			err = 0;
609eda14cbcSMatt Macy 			break;
610eda14cbcSMatt Macy 		}
611eda14cbcSMatt Macy 	}
612eda14cbcSMatt Macy 
613eda14cbcSMatt Macy 	/* check all supported impl if init() was already called */
614eda14cbcSMatt Macy 	if (err != 0 && raidz_math_initialized) {
615eda14cbcSMatt Macy 		/* check all supported implementations */
616eda14cbcSMatt Macy 		for (i = 0; i < raidz_supp_impl_cnt; i++) {
617eda14cbcSMatt Macy 			if (strcmp(req_name, raidz_supp_impl[i]->name) == 0) {
618eda14cbcSMatt Macy 				impl = i;
619eda14cbcSMatt Macy 				err = 0;
620eda14cbcSMatt Macy 				break;
621eda14cbcSMatt Macy 			}
622eda14cbcSMatt Macy 		}
623eda14cbcSMatt Macy 	}
624eda14cbcSMatt Macy 
625eda14cbcSMatt Macy 	if (err == 0) {
626eda14cbcSMatt Macy 		if (raidz_math_initialized)
627eda14cbcSMatt Macy 			atomic_swap_32(&zfs_vdev_raidz_impl, impl);
628eda14cbcSMatt Macy 		else
629eda14cbcSMatt Macy 			atomic_swap_32(&user_sel_impl, impl);
630eda14cbcSMatt Macy 	}
631eda14cbcSMatt Macy 
632eda14cbcSMatt Macy 	return (err);
633eda14cbcSMatt Macy }
634eda14cbcSMatt Macy 
635eda14cbcSMatt Macy #if defined(_KERNEL) && defined(__linux__)
636eda14cbcSMatt Macy 
637eda14cbcSMatt Macy static int
638eda14cbcSMatt Macy zfs_vdev_raidz_impl_set(const char *val, zfs_kernel_param_t *kp)
639eda14cbcSMatt Macy {
640eda14cbcSMatt Macy 	return (vdev_raidz_impl_set(val));
641eda14cbcSMatt Macy }
642eda14cbcSMatt Macy 
643eda14cbcSMatt Macy static int
644eda14cbcSMatt Macy zfs_vdev_raidz_impl_get(char *buffer, zfs_kernel_param_t *kp)
645eda14cbcSMatt Macy {
646eda14cbcSMatt Macy 	int i, cnt = 0;
647eda14cbcSMatt Macy 	char *fmt;
648eda14cbcSMatt Macy 	const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
649eda14cbcSMatt Macy 
650eda14cbcSMatt Macy 	ASSERT(raidz_math_initialized);
651eda14cbcSMatt Macy 
652eda14cbcSMatt Macy 	/* list mandatory options */
653eda14cbcSMatt Macy 	for (i = 0; i < ARRAY_SIZE(math_impl_opts) - 2; i++) {
654eda14cbcSMatt Macy 		fmt = (impl == math_impl_opts[i].sel) ? "[%s] " : "%s ";
655eda14cbcSMatt Macy 		cnt += sprintf(buffer + cnt, fmt, math_impl_opts[i].name);
656eda14cbcSMatt Macy 	}
657eda14cbcSMatt Macy 
658eda14cbcSMatt Macy 	/* list all supported implementations */
659eda14cbcSMatt Macy 	for (i = 0; i < raidz_supp_impl_cnt; i++) {
660eda14cbcSMatt Macy 		fmt = (i == impl) ? "[%s] " : "%s ";
661eda14cbcSMatt Macy 		cnt += sprintf(buffer + cnt, fmt, raidz_supp_impl[i]->name);
662eda14cbcSMatt Macy 	}
663eda14cbcSMatt Macy 
664eda14cbcSMatt Macy 	return (cnt);
665eda14cbcSMatt Macy }
666eda14cbcSMatt Macy 
667eda14cbcSMatt Macy module_param_call(zfs_vdev_raidz_impl, zfs_vdev_raidz_impl_set,
668eda14cbcSMatt Macy     zfs_vdev_raidz_impl_get, NULL, 0644);
669eda14cbcSMatt Macy MODULE_PARM_DESC(zfs_vdev_raidz_impl, "Select raidz implementation.");
670eda14cbcSMatt Macy #endif
671