xref: /freebsd-src/sys/contrib/openzfs/module/zfs/vdev_raidz_math.c (revision eda14cbc264d6969b02f2b1994cef11148e914f1)
1*eda14cbcSMatt Macy /*
2*eda14cbcSMatt Macy  * CDDL HEADER START
3*eda14cbcSMatt Macy  *
4*eda14cbcSMatt Macy  * The contents of this file are subject to the terms of the
5*eda14cbcSMatt Macy  * Common Development and Distribution License (the "License").
6*eda14cbcSMatt Macy  * You may not use this file except in compliance with the License.
7*eda14cbcSMatt Macy  *
8*eda14cbcSMatt Macy  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*eda14cbcSMatt Macy  * or http://www.opensolaris.org/os/licensing.
10*eda14cbcSMatt Macy  * See the License for the specific language governing permissions
11*eda14cbcSMatt Macy  * and limitations under the License.
12*eda14cbcSMatt Macy  *
13*eda14cbcSMatt Macy  * When distributing Covered Code, include this CDDL HEADER in each
14*eda14cbcSMatt Macy  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*eda14cbcSMatt Macy  * If applicable, add the following below this CDDL HEADER, with the
16*eda14cbcSMatt Macy  * fields enclosed by brackets "[]" replaced with your own identifying
17*eda14cbcSMatt Macy  * information: Portions Copyright [yyyy] [name of copyright owner]
18*eda14cbcSMatt Macy  *
19*eda14cbcSMatt Macy  * CDDL HEADER END
20*eda14cbcSMatt Macy  */
21*eda14cbcSMatt Macy /*
22*eda14cbcSMatt Macy  * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
23*eda14cbcSMatt Macy  */
24*eda14cbcSMatt Macy 
25*eda14cbcSMatt Macy #include <sys/zfs_context.h>
26*eda14cbcSMatt Macy #include <sys/types.h>
27*eda14cbcSMatt Macy #include <sys/zio.h>
28*eda14cbcSMatt Macy #include <sys/debug.h>
29*eda14cbcSMatt Macy #include <sys/zfs_debug.h>
30*eda14cbcSMatt Macy #include <sys/vdev_raidz.h>
31*eda14cbcSMatt Macy #include <sys/vdev_raidz_impl.h>
32*eda14cbcSMatt Macy #include <sys/simd.h>
33*eda14cbcSMatt Macy 
34*eda14cbcSMatt Macy /* Opaque implementation with NULL methods to represent original methods */
35*eda14cbcSMatt Macy static const raidz_impl_ops_t vdev_raidz_original_impl = {
36*eda14cbcSMatt Macy 	.name = "original",
37*eda14cbcSMatt Macy 	.is_supported = raidz_will_scalar_work,
38*eda14cbcSMatt Macy };
39*eda14cbcSMatt Macy 
40*eda14cbcSMatt Macy /* RAIDZ parity op that contain the fastest methods */
41*eda14cbcSMatt Macy static raidz_impl_ops_t vdev_raidz_fastest_impl = {
42*eda14cbcSMatt Macy 	.name = "fastest"
43*eda14cbcSMatt Macy };
44*eda14cbcSMatt Macy 
45*eda14cbcSMatt Macy /* All compiled in implementations */
46*eda14cbcSMatt Macy const raidz_impl_ops_t *raidz_all_maths[] = {
47*eda14cbcSMatt Macy 	&vdev_raidz_original_impl,
48*eda14cbcSMatt Macy 	&vdev_raidz_scalar_impl,
49*eda14cbcSMatt Macy #if defined(__x86_64) && defined(HAVE_SSE2)	/* only x86_64 for now */
50*eda14cbcSMatt Macy 	&vdev_raidz_sse2_impl,
51*eda14cbcSMatt Macy #endif
52*eda14cbcSMatt Macy #if defined(__x86_64) && defined(HAVE_SSSE3)	/* only x86_64 for now */
53*eda14cbcSMatt Macy 	&vdev_raidz_ssse3_impl,
54*eda14cbcSMatt Macy #endif
55*eda14cbcSMatt Macy #if defined(__x86_64) && defined(HAVE_AVX2)	/* only x86_64 for now */
56*eda14cbcSMatt Macy 	&vdev_raidz_avx2_impl,
57*eda14cbcSMatt Macy #endif
58*eda14cbcSMatt Macy #if defined(__x86_64) && defined(HAVE_AVX512F)	/* only x86_64 for now */
59*eda14cbcSMatt Macy 	&vdev_raidz_avx512f_impl,
60*eda14cbcSMatt Macy #endif
61*eda14cbcSMatt Macy #if defined(__x86_64) && defined(HAVE_AVX512BW)	/* only x86_64 for now */
62*eda14cbcSMatt Macy 	&vdev_raidz_avx512bw_impl,
63*eda14cbcSMatt Macy #endif
64*eda14cbcSMatt Macy #if defined(__aarch64__)
65*eda14cbcSMatt Macy 	&vdev_raidz_aarch64_neon_impl,
66*eda14cbcSMatt Macy 	&vdev_raidz_aarch64_neonx2_impl,
67*eda14cbcSMatt Macy #endif
68*eda14cbcSMatt Macy #if defined(__powerpc__) && defined(__altivec__)
69*eda14cbcSMatt Macy 	&vdev_raidz_powerpc_altivec_impl,
70*eda14cbcSMatt Macy #endif
71*eda14cbcSMatt Macy };
72*eda14cbcSMatt Macy 
73*eda14cbcSMatt Macy /* Indicate that benchmark has been completed */
74*eda14cbcSMatt Macy static boolean_t raidz_math_initialized = B_FALSE;
75*eda14cbcSMatt Macy 
76*eda14cbcSMatt Macy /* Select raidz implementation */
77*eda14cbcSMatt Macy #define	IMPL_FASTEST	(UINT32_MAX)
78*eda14cbcSMatt Macy #define	IMPL_CYCLE	(UINT32_MAX - 1)
79*eda14cbcSMatt Macy #define	IMPL_ORIGINAL	(0)
80*eda14cbcSMatt Macy #define	IMPL_SCALAR	(1)
81*eda14cbcSMatt Macy 
82*eda14cbcSMatt Macy #define	RAIDZ_IMPL_READ(i)	(*(volatile uint32_t *) &(i))
83*eda14cbcSMatt Macy 
84*eda14cbcSMatt Macy static uint32_t zfs_vdev_raidz_impl = IMPL_SCALAR;
85*eda14cbcSMatt Macy static uint32_t user_sel_impl = IMPL_FASTEST;
86*eda14cbcSMatt Macy 
87*eda14cbcSMatt Macy /* Hold all supported implementations */
88*eda14cbcSMatt Macy static size_t raidz_supp_impl_cnt = 0;
89*eda14cbcSMatt Macy static raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths)];
90*eda14cbcSMatt Macy 
#if defined(_KERNEL)
/*
 * Benchmark results: one row per supported implementation, followed by one
 * extra row (index raidz_supp_impl_cnt) that records which implementation
 * was fastest for each method.
 * Values represent per disk throughput of 8 disk+parity raidz vdev [B/s]
 */
static raidz_impl_kstat_t raidz_impl_kstats[ARRAY_SIZE(raidz_all_maths) + 1];

/* Handle of the benchmark kstat; NULL while none is installed */
static kstat_t *raidz_math_kstat = NULL;
#endif
101*eda14cbcSMatt Macy 
102*eda14cbcSMatt Macy /*
103*eda14cbcSMatt Macy  * Returns the RAIDZ operations for raidz_map() parity calculations.   When
104*eda14cbcSMatt Macy  * a SIMD implementation is not allowed in the current context, then fallback
105*eda14cbcSMatt Macy  * to the fastest generic implementation.
106*eda14cbcSMatt Macy  */
107*eda14cbcSMatt Macy const raidz_impl_ops_t *
108*eda14cbcSMatt Macy vdev_raidz_math_get_ops(void)
109*eda14cbcSMatt Macy {
110*eda14cbcSMatt Macy 	if (!kfpu_allowed())
111*eda14cbcSMatt Macy 		return (&vdev_raidz_scalar_impl);
112*eda14cbcSMatt Macy 
113*eda14cbcSMatt Macy 	raidz_impl_ops_t *ops = NULL;
114*eda14cbcSMatt Macy 	const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
115*eda14cbcSMatt Macy 
116*eda14cbcSMatt Macy 	switch (impl) {
117*eda14cbcSMatt Macy 	case IMPL_FASTEST:
118*eda14cbcSMatt Macy 		ASSERT(raidz_math_initialized);
119*eda14cbcSMatt Macy 		ops = &vdev_raidz_fastest_impl;
120*eda14cbcSMatt Macy 		break;
121*eda14cbcSMatt Macy 	case IMPL_CYCLE:
122*eda14cbcSMatt Macy 		/* Cycle through all supported implementations */
123*eda14cbcSMatt Macy 		ASSERT(raidz_math_initialized);
124*eda14cbcSMatt Macy 		ASSERT3U(raidz_supp_impl_cnt, >, 0);
125*eda14cbcSMatt Macy 		static size_t cycle_impl_idx = 0;
126*eda14cbcSMatt Macy 		size_t idx = (++cycle_impl_idx) % raidz_supp_impl_cnt;
127*eda14cbcSMatt Macy 		ops = raidz_supp_impl[idx];
128*eda14cbcSMatt Macy 		break;
129*eda14cbcSMatt Macy 	case IMPL_ORIGINAL:
130*eda14cbcSMatt Macy 		ops = (raidz_impl_ops_t *)&vdev_raidz_original_impl;
131*eda14cbcSMatt Macy 		break;
132*eda14cbcSMatt Macy 	case IMPL_SCALAR:
133*eda14cbcSMatt Macy 		ops = (raidz_impl_ops_t *)&vdev_raidz_scalar_impl;
134*eda14cbcSMatt Macy 		break;
135*eda14cbcSMatt Macy 	default:
136*eda14cbcSMatt Macy 		ASSERT3U(impl, <, raidz_supp_impl_cnt);
137*eda14cbcSMatt Macy 		ASSERT3U(raidz_supp_impl_cnt, >, 0);
138*eda14cbcSMatt Macy 		if (impl < ARRAY_SIZE(raidz_all_maths))
139*eda14cbcSMatt Macy 			ops = raidz_supp_impl[impl];
140*eda14cbcSMatt Macy 		break;
141*eda14cbcSMatt Macy 	}
142*eda14cbcSMatt Macy 
143*eda14cbcSMatt Macy 	ASSERT3P(ops, !=, NULL);
144*eda14cbcSMatt Macy 
145*eda14cbcSMatt Macy 	return (ops);
146*eda14cbcSMatt Macy }
147*eda14cbcSMatt Macy 
148*eda14cbcSMatt Macy /*
149*eda14cbcSMatt Macy  * Select parity generation method for raidz_map
150*eda14cbcSMatt Macy  */
151*eda14cbcSMatt Macy int
152*eda14cbcSMatt Macy vdev_raidz_math_generate(raidz_map_t *rm)
153*eda14cbcSMatt Macy {
154*eda14cbcSMatt Macy 	raidz_gen_f gen_parity = NULL;
155*eda14cbcSMatt Macy 
156*eda14cbcSMatt Macy 	switch (raidz_parity(rm)) {
157*eda14cbcSMatt Macy 		case 1:
158*eda14cbcSMatt Macy 			gen_parity = rm->rm_ops->gen[RAIDZ_GEN_P];
159*eda14cbcSMatt Macy 			break;
160*eda14cbcSMatt Macy 		case 2:
161*eda14cbcSMatt Macy 			gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQ];
162*eda14cbcSMatt Macy 			break;
163*eda14cbcSMatt Macy 		case 3:
164*eda14cbcSMatt Macy 			gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQR];
165*eda14cbcSMatt Macy 			break;
166*eda14cbcSMatt Macy 		default:
167*eda14cbcSMatt Macy 			gen_parity = NULL;
168*eda14cbcSMatt Macy 			cmn_err(CE_PANIC, "invalid RAID-Z configuration %d",
169*eda14cbcSMatt Macy 			    raidz_parity(rm));
170*eda14cbcSMatt Macy 			break;
171*eda14cbcSMatt Macy 	}
172*eda14cbcSMatt Macy 
173*eda14cbcSMatt Macy 	/* if method is NULL execute the original implementation */
174*eda14cbcSMatt Macy 	if (gen_parity == NULL)
175*eda14cbcSMatt Macy 		return (RAIDZ_ORIGINAL_IMPL);
176*eda14cbcSMatt Macy 
177*eda14cbcSMatt Macy 	gen_parity(rm);
178*eda14cbcSMatt Macy 
179*eda14cbcSMatt Macy 	return (0);
180*eda14cbcSMatt Macy }
181*eda14cbcSMatt Macy 
182*eda14cbcSMatt Macy static raidz_rec_f
183*eda14cbcSMatt Macy reconstruct_fun_p_sel(raidz_map_t *rm, const int *parity_valid,
184*eda14cbcSMatt Macy     const int nbaddata)
185*eda14cbcSMatt Macy {
186*eda14cbcSMatt Macy 	if (nbaddata == 1 && parity_valid[CODE_P]) {
187*eda14cbcSMatt Macy 		return (rm->rm_ops->rec[RAIDZ_REC_P]);
188*eda14cbcSMatt Macy 	}
189*eda14cbcSMatt Macy 	return ((raidz_rec_f) NULL);
190*eda14cbcSMatt Macy }
191*eda14cbcSMatt Macy 
192*eda14cbcSMatt Macy static raidz_rec_f
193*eda14cbcSMatt Macy reconstruct_fun_pq_sel(raidz_map_t *rm, const int *parity_valid,
194*eda14cbcSMatt Macy     const int nbaddata)
195*eda14cbcSMatt Macy {
196*eda14cbcSMatt Macy 	if (nbaddata == 1) {
197*eda14cbcSMatt Macy 		if (parity_valid[CODE_P]) {
198*eda14cbcSMatt Macy 			return (rm->rm_ops->rec[RAIDZ_REC_P]);
199*eda14cbcSMatt Macy 		} else if (parity_valid[CODE_Q]) {
200*eda14cbcSMatt Macy 			return (rm->rm_ops->rec[RAIDZ_REC_Q]);
201*eda14cbcSMatt Macy 		}
202*eda14cbcSMatt Macy 	} else if (nbaddata == 2 &&
203*eda14cbcSMatt Macy 	    parity_valid[CODE_P] && parity_valid[CODE_Q]) {
204*eda14cbcSMatt Macy 		return (rm->rm_ops->rec[RAIDZ_REC_PQ]);
205*eda14cbcSMatt Macy 	}
206*eda14cbcSMatt Macy 	return ((raidz_rec_f) NULL);
207*eda14cbcSMatt Macy }
208*eda14cbcSMatt Macy 
209*eda14cbcSMatt Macy static raidz_rec_f
210*eda14cbcSMatt Macy reconstruct_fun_pqr_sel(raidz_map_t *rm, const int *parity_valid,
211*eda14cbcSMatt Macy     const int nbaddata)
212*eda14cbcSMatt Macy {
213*eda14cbcSMatt Macy 	if (nbaddata == 1) {
214*eda14cbcSMatt Macy 		if (parity_valid[CODE_P]) {
215*eda14cbcSMatt Macy 			return (rm->rm_ops->rec[RAIDZ_REC_P]);
216*eda14cbcSMatt Macy 		} else if (parity_valid[CODE_Q]) {
217*eda14cbcSMatt Macy 			return (rm->rm_ops->rec[RAIDZ_REC_Q]);
218*eda14cbcSMatt Macy 		} else if (parity_valid[CODE_R]) {
219*eda14cbcSMatt Macy 			return (rm->rm_ops->rec[RAIDZ_REC_R]);
220*eda14cbcSMatt Macy 		}
221*eda14cbcSMatt Macy 	} else if (nbaddata == 2) {
222*eda14cbcSMatt Macy 		if (parity_valid[CODE_P] && parity_valid[CODE_Q]) {
223*eda14cbcSMatt Macy 			return (rm->rm_ops->rec[RAIDZ_REC_PQ]);
224*eda14cbcSMatt Macy 		} else if (parity_valid[CODE_P] && parity_valid[CODE_R]) {
225*eda14cbcSMatt Macy 			return (rm->rm_ops->rec[RAIDZ_REC_PR]);
226*eda14cbcSMatt Macy 		} else if (parity_valid[CODE_Q] && parity_valid[CODE_R]) {
227*eda14cbcSMatt Macy 			return (rm->rm_ops->rec[RAIDZ_REC_QR]);
228*eda14cbcSMatt Macy 		}
229*eda14cbcSMatt Macy 	} else if (nbaddata == 3 &&
230*eda14cbcSMatt Macy 	    parity_valid[CODE_P] && parity_valid[CODE_Q] &&
231*eda14cbcSMatt Macy 	    parity_valid[CODE_R]) {
232*eda14cbcSMatt Macy 		return (rm->rm_ops->rec[RAIDZ_REC_PQR]);
233*eda14cbcSMatt Macy 	}
234*eda14cbcSMatt Macy 	return ((raidz_rec_f) NULL);
235*eda14cbcSMatt Macy }
236*eda14cbcSMatt Macy 
237*eda14cbcSMatt Macy /*
238*eda14cbcSMatt Macy  * Select data reconstruction method for raidz_map
239*eda14cbcSMatt Macy  * @parity_valid - Parity validity flag
240*eda14cbcSMatt Macy  * @dt           - Failed data index array
241*eda14cbcSMatt Macy  * @nbaddata     - Number of failed data columns
242*eda14cbcSMatt Macy  */
243*eda14cbcSMatt Macy int
244*eda14cbcSMatt Macy vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid,
245*eda14cbcSMatt Macy     const int *dt, const int nbaddata)
246*eda14cbcSMatt Macy {
247*eda14cbcSMatt Macy 	raidz_rec_f rec_fn = NULL;
248*eda14cbcSMatt Macy 
249*eda14cbcSMatt Macy 	switch (raidz_parity(rm)) {
250*eda14cbcSMatt Macy 	case PARITY_P:
251*eda14cbcSMatt Macy 		rec_fn = reconstruct_fun_p_sel(rm, parity_valid, nbaddata);
252*eda14cbcSMatt Macy 		break;
253*eda14cbcSMatt Macy 	case PARITY_PQ:
254*eda14cbcSMatt Macy 		rec_fn = reconstruct_fun_pq_sel(rm, parity_valid, nbaddata);
255*eda14cbcSMatt Macy 		break;
256*eda14cbcSMatt Macy 	case PARITY_PQR:
257*eda14cbcSMatt Macy 		rec_fn = reconstruct_fun_pqr_sel(rm, parity_valid, nbaddata);
258*eda14cbcSMatt Macy 		break;
259*eda14cbcSMatt Macy 	default:
260*eda14cbcSMatt Macy 		cmn_err(CE_PANIC, "invalid RAID-Z configuration %d",
261*eda14cbcSMatt Macy 		    raidz_parity(rm));
262*eda14cbcSMatt Macy 		break;
263*eda14cbcSMatt Macy 	}
264*eda14cbcSMatt Macy 
265*eda14cbcSMatt Macy 	if (rec_fn == NULL)
266*eda14cbcSMatt Macy 		return (RAIDZ_ORIGINAL_IMPL);
267*eda14cbcSMatt Macy 	else
268*eda14cbcSMatt Macy 		return (rec_fn(rm, dt));
269*eda14cbcSMatt Macy }
270*eda14cbcSMatt Macy 
/* Column labels for the parity generation methods */
const char *raidz_gen_name[] = {
	"gen_p",
	"gen_pq",
	"gen_pqr"
};

/* Column labels for the data reconstruction methods */
const char *raidz_rec_name[] = {
	"rec_p",
	"rec_q",
	"rec_r",
	"rec_pq",
	"rec_pr",
	"rec_qr",
	"rec_pqr"
};
278*eda14cbcSMatt Macy 
279*eda14cbcSMatt Macy #if defined(_KERNEL)
280*eda14cbcSMatt Macy 
281*eda14cbcSMatt Macy #define	RAIDZ_KSTAT_LINE_LEN	(17 + 10*12 + 1)
282*eda14cbcSMatt Macy 
283*eda14cbcSMatt Macy static int
284*eda14cbcSMatt Macy raidz_math_kstat_headers(char *buf, size_t size)
285*eda14cbcSMatt Macy {
286*eda14cbcSMatt Macy 	int i;
287*eda14cbcSMatt Macy 	ssize_t off;
288*eda14cbcSMatt Macy 
289*eda14cbcSMatt Macy 	ASSERT3U(size, >=, RAIDZ_KSTAT_LINE_LEN);
290*eda14cbcSMatt Macy 
291*eda14cbcSMatt Macy 	off = snprintf(buf, size, "%-17s", "implementation");
292*eda14cbcSMatt Macy 
293*eda14cbcSMatt Macy 	for (i = 0; i < ARRAY_SIZE(raidz_gen_name); i++)
294*eda14cbcSMatt Macy 		off += snprintf(buf + off, size - off, "%-16s",
295*eda14cbcSMatt Macy 		    raidz_gen_name[i]);
296*eda14cbcSMatt Macy 
297*eda14cbcSMatt Macy 	for (i = 0; i < ARRAY_SIZE(raidz_rec_name); i++)
298*eda14cbcSMatt Macy 		off += snprintf(buf + off, size - off, "%-16s",
299*eda14cbcSMatt Macy 		    raidz_rec_name[i]);
300*eda14cbcSMatt Macy 
301*eda14cbcSMatt Macy 	(void) snprintf(buf + off, size - off, "\n");
302*eda14cbcSMatt Macy 
303*eda14cbcSMatt Macy 	return (0);
304*eda14cbcSMatt Macy }
305*eda14cbcSMatt Macy 
306*eda14cbcSMatt Macy static int
307*eda14cbcSMatt Macy raidz_math_kstat_data(char *buf, size_t size, void *data)
308*eda14cbcSMatt Macy {
309*eda14cbcSMatt Macy 	raidz_impl_kstat_t *fstat = &raidz_impl_kstats[raidz_supp_impl_cnt];
310*eda14cbcSMatt Macy 	raidz_impl_kstat_t *cstat = (raidz_impl_kstat_t *)data;
311*eda14cbcSMatt Macy 	ssize_t off = 0;
312*eda14cbcSMatt Macy 	int i;
313*eda14cbcSMatt Macy 
314*eda14cbcSMatt Macy 	ASSERT3U(size, >=, RAIDZ_KSTAT_LINE_LEN);
315*eda14cbcSMatt Macy 
316*eda14cbcSMatt Macy 	if (cstat == fstat) {
317*eda14cbcSMatt Macy 		off += snprintf(buf + off, size - off, "%-17s", "fastest");
318*eda14cbcSMatt Macy 
319*eda14cbcSMatt Macy 		for (i = 0; i < ARRAY_SIZE(raidz_gen_name); i++) {
320*eda14cbcSMatt Macy 			int id = fstat->gen[i];
321*eda14cbcSMatt Macy 			off += snprintf(buf + off, size - off, "%-16s",
322*eda14cbcSMatt Macy 			    raidz_supp_impl[id]->name);
323*eda14cbcSMatt Macy 		}
324*eda14cbcSMatt Macy 		for (i = 0; i < ARRAY_SIZE(raidz_rec_name); i++) {
325*eda14cbcSMatt Macy 			int id = fstat->rec[i];
326*eda14cbcSMatt Macy 			off += snprintf(buf + off, size - off, "%-16s",
327*eda14cbcSMatt Macy 			    raidz_supp_impl[id]->name);
328*eda14cbcSMatt Macy 		}
329*eda14cbcSMatt Macy 	} else {
330*eda14cbcSMatt Macy 		ptrdiff_t id = cstat - raidz_impl_kstats;
331*eda14cbcSMatt Macy 
332*eda14cbcSMatt Macy 		off += snprintf(buf + off, size - off, "%-17s",
333*eda14cbcSMatt Macy 		    raidz_supp_impl[id]->name);
334*eda14cbcSMatt Macy 
335*eda14cbcSMatt Macy 		for (i = 0; i < ARRAY_SIZE(raidz_gen_name); i++)
336*eda14cbcSMatt Macy 			off += snprintf(buf + off, size - off, "%-16llu",
337*eda14cbcSMatt Macy 			    (u_longlong_t)cstat->gen[i]);
338*eda14cbcSMatt Macy 
339*eda14cbcSMatt Macy 		for (i = 0; i < ARRAY_SIZE(raidz_rec_name); i++)
340*eda14cbcSMatt Macy 			off += snprintf(buf + off, size - off, "%-16llu",
341*eda14cbcSMatt Macy 			    (u_longlong_t)cstat->rec[i]);
342*eda14cbcSMatt Macy 	}
343*eda14cbcSMatt Macy 
344*eda14cbcSMatt Macy 	(void) snprintf(buf + off, size - off, "\n");
345*eda14cbcSMatt Macy 
346*eda14cbcSMatt Macy 	return (0);
347*eda14cbcSMatt Macy }
348*eda14cbcSMatt Macy 
349*eda14cbcSMatt Macy static void *
350*eda14cbcSMatt Macy raidz_math_kstat_addr(kstat_t *ksp, loff_t n)
351*eda14cbcSMatt Macy {
352*eda14cbcSMatt Macy 	if (n <= raidz_supp_impl_cnt)
353*eda14cbcSMatt Macy 		ksp->ks_private = (void *) (raidz_impl_kstats + n);
354*eda14cbcSMatt Macy 	else
355*eda14cbcSMatt Macy 		ksp->ks_private = NULL;
356*eda14cbcSMatt Macy 
357*eda14cbcSMatt Macy 	return (ksp->ks_private);
358*eda14cbcSMatt Macy }
359*eda14cbcSMatt Macy 
360*eda14cbcSMatt Macy #define	BENCH_D_COLS	(8ULL)
361*eda14cbcSMatt Macy #define	BENCH_COLS	(BENCH_D_COLS + PARITY_PQR)
362*eda14cbcSMatt Macy #define	BENCH_ZIO_SIZE	(1ULL << SPA_OLD_MAXBLOCKSHIFT)	/* 128 kiB */
363*eda14cbcSMatt Macy #define	BENCH_NS	MSEC2NSEC(25)			/* 25ms */
364*eda14cbcSMatt Macy 
365*eda14cbcSMatt Macy typedef void (*benchmark_fn)(raidz_map_t *rm, const int fn);
366*eda14cbcSMatt Macy 
367*eda14cbcSMatt Macy static void
368*eda14cbcSMatt Macy benchmark_gen_impl(raidz_map_t *rm, const int fn)
369*eda14cbcSMatt Macy {
370*eda14cbcSMatt Macy 	(void) fn;
371*eda14cbcSMatt Macy 	vdev_raidz_generate_parity(rm);
372*eda14cbcSMatt Macy }
373*eda14cbcSMatt Macy 
374*eda14cbcSMatt Macy static void
375*eda14cbcSMatt Macy benchmark_rec_impl(raidz_map_t *rm, const int fn)
376*eda14cbcSMatt Macy {
377*eda14cbcSMatt Macy 	static const int rec_tgt[7][3] = {
378*eda14cbcSMatt Macy 		{1, 2, 3},	/* rec_p:   bad QR & D[0]	*/
379*eda14cbcSMatt Macy 		{0, 2, 3},	/* rec_q:   bad PR & D[0]	*/
380*eda14cbcSMatt Macy 		{0, 1, 3},	/* rec_r:   bad PQ & D[0]	*/
381*eda14cbcSMatt Macy 		{2, 3, 4},	/* rec_pq:  bad R  & D[0][1]	*/
382*eda14cbcSMatt Macy 		{1, 3, 4},	/* rec_pr:  bad Q  & D[0][1]	*/
383*eda14cbcSMatt Macy 		{0, 3, 4},	/* rec_qr:  bad P  & D[0][1]	*/
384*eda14cbcSMatt Macy 		{3, 4, 5}	/* rec_pqr: bad    & D[0][1][2] */
385*eda14cbcSMatt Macy 	};
386*eda14cbcSMatt Macy 
387*eda14cbcSMatt Macy 	vdev_raidz_reconstruct(rm, rec_tgt[fn], 3);
388*eda14cbcSMatt Macy }
389*eda14cbcSMatt Macy 
390*eda14cbcSMatt Macy /*
391*eda14cbcSMatt Macy  * Benchmarking of all supported implementations (raidz_supp_impl_cnt)
392*eda14cbcSMatt Macy  * is performed by setting the rm_ops pointer and calling the top level
393*eda14cbcSMatt Macy  * generate/reconstruct methods of bench_rm.
394*eda14cbcSMatt Macy  */
395*eda14cbcSMatt Macy static void
396*eda14cbcSMatt Macy benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn)
397*eda14cbcSMatt Macy {
398*eda14cbcSMatt Macy 	uint64_t run_cnt, speed, best_speed = 0;
399*eda14cbcSMatt Macy 	hrtime_t t_start, t_diff;
400*eda14cbcSMatt Macy 	raidz_impl_ops_t *curr_impl;
401*eda14cbcSMatt Macy 	raidz_impl_kstat_t *fstat = &raidz_impl_kstats[raidz_supp_impl_cnt];
402*eda14cbcSMatt Macy 	int impl, i;
403*eda14cbcSMatt Macy 
404*eda14cbcSMatt Macy 	for (impl = 0; impl < raidz_supp_impl_cnt; impl++) {
405*eda14cbcSMatt Macy 		/* set an implementation to benchmark */
406*eda14cbcSMatt Macy 		curr_impl = raidz_supp_impl[impl];
407*eda14cbcSMatt Macy 		bench_rm->rm_ops = curr_impl;
408*eda14cbcSMatt Macy 
409*eda14cbcSMatt Macy 		run_cnt = 0;
410*eda14cbcSMatt Macy 		t_start = gethrtime();
411*eda14cbcSMatt Macy 
412*eda14cbcSMatt Macy 		do {
413*eda14cbcSMatt Macy 			for (i = 0; i < 25; i++, run_cnt++)
414*eda14cbcSMatt Macy 				bench_fn(bench_rm, fn);
415*eda14cbcSMatt Macy 
416*eda14cbcSMatt Macy 			t_diff = gethrtime() - t_start;
417*eda14cbcSMatt Macy 		} while (t_diff < BENCH_NS);
418*eda14cbcSMatt Macy 
419*eda14cbcSMatt Macy 		speed = run_cnt * BENCH_ZIO_SIZE * NANOSEC;
420*eda14cbcSMatt Macy 		speed /= (t_diff * BENCH_COLS);
421*eda14cbcSMatt Macy 
422*eda14cbcSMatt Macy 		if (bench_fn == benchmark_gen_impl)
423*eda14cbcSMatt Macy 			raidz_impl_kstats[impl].gen[fn] = speed;
424*eda14cbcSMatt Macy 		else
425*eda14cbcSMatt Macy 			raidz_impl_kstats[impl].rec[fn] = speed;
426*eda14cbcSMatt Macy 
427*eda14cbcSMatt Macy 		/* Update fastest implementation method */
428*eda14cbcSMatt Macy 		if (speed > best_speed) {
429*eda14cbcSMatt Macy 			best_speed = speed;
430*eda14cbcSMatt Macy 
431*eda14cbcSMatt Macy 			if (bench_fn == benchmark_gen_impl) {
432*eda14cbcSMatt Macy 				fstat->gen[fn] = impl;
433*eda14cbcSMatt Macy 				vdev_raidz_fastest_impl.gen[fn] =
434*eda14cbcSMatt Macy 				    curr_impl->gen[fn];
435*eda14cbcSMatt Macy 			} else {
436*eda14cbcSMatt Macy 				fstat->rec[fn] = impl;
437*eda14cbcSMatt Macy 				vdev_raidz_fastest_impl.rec[fn] =
438*eda14cbcSMatt Macy 				    curr_impl->rec[fn];
439*eda14cbcSMatt Macy 			}
440*eda14cbcSMatt Macy 		}
441*eda14cbcSMatt Macy 	}
442*eda14cbcSMatt Macy }
443*eda14cbcSMatt Macy #endif
444*eda14cbcSMatt Macy 
445*eda14cbcSMatt Macy /*
446*eda14cbcSMatt Macy  * Initialize and benchmark all supported implementations.
447*eda14cbcSMatt Macy  */
448*eda14cbcSMatt Macy static void
449*eda14cbcSMatt Macy benchmark_raidz(void)
450*eda14cbcSMatt Macy {
451*eda14cbcSMatt Macy 	raidz_impl_ops_t *curr_impl;
452*eda14cbcSMatt Macy 	int i, c;
453*eda14cbcSMatt Macy 
454*eda14cbcSMatt Macy 	/* Move supported impl into raidz_supp_impl */
455*eda14cbcSMatt Macy 	for (i = 0, c = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
456*eda14cbcSMatt Macy 		curr_impl = (raidz_impl_ops_t *)raidz_all_maths[i];
457*eda14cbcSMatt Macy 
458*eda14cbcSMatt Macy 		if (curr_impl->init)
459*eda14cbcSMatt Macy 			curr_impl->init();
460*eda14cbcSMatt Macy 
461*eda14cbcSMatt Macy 		if (curr_impl->is_supported())
462*eda14cbcSMatt Macy 			raidz_supp_impl[c++] = (raidz_impl_ops_t *)curr_impl;
463*eda14cbcSMatt Macy 	}
464*eda14cbcSMatt Macy 	membar_producer();		/* complete raidz_supp_impl[] init */
465*eda14cbcSMatt Macy 	raidz_supp_impl_cnt = c;	/* number of supported impl */
466*eda14cbcSMatt Macy 
467*eda14cbcSMatt Macy #if defined(_KERNEL)
468*eda14cbcSMatt Macy 	zio_t *bench_zio = NULL;
469*eda14cbcSMatt Macy 	raidz_map_t *bench_rm = NULL;
470*eda14cbcSMatt Macy 	uint64_t bench_parity;
471*eda14cbcSMatt Macy 
472*eda14cbcSMatt Macy 	/* Fake a zio and run the benchmark on a warmed up buffer */
473*eda14cbcSMatt Macy 	bench_zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP);
474*eda14cbcSMatt Macy 	bench_zio->io_offset = 0;
475*eda14cbcSMatt Macy 	bench_zio->io_size = BENCH_ZIO_SIZE; /* only data columns */
476*eda14cbcSMatt Macy 	bench_zio->io_abd = abd_alloc_linear(BENCH_ZIO_SIZE, B_TRUE);
477*eda14cbcSMatt Macy 	memset(abd_to_buf(bench_zio->io_abd), 0xAA, BENCH_ZIO_SIZE);
478*eda14cbcSMatt Macy 
479*eda14cbcSMatt Macy 	/* Benchmark parity generation methods */
480*eda14cbcSMatt Macy 	for (int fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
481*eda14cbcSMatt Macy 		bench_parity = fn + 1;
482*eda14cbcSMatt Macy 		/* New raidz_map is needed for each generate_p/q/r */
483*eda14cbcSMatt Macy 		bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
484*eda14cbcSMatt Macy 		    BENCH_D_COLS + bench_parity, bench_parity);
485*eda14cbcSMatt Macy 
486*eda14cbcSMatt Macy 		benchmark_raidz_impl(bench_rm, fn, benchmark_gen_impl);
487*eda14cbcSMatt Macy 
488*eda14cbcSMatt Macy 		vdev_raidz_map_free(bench_rm);
489*eda14cbcSMatt Macy 	}
490*eda14cbcSMatt Macy 
491*eda14cbcSMatt Macy 	/* Benchmark data reconstruction methods */
492*eda14cbcSMatt Macy 	bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
493*eda14cbcSMatt Macy 	    BENCH_COLS, PARITY_PQR);
494*eda14cbcSMatt Macy 
495*eda14cbcSMatt Macy 	for (int fn = 0; fn < RAIDZ_REC_NUM; fn++)
496*eda14cbcSMatt Macy 		benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl);
497*eda14cbcSMatt Macy 
498*eda14cbcSMatt Macy 	vdev_raidz_map_free(bench_rm);
499*eda14cbcSMatt Macy 
500*eda14cbcSMatt Macy 	/* cleanup the bench zio */
501*eda14cbcSMatt Macy 	abd_free(bench_zio->io_abd);
502*eda14cbcSMatt Macy 	kmem_free(bench_zio, sizeof (zio_t));
503*eda14cbcSMatt Macy #else
504*eda14cbcSMatt Macy 	/*
505*eda14cbcSMatt Macy 	 * Skip the benchmark in user space to avoid impacting libzpool
506*eda14cbcSMatt Macy 	 * consumers (zdb, zhack, zinject, ztest).  The last implementation
507*eda14cbcSMatt Macy 	 * is assumed to be the fastest and used by default.
508*eda14cbcSMatt Macy 	 */
509*eda14cbcSMatt Macy 	memcpy(&vdev_raidz_fastest_impl,
510*eda14cbcSMatt Macy 	    raidz_supp_impl[raidz_supp_impl_cnt - 1],
511*eda14cbcSMatt Macy 	    sizeof (vdev_raidz_fastest_impl));
512*eda14cbcSMatt Macy 	strcpy(vdev_raidz_fastest_impl.name, "fastest");
513*eda14cbcSMatt Macy #endif /* _KERNEL */
514*eda14cbcSMatt Macy }
515*eda14cbcSMatt Macy 
516*eda14cbcSMatt Macy void
517*eda14cbcSMatt Macy vdev_raidz_math_init(void)
518*eda14cbcSMatt Macy {
519*eda14cbcSMatt Macy 	/* Determine the fastest available implementation. */
520*eda14cbcSMatt Macy 	benchmark_raidz();
521*eda14cbcSMatt Macy 
522*eda14cbcSMatt Macy #if defined(_KERNEL)
523*eda14cbcSMatt Macy 	/* Install kstats for all implementations */
524*eda14cbcSMatt Macy 	raidz_math_kstat = kstat_create("zfs", 0, "vdev_raidz_bench", "misc",
525*eda14cbcSMatt Macy 	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
526*eda14cbcSMatt Macy 	if (raidz_math_kstat != NULL) {
527*eda14cbcSMatt Macy 		raidz_math_kstat->ks_data = NULL;
528*eda14cbcSMatt Macy 		raidz_math_kstat->ks_ndata = UINT32_MAX;
529*eda14cbcSMatt Macy 		kstat_set_raw_ops(raidz_math_kstat,
530*eda14cbcSMatt Macy 		    raidz_math_kstat_headers,
531*eda14cbcSMatt Macy 		    raidz_math_kstat_data,
532*eda14cbcSMatt Macy 		    raidz_math_kstat_addr);
533*eda14cbcSMatt Macy 		kstat_install(raidz_math_kstat);
534*eda14cbcSMatt Macy 	}
535*eda14cbcSMatt Macy #endif
536*eda14cbcSMatt Macy 
537*eda14cbcSMatt Macy 	/* Finish initialization */
538*eda14cbcSMatt Macy 	atomic_swap_32(&zfs_vdev_raidz_impl, user_sel_impl);
539*eda14cbcSMatt Macy 	raidz_math_initialized = B_TRUE;
540*eda14cbcSMatt Macy }
541*eda14cbcSMatt Macy 
542*eda14cbcSMatt Macy void
543*eda14cbcSMatt Macy vdev_raidz_math_fini(void)
544*eda14cbcSMatt Macy {
545*eda14cbcSMatt Macy 	raidz_impl_ops_t const *curr_impl;
546*eda14cbcSMatt Macy 
547*eda14cbcSMatt Macy #if defined(_KERNEL)
548*eda14cbcSMatt Macy 	if (raidz_math_kstat != NULL) {
549*eda14cbcSMatt Macy 		kstat_delete(raidz_math_kstat);
550*eda14cbcSMatt Macy 		raidz_math_kstat = NULL;
551*eda14cbcSMatt Macy 	}
552*eda14cbcSMatt Macy #endif
553*eda14cbcSMatt Macy 
554*eda14cbcSMatt Macy 	for (int i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
555*eda14cbcSMatt Macy 		curr_impl = raidz_all_maths[i];
556*eda14cbcSMatt Macy 		if (curr_impl->fini)
557*eda14cbcSMatt Macy 			curr_impl->fini();
558*eda14cbcSMatt Macy 	}
559*eda14cbcSMatt Macy }
560*eda14cbcSMatt Macy 
561*eda14cbcSMatt Macy static const struct {
562*eda14cbcSMatt Macy 	char *name;
563*eda14cbcSMatt Macy 	uint32_t sel;
564*eda14cbcSMatt Macy } math_impl_opts[] = {
565*eda14cbcSMatt Macy 		{ "cycle",	IMPL_CYCLE },
566*eda14cbcSMatt Macy 		{ "fastest",	IMPL_FASTEST },
567*eda14cbcSMatt Macy 		{ "original",	IMPL_ORIGINAL },
568*eda14cbcSMatt Macy 		{ "scalar",	IMPL_SCALAR }
569*eda14cbcSMatt Macy };
570*eda14cbcSMatt Macy 
571*eda14cbcSMatt Macy /*
572*eda14cbcSMatt Macy  * Function sets desired raidz implementation.
573*eda14cbcSMatt Macy  *
574*eda14cbcSMatt Macy  * If we are called before init(), user preference will be saved in
575*eda14cbcSMatt Macy  * user_sel_impl, and applied in later init() call. This occurs when module
576*eda14cbcSMatt Macy  * parameter is specified on module load. Otherwise, directly update
577*eda14cbcSMatt Macy  * zfs_vdev_raidz_impl.
578*eda14cbcSMatt Macy  *
579*eda14cbcSMatt Macy  * @val		Name of raidz implementation to use
580*eda14cbcSMatt Macy  * @param	Unused.
581*eda14cbcSMatt Macy  */
582*eda14cbcSMatt Macy int
583*eda14cbcSMatt Macy vdev_raidz_impl_set(const char *val)
584*eda14cbcSMatt Macy {
585*eda14cbcSMatt Macy 	int err = -EINVAL;
586*eda14cbcSMatt Macy 	char req_name[RAIDZ_IMPL_NAME_MAX];
587*eda14cbcSMatt Macy 	uint32_t impl = RAIDZ_IMPL_READ(user_sel_impl);
588*eda14cbcSMatt Macy 	size_t i;
589*eda14cbcSMatt Macy 
590*eda14cbcSMatt Macy 	/* sanitize input */
591*eda14cbcSMatt Macy 	i = strnlen(val, RAIDZ_IMPL_NAME_MAX);
592*eda14cbcSMatt Macy 	if (i == 0 || i == RAIDZ_IMPL_NAME_MAX)
593*eda14cbcSMatt Macy 		return (err);
594*eda14cbcSMatt Macy 
595*eda14cbcSMatt Macy 	strlcpy(req_name, val, RAIDZ_IMPL_NAME_MAX);
596*eda14cbcSMatt Macy 	while (i > 0 && !!isspace(req_name[i-1]))
597*eda14cbcSMatt Macy 		i--;
598*eda14cbcSMatt Macy 	req_name[i] = '\0';
599*eda14cbcSMatt Macy 
600*eda14cbcSMatt Macy 	/* Check mandatory options */
601*eda14cbcSMatt Macy 	for (i = 0; i < ARRAY_SIZE(math_impl_opts); i++) {
602*eda14cbcSMatt Macy 		if (strcmp(req_name, math_impl_opts[i].name) == 0) {
603*eda14cbcSMatt Macy 			impl = math_impl_opts[i].sel;
604*eda14cbcSMatt Macy 			err = 0;
605*eda14cbcSMatt Macy 			break;
606*eda14cbcSMatt Macy 		}
607*eda14cbcSMatt Macy 	}
608*eda14cbcSMatt Macy 
609*eda14cbcSMatt Macy 	/* check all supported impl if init() was already called */
610*eda14cbcSMatt Macy 	if (err != 0 && raidz_math_initialized) {
611*eda14cbcSMatt Macy 		/* check all supported implementations */
612*eda14cbcSMatt Macy 		for (i = 0; i < raidz_supp_impl_cnt; i++) {
613*eda14cbcSMatt Macy 			if (strcmp(req_name, raidz_supp_impl[i]->name) == 0) {
614*eda14cbcSMatt Macy 				impl = i;
615*eda14cbcSMatt Macy 				err = 0;
616*eda14cbcSMatt Macy 				break;
617*eda14cbcSMatt Macy 			}
618*eda14cbcSMatt Macy 		}
619*eda14cbcSMatt Macy 	}
620*eda14cbcSMatt Macy 
621*eda14cbcSMatt Macy 	if (err == 0) {
622*eda14cbcSMatt Macy 		if (raidz_math_initialized)
623*eda14cbcSMatt Macy 			atomic_swap_32(&zfs_vdev_raidz_impl, impl);
624*eda14cbcSMatt Macy 		else
625*eda14cbcSMatt Macy 			atomic_swap_32(&user_sel_impl, impl);
626*eda14cbcSMatt Macy 	}
627*eda14cbcSMatt Macy 
628*eda14cbcSMatt Macy 	return (err);
629*eda14cbcSMatt Macy }
630*eda14cbcSMatt Macy 
631*eda14cbcSMatt Macy #if defined(_KERNEL) && defined(__linux__)
632*eda14cbcSMatt Macy 
633*eda14cbcSMatt Macy static int
634*eda14cbcSMatt Macy zfs_vdev_raidz_impl_set(const char *val, zfs_kernel_param_t *kp)
635*eda14cbcSMatt Macy {
636*eda14cbcSMatt Macy 	return (vdev_raidz_impl_set(val));
637*eda14cbcSMatt Macy }
638*eda14cbcSMatt Macy 
639*eda14cbcSMatt Macy static int
640*eda14cbcSMatt Macy zfs_vdev_raidz_impl_get(char *buffer, zfs_kernel_param_t *kp)
641*eda14cbcSMatt Macy {
642*eda14cbcSMatt Macy 	int i, cnt = 0;
643*eda14cbcSMatt Macy 	char *fmt;
644*eda14cbcSMatt Macy 	const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
645*eda14cbcSMatt Macy 
646*eda14cbcSMatt Macy 	ASSERT(raidz_math_initialized);
647*eda14cbcSMatt Macy 
648*eda14cbcSMatt Macy 	/* list mandatory options */
649*eda14cbcSMatt Macy 	for (i = 0; i < ARRAY_SIZE(math_impl_opts) - 2; i++) {
650*eda14cbcSMatt Macy 		fmt = (impl == math_impl_opts[i].sel) ? "[%s] " : "%s ";
651*eda14cbcSMatt Macy 		cnt += sprintf(buffer + cnt, fmt, math_impl_opts[i].name);
652*eda14cbcSMatt Macy 	}
653*eda14cbcSMatt Macy 
654*eda14cbcSMatt Macy 	/* list all supported implementations */
655*eda14cbcSMatt Macy 	for (i = 0; i < raidz_supp_impl_cnt; i++) {
656*eda14cbcSMatt Macy 		fmt = (i == impl) ? "[%s] " : "%s ";
657*eda14cbcSMatt Macy 		cnt += sprintf(buffer + cnt, fmt, raidz_supp_impl[i]->name);
658*eda14cbcSMatt Macy 	}
659*eda14cbcSMatt Macy 
660*eda14cbcSMatt Macy 	return (cnt);
661*eda14cbcSMatt Macy }
662*eda14cbcSMatt Macy 
663*eda14cbcSMatt Macy module_param_call(zfs_vdev_raidz_impl, zfs_vdev_raidz_impl_set,
664*eda14cbcSMatt Macy     zfs_vdev_raidz_impl_get, NULL, 0644);
665*eda14cbcSMatt Macy MODULE_PARM_DESC(zfs_vdev_raidz_impl, "Select raidz implementation.");
666*eda14cbcSMatt Macy #endif
667