xref: /freebsd-src/sys/contrib/openzfs/cmd/raidz_test/raidz_test.c (revision dbd5678dca91abcefe8d046aa2f9b66497a95ffb)
1eda14cbcSMatt Macy /*
2eda14cbcSMatt Macy  * CDDL HEADER START
3eda14cbcSMatt Macy  *
4eda14cbcSMatt Macy  * The contents of this file are subject to the terms of the
5eda14cbcSMatt Macy  * Common Development and Distribution License (the "License").
6eda14cbcSMatt Macy  * You may not use this file except in compliance with the License.
7eda14cbcSMatt Macy  *
8eda14cbcSMatt Macy  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9271171e0SMartin Matuska  * or https://opensource.org/licenses/CDDL-1.0.
10eda14cbcSMatt Macy  * See the License for the specific language governing permissions
11eda14cbcSMatt Macy  * and limitations under the License.
12eda14cbcSMatt Macy  *
13eda14cbcSMatt Macy  * When distributing Covered Code, include this CDDL HEADER in each
14eda14cbcSMatt Macy  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15eda14cbcSMatt Macy  * If applicable, add the following below this CDDL HEADER, with the
16eda14cbcSMatt Macy  * fields enclosed by brackets "[]" replaced with your own identifying
17eda14cbcSMatt Macy  * information: Portions Copyright [yyyy] [name of copyright owner]
18eda14cbcSMatt Macy  *
19eda14cbcSMatt Macy  * CDDL HEADER END
20eda14cbcSMatt Macy  */
21eda14cbcSMatt Macy 
22eda14cbcSMatt Macy /*
23eda14cbcSMatt Macy  * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
24eda14cbcSMatt Macy  */
25eda14cbcSMatt Macy 
26eda14cbcSMatt Macy #include <sys/zfs_context.h>
27eda14cbcSMatt Macy #include <sys/time.h>
28eda14cbcSMatt Macy #include <sys/wait.h>
29eda14cbcSMatt Macy #include <sys/zio.h>
30eda14cbcSMatt Macy #include <umem.h>
31eda14cbcSMatt Macy #include <sys/vdev_raidz.h>
32eda14cbcSMatt Macy #include <sys/vdev_raidz_impl.h>
33eda14cbcSMatt Macy #include <assert.h>
34eda14cbcSMatt Macy #include <stdio.h>
35eda14cbcSMatt Macy #include "raidz_test.h"
36eda14cbcSMatt Macy 
37eda14cbcSMatt Macy static int *rand_data;
38eda14cbcSMatt Macy raidz_test_opts_t rto_opts;
39eda14cbcSMatt Macy 
4016038816SMartin Matuska static char pid_s[16];
41eda14cbcSMatt Macy 
42eda14cbcSMatt Macy static void sig_handler(int signo)
43eda14cbcSMatt Macy {
4416038816SMartin Matuska 	int old_errno = errno;
45eda14cbcSMatt Macy 	struct sigaction action;
46eda14cbcSMatt Macy 	/*
47eda14cbcSMatt Macy 	 * Restore default action and re-raise signal so SIGSEGV and
48eda14cbcSMatt Macy 	 * SIGABRT can trigger a core dump.
49eda14cbcSMatt Macy 	 */
50eda14cbcSMatt Macy 	action.sa_handler = SIG_DFL;
51eda14cbcSMatt Macy 	sigemptyset(&action.sa_mask);
52eda14cbcSMatt Macy 	action.sa_flags = 0;
53eda14cbcSMatt Macy 	(void) sigaction(signo, &action, NULL);
54eda14cbcSMatt Macy 
5516038816SMartin Matuska 	if (rto_opts.rto_gdb) {
5616038816SMartin Matuska 		pid_t pid = fork();
5716038816SMartin Matuska 		if (pid == 0) {
5816038816SMartin Matuska 			execlp("gdb", "gdb", "-ex", "set pagination 0",
5916038816SMartin Matuska 			    "-p", pid_s, NULL);
6016038816SMartin Matuska 			_exit(-1);
6116038816SMartin Matuska 		} else if (pid > 0)
6216038816SMartin Matuska 			while (waitpid(pid, NULL, 0) == -1 && errno == EINTR)
6316038816SMartin Matuska 				;
6416038816SMartin Matuska 	}
65eda14cbcSMatt Macy 
66eda14cbcSMatt Macy 	raise(signo);
6716038816SMartin Matuska 	errno = old_errno;
68eda14cbcSMatt Macy }
69eda14cbcSMatt Macy 
70eda14cbcSMatt Macy static void print_opts(raidz_test_opts_t *opts, boolean_t force)
71eda14cbcSMatt Macy {
72a0b956f5SMartin Matuska 	const char *verbose;
73eda14cbcSMatt Macy 	switch (opts->rto_v) {
74c03c5b1cSMartin Matuska 		case D_ALL:
75eda14cbcSMatt Macy 			verbose = "no";
76eda14cbcSMatt Macy 			break;
77c03c5b1cSMartin Matuska 		case D_INFO:
78eda14cbcSMatt Macy 			verbose = "info";
79eda14cbcSMatt Macy 			break;
80c03c5b1cSMartin Matuska 		case D_DEBUG:
81eda14cbcSMatt Macy 		default:
82eda14cbcSMatt Macy 			verbose = "debug";
83eda14cbcSMatt Macy 			break;
84eda14cbcSMatt Macy 	}
85eda14cbcSMatt Macy 
86eda14cbcSMatt Macy 	if (force || opts->rto_v >= D_INFO) {
87eda14cbcSMatt Macy 		(void) fprintf(stdout, DBLSEP "Running with options:\n"
88eda14cbcSMatt Macy 		    "  (-a) zio ashift                   : %zu\n"
89eda14cbcSMatt Macy 		    "  (-o) zio offset                   : 1 << %zu\n"
907877fdebSMatt Macy 		    "  (-e) expanded map                 : %s\n"
917877fdebSMatt Macy 		    "  (-r) reflow offset                : %llx\n"
92eda14cbcSMatt Macy 		    "  (-d) number of raidz data columns : %zu\n"
93eda14cbcSMatt Macy 		    "  (-s) size of DATA                 : 1 << %zu\n"
94eda14cbcSMatt Macy 		    "  (-S) sweep parameters             : %s \n"
95eda14cbcSMatt Macy 		    "  (-v) verbose                      : %s \n\n",
96eda14cbcSMatt Macy 		    opts->rto_ashift,				/* -a */
97eda14cbcSMatt Macy 		    ilog2(opts->rto_offset),			/* -o */
987877fdebSMatt Macy 		    opts->rto_expand ? "yes" : "no",		/* -e */
997877fdebSMatt Macy 		    (u_longlong_t)opts->rto_expand_offset,	/* -r */
100eda14cbcSMatt Macy 		    opts->rto_dcols,				/* -d */
101eda14cbcSMatt Macy 		    ilog2(opts->rto_dsize),			/* -s */
102eda14cbcSMatt Macy 		    opts->rto_sweep ? "yes" : "no",		/* -S */
103eda14cbcSMatt Macy 		    verbose);					/* -v */
104eda14cbcSMatt Macy 	}
105eda14cbcSMatt Macy }
106eda14cbcSMatt Macy 
107eda14cbcSMatt Macy static void usage(boolean_t requested)
108eda14cbcSMatt Macy {
109eda14cbcSMatt Macy 	const raidz_test_opts_t *o = &rto_opts_defaults;
110eda14cbcSMatt Macy 
111eda14cbcSMatt Macy 	FILE *fp = requested ? stdout : stderr;
112eda14cbcSMatt Macy 
113eda14cbcSMatt Macy 	(void) fprintf(fp, "Usage:\n"
114eda14cbcSMatt Macy 	    "\t[-a zio ashift (default: %zu)]\n"
115eda14cbcSMatt Macy 	    "\t[-o zio offset, exponent radix 2 (default: %zu)]\n"
116eda14cbcSMatt Macy 	    "\t[-d number of raidz data columns (default: %zu)]\n"
117eda14cbcSMatt Macy 	    "\t[-s zio size, exponent radix 2 (default: %zu)]\n"
118eda14cbcSMatt Macy 	    "\t[-S parameter sweep (default: %s)]\n"
119eda14cbcSMatt Macy 	    "\t[-t timeout for parameter sweep test]\n"
120eda14cbcSMatt Macy 	    "\t[-B benchmark all raidz implementations]\n"
1217877fdebSMatt Macy 	    "\t[-e use expanded raidz map (default: %s)]\n"
1227877fdebSMatt Macy 	    "\t[-r expanded raidz map reflow offset (default: %llx)]\n"
123c03c5b1cSMartin Matuska 	    "\t[-v increase verbosity (default: %d)]\n"
124eda14cbcSMatt Macy 	    "\t[-h (print help)]\n"
125eda14cbcSMatt Macy 	    "\t[-T test the test, see if failure would be detected]\n"
126eda14cbcSMatt Macy 	    "\t[-D debug (attach gdb on SIGSEGV)]\n"
127eda14cbcSMatt Macy 	    "",
128eda14cbcSMatt Macy 	    o->rto_ashift,				/* -a */
129eda14cbcSMatt Macy 	    ilog2(o->rto_offset),			/* -o */
130eda14cbcSMatt Macy 	    o->rto_dcols,				/* -d */
131eda14cbcSMatt Macy 	    ilog2(o->rto_dsize),			/* -s */
132eda14cbcSMatt Macy 	    rto_opts.rto_sweep ? "yes" : "no",		/* -S */
1337877fdebSMatt Macy 	    rto_opts.rto_expand ? "yes" : "no",		/* -e */
1347877fdebSMatt Macy 	    (u_longlong_t)o->rto_expand_offset,		/* -r */
135c03c5b1cSMartin Matuska 	    o->rto_v);					/* -v */
136eda14cbcSMatt Macy 
137eda14cbcSMatt Macy 	exit(requested ? 0 : 1);
138eda14cbcSMatt Macy }
139eda14cbcSMatt Macy 
140eda14cbcSMatt Macy static void process_options(int argc, char **argv)
141eda14cbcSMatt Macy {
142eda14cbcSMatt Macy 	size_t value;
143eda14cbcSMatt Macy 	int opt;
144eda14cbcSMatt Macy 	raidz_test_opts_t *o = &rto_opts;
145eda14cbcSMatt Macy 
146da5137abSMartin Matuska 	memcpy(o, &rto_opts_defaults, sizeof (*o));
147eda14cbcSMatt Macy 
1487877fdebSMatt Macy 	while ((opt = getopt(argc, argv, "TDBSvha:er:o:d:s:t:")) != -1) {
149eda14cbcSMatt Macy 		switch (opt) {
150eda14cbcSMatt Macy 		case 'a':
151eda14cbcSMatt Macy 			value = strtoull(optarg, NULL, 0);
152eda14cbcSMatt Macy 			o->rto_ashift = MIN(13, MAX(9, value));
153eda14cbcSMatt Macy 			break;
1547877fdebSMatt Macy 		case 'e':
1557877fdebSMatt Macy 			o->rto_expand = 1;
1567877fdebSMatt Macy 			break;
1577877fdebSMatt Macy 		case 'r':
1587877fdebSMatt Macy 			o->rto_expand_offset = strtoull(optarg, NULL, 0);
1597877fdebSMatt Macy 			break;
160eda14cbcSMatt Macy 		case 'o':
161eda14cbcSMatt Macy 			value = strtoull(optarg, NULL, 0);
162eda14cbcSMatt Macy 			o->rto_offset = ((1ULL << MIN(12, value)) >> 9) << 9;
163eda14cbcSMatt Macy 			break;
164eda14cbcSMatt Macy 		case 'd':
165eda14cbcSMatt Macy 			value = strtoull(optarg, NULL, 0);
166eda14cbcSMatt Macy 			o->rto_dcols = MIN(255, MAX(1, value));
167eda14cbcSMatt Macy 			break;
168eda14cbcSMatt Macy 		case 's':
169eda14cbcSMatt Macy 			value = strtoull(optarg, NULL, 0);
170eda14cbcSMatt Macy 			o->rto_dsize = 1ULL <<  MIN(SPA_MAXBLOCKSHIFT,
171eda14cbcSMatt Macy 			    MAX(SPA_MINBLOCKSHIFT, value));
172eda14cbcSMatt Macy 			break;
173eda14cbcSMatt Macy 		case 't':
174eda14cbcSMatt Macy 			value = strtoull(optarg, NULL, 0);
175eda14cbcSMatt Macy 			o->rto_sweep_timeout = value;
176eda14cbcSMatt Macy 			break;
177eda14cbcSMatt Macy 		case 'v':
178eda14cbcSMatt Macy 			o->rto_v++;
179eda14cbcSMatt Macy 			break;
180eda14cbcSMatt Macy 		case 'S':
181eda14cbcSMatt Macy 			o->rto_sweep = 1;
182eda14cbcSMatt Macy 			break;
183eda14cbcSMatt Macy 		case 'B':
184eda14cbcSMatt Macy 			o->rto_benchmark = 1;
185eda14cbcSMatt Macy 			break;
186eda14cbcSMatt Macy 		case 'D':
187eda14cbcSMatt Macy 			o->rto_gdb = 1;
188eda14cbcSMatt Macy 			break;
189eda14cbcSMatt Macy 		case 'T':
190eda14cbcSMatt Macy 			o->rto_sanity = 1;
191eda14cbcSMatt Macy 			break;
192eda14cbcSMatt Macy 		case 'h':
193eda14cbcSMatt Macy 			usage(B_TRUE);
194eda14cbcSMatt Macy 			break;
195eda14cbcSMatt Macy 		case '?':
196eda14cbcSMatt Macy 		default:
197eda14cbcSMatt Macy 			usage(B_FALSE);
198eda14cbcSMatt Macy 			break;
199eda14cbcSMatt Macy 		}
200eda14cbcSMatt Macy 	}
201eda14cbcSMatt Macy }
202eda14cbcSMatt Macy 
/*
 * Accessors for the columns of a raidz row.  DATA_COL* index relative to
 * the row's first data column; CODE_COL* index from column 0.
 *
 * Every use of a macro argument is parenthesized so that any expression
 * (e.g. "rows + 1") can be passed as `rr`.  Note that DATA_COL* evaluate
 * `rr` twice.
 */
#define	DATA_COL(rr, i) \
	((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_abd)
#define	DATA_COL_SIZE(rr, i) \
	((rr)->rr_col[(rr)->rr_firstdatacol + (i)].rc_size)

#define	CODE_COL(rr, i) ((rr)->rr_col[(i)].rc_abd)
#define	CODE_COL_SIZE(rr, i) ((rr)->rr_col[(i)].rc_size)
208eda14cbcSMatt Macy 
209eda14cbcSMatt Macy static int
210eda14cbcSMatt Macy cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity)
211eda14cbcSMatt Macy {
2127877fdebSMatt Macy 	int r, i, ret = 0;
213eda14cbcSMatt Macy 
214eda14cbcSMatt Macy 	VERIFY(parity >= 1 && parity <= 3);
215eda14cbcSMatt Macy 
2167877fdebSMatt Macy 	for (r = 0; r < rm->rm_nrows; r++) {
2177877fdebSMatt Macy 		raidz_row_t * const rr = rm->rm_row[r];
2187877fdebSMatt Macy 		raidz_row_t * const rrg = opts->rm_golden->rm_row[r];
219eda14cbcSMatt Macy 		for (i = 0; i < parity; i++) {
2207877fdebSMatt Macy 			if (CODE_COL_SIZE(rrg, i) == 0) {
2217877fdebSMatt Macy 				VERIFY0(CODE_COL_SIZE(rr, i));
2227877fdebSMatt Macy 				continue;
2237877fdebSMatt Macy 			}
2247877fdebSMatt Macy 
2257877fdebSMatt Macy 			if (abd_cmp(CODE_COL(rr, i),
2267877fdebSMatt Macy 			    CODE_COL(rrg, i)) != 0) {
227eda14cbcSMatt Macy 				ret++;
228eda14cbcSMatt Macy 				LOG_OPT(D_DEBUG, opts,
229eda14cbcSMatt Macy 				    "\nParity block [%d] different!\n", i);
230eda14cbcSMatt Macy 			}
231eda14cbcSMatt Macy 		}
2327877fdebSMatt Macy 	}
233eda14cbcSMatt Macy 	return (ret);
234eda14cbcSMatt Macy }
235eda14cbcSMatt Macy 
236eda14cbcSMatt Macy static int
237eda14cbcSMatt Macy cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm)
238eda14cbcSMatt Macy {
2397877fdebSMatt Macy 	int r, i, dcols, ret = 0;
240eda14cbcSMatt Macy 
2417877fdebSMatt Macy 	for (r = 0; r < rm->rm_nrows; r++) {
2427877fdebSMatt Macy 		raidz_row_t *rr = rm->rm_row[r];
2437877fdebSMatt Macy 		raidz_row_t *rrg = opts->rm_golden->rm_row[r];
2447877fdebSMatt Macy 		dcols = opts->rm_golden->rm_row[0]->rr_cols -
2457877fdebSMatt Macy 		    raidz_parity(opts->rm_golden);
246eda14cbcSMatt Macy 		for (i = 0; i < dcols; i++) {
2477877fdebSMatt Macy 			if (DATA_COL_SIZE(rrg, i) == 0) {
2487877fdebSMatt Macy 				VERIFY0(DATA_COL_SIZE(rr, i));
2497877fdebSMatt Macy 				continue;
2507877fdebSMatt Macy 			}
2517877fdebSMatt Macy 
2527877fdebSMatt Macy 			if (abd_cmp(DATA_COL(rrg, i),
2537877fdebSMatt Macy 			    DATA_COL(rr, i)) != 0) {
254eda14cbcSMatt Macy 				ret++;
255eda14cbcSMatt Macy 
256eda14cbcSMatt Macy 				LOG_OPT(D_DEBUG, opts,
257eda14cbcSMatt Macy 				    "\nData block [%d] different!\n", i);
258eda14cbcSMatt Macy 			}
259eda14cbcSMatt Macy 		}
2607877fdebSMatt Macy 	}
261eda14cbcSMatt Macy 	return (ret);
262eda14cbcSMatt Macy }
263eda14cbcSMatt Macy 
264eda14cbcSMatt Macy static int
265eda14cbcSMatt Macy init_rand(void *data, size_t size, void *private)
266eda14cbcSMatt Macy {
267e92ffd9bSMartin Matuska 	(void) private;
268e92ffd9bSMartin Matuska 	memcpy(data, rand_data, size);
269eda14cbcSMatt Macy 	return (0);
270eda14cbcSMatt Macy }
271eda14cbcSMatt Macy 
272eda14cbcSMatt Macy static void
273eda14cbcSMatt Macy corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt)
274eda14cbcSMatt Macy {
2757877fdebSMatt Macy 	for (int r = 0; r < rm->rm_nrows; r++) {
2767877fdebSMatt Macy 		raidz_row_t *rr = rm->rm_row[r];
2777877fdebSMatt Macy 		for (int i = 0; i < cnt; i++) {
2787877fdebSMatt Macy 			raidz_col_t *col = &rr->rr_col[tgts[i]];
2797877fdebSMatt Macy 			abd_iterate_func(col->rc_abd, 0, col->rc_size,
2807877fdebSMatt Macy 			    init_rand, NULL);
2817877fdebSMatt Macy 		}
282eda14cbcSMatt Macy 	}
283eda14cbcSMatt Macy }
284eda14cbcSMatt Macy 
285eda14cbcSMatt Macy void
286eda14cbcSMatt Macy init_zio_abd(zio_t *zio)
287eda14cbcSMatt Macy {
288eda14cbcSMatt Macy 	abd_iterate_func(zio->io_abd, 0, zio->io_size, init_rand, NULL);
289eda14cbcSMatt Macy }
290eda14cbcSMatt Macy 
291eda14cbcSMatt Macy static void
292eda14cbcSMatt Macy fini_raidz_map(zio_t **zio, raidz_map_t **rm)
293eda14cbcSMatt Macy {
294eda14cbcSMatt Macy 	vdev_raidz_map_free(*rm);
295eda14cbcSMatt Macy 	raidz_free((*zio)->io_abd, (*zio)->io_size);
296eda14cbcSMatt Macy 	umem_free(*zio, sizeof (zio_t));
297eda14cbcSMatt Macy 
298eda14cbcSMatt Macy 	*zio = NULL;
299eda14cbcSMatt Macy 	*rm = NULL;
300eda14cbcSMatt Macy }
301eda14cbcSMatt Macy 
302eda14cbcSMatt Macy static int
303eda14cbcSMatt Macy init_raidz_golden_map(raidz_test_opts_t *opts, const int parity)
304eda14cbcSMatt Macy {
305eda14cbcSMatt Macy 	int err = 0;
306eda14cbcSMatt Macy 	zio_t *zio_test;
307eda14cbcSMatt Macy 	raidz_map_t *rm_test;
308eda14cbcSMatt Macy 	const size_t total_ncols = opts->rto_dcols + parity;
309eda14cbcSMatt Macy 
310eda14cbcSMatt Macy 	if (opts->rm_golden) {
311eda14cbcSMatt Macy 		fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
312eda14cbcSMatt Macy 	}
313eda14cbcSMatt Macy 
314eda14cbcSMatt Macy 	opts->zio_golden = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
315eda14cbcSMatt Macy 	zio_test = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
316eda14cbcSMatt Macy 
317eda14cbcSMatt Macy 	opts->zio_golden->io_offset = zio_test->io_offset = opts->rto_offset;
318eda14cbcSMatt Macy 	opts->zio_golden->io_size = zio_test->io_size = opts->rto_dsize;
319eda14cbcSMatt Macy 
320eda14cbcSMatt Macy 	opts->zio_golden->io_abd = raidz_alloc(opts->rto_dsize);
321eda14cbcSMatt Macy 	zio_test->io_abd = raidz_alloc(opts->rto_dsize);
322eda14cbcSMatt Macy 
323eda14cbcSMatt Macy 	init_zio_abd(opts->zio_golden);
324eda14cbcSMatt Macy 	init_zio_abd(zio_test);
325eda14cbcSMatt Macy 
326eda14cbcSMatt Macy 	VERIFY0(vdev_raidz_impl_set("original"));
327eda14cbcSMatt Macy 
3287877fdebSMatt Macy 	if (opts->rto_expand) {
3297877fdebSMatt Macy 		opts->rm_golden =
3307877fdebSMatt Macy 		    vdev_raidz_map_alloc_expanded(opts->zio_golden->io_abd,
3317877fdebSMatt Macy 		    opts->zio_golden->io_size, opts->zio_golden->io_offset,
3327877fdebSMatt Macy 		    opts->rto_ashift, total_ncols+1, total_ncols,
3337877fdebSMatt Macy 		    parity, opts->rto_expand_offset);
3347877fdebSMatt Macy 		rm_test = vdev_raidz_map_alloc_expanded(zio_test->io_abd,
3357877fdebSMatt Macy 		    zio_test->io_size, zio_test->io_offset,
3367877fdebSMatt Macy 		    opts->rto_ashift, total_ncols+1, total_ncols,
3377877fdebSMatt Macy 		    parity, opts->rto_expand_offset);
3387877fdebSMatt Macy 	} else {
339eda14cbcSMatt Macy 		opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden,
340eda14cbcSMatt Macy 		    opts->rto_ashift, total_ncols, parity);
341eda14cbcSMatt Macy 		rm_test = vdev_raidz_map_alloc(zio_test,
342eda14cbcSMatt Macy 		    opts->rto_ashift, total_ncols, parity);
3437877fdebSMatt Macy 	}
344eda14cbcSMatt Macy 
345eda14cbcSMatt Macy 	VERIFY(opts->zio_golden);
346eda14cbcSMatt Macy 	VERIFY(opts->rm_golden);
347eda14cbcSMatt Macy 
348eda14cbcSMatt Macy 	vdev_raidz_generate_parity(opts->rm_golden);
349eda14cbcSMatt Macy 	vdev_raidz_generate_parity(rm_test);
350eda14cbcSMatt Macy 
351eda14cbcSMatt Macy 	/* sanity check */
352eda14cbcSMatt Macy 	err |= cmp_data(opts, rm_test);
353eda14cbcSMatt Macy 	err |= cmp_code(opts, rm_test, parity);
354eda14cbcSMatt Macy 
355eda14cbcSMatt Macy 	if (err)
356eda14cbcSMatt Macy 		ERR("initializing the golden copy ... [FAIL]!\n");
357eda14cbcSMatt Macy 
358eda14cbcSMatt Macy 	/* tear down raidz_map of test zio */
359eda14cbcSMatt Macy 	fini_raidz_map(&zio_test, &rm_test);
360eda14cbcSMatt Macy 
361eda14cbcSMatt Macy 	return (err);
362eda14cbcSMatt Macy }
363eda14cbcSMatt Macy 
3647877fdebSMatt Macy /*
3657877fdebSMatt Macy  * If reflow is not in progress, reflow_offset should be UINT64_MAX.
3667877fdebSMatt Macy  * For each row, if the row is entirely before reflow_offset, it will
3677877fdebSMatt Macy  * come from the new location.  Otherwise this row will come from the
3687877fdebSMatt Macy  * old location.  Therefore, rows that straddle the reflow_offset will
3697877fdebSMatt Macy  * come from the old location.
3707877fdebSMatt Macy  *
3717877fdebSMatt Macy  * NOTE: Until raidz expansion is implemented this function is only
3727877fdebSMatt Macy  * needed by raidz_test.c to the multi-row raid_map_t functionality.
3737877fdebSMatt Macy  */
3747877fdebSMatt Macy raidz_map_t *
3757877fdebSMatt Macy vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset,
3767877fdebSMatt Macy     uint64_t ashift, uint64_t physical_cols, uint64_t logical_cols,
3777877fdebSMatt Macy     uint64_t nparity, uint64_t reflow_offset)
3787877fdebSMatt Macy {
3797877fdebSMatt Macy 	/* The zio's size in units of the vdev's minimum sector size. */
3807877fdebSMatt Macy 	uint64_t s = size >> ashift;
3817877fdebSMatt Macy 	uint64_t q, r, bc, devidx, asize = 0, tot;
3827877fdebSMatt Macy 
3837877fdebSMatt Macy 	/*
3847877fdebSMatt Macy 	 * "Quotient": The number of data sectors for this stripe on all but
3857877fdebSMatt Macy 	 * the "big column" child vdevs that also contain "remainder" data.
3867877fdebSMatt Macy 	 * AKA "full rows"
3877877fdebSMatt Macy 	 */
3887877fdebSMatt Macy 	q = s / (logical_cols - nparity);
3897877fdebSMatt Macy 
3907877fdebSMatt Macy 	/*
3917877fdebSMatt Macy 	 * "Remainder": The number of partial stripe data sectors in this I/O.
3927877fdebSMatt Macy 	 * This will add a sector to some, but not all, child vdevs.
3937877fdebSMatt Macy 	 */
3947877fdebSMatt Macy 	r = s - q * (logical_cols - nparity);
3957877fdebSMatt Macy 
3967877fdebSMatt Macy 	/* The number of "big columns" - those which contain remainder data. */
3977877fdebSMatt Macy 	bc = (r == 0 ? 0 : r + nparity);
3987877fdebSMatt Macy 
3997877fdebSMatt Macy 	/*
4007877fdebSMatt Macy 	 * The total number of data and parity sectors associated with
4017877fdebSMatt Macy 	 * this I/O.
4027877fdebSMatt Macy 	 */
4037877fdebSMatt Macy 	tot = s + nparity * (q + (r == 0 ? 0 : 1));
4047877fdebSMatt Macy 
4057877fdebSMatt Macy 	/* How many rows contain data (not skip) */
4067877fdebSMatt Macy 	uint64_t rows = howmany(tot, logical_cols);
4077877fdebSMatt Macy 	int cols = MIN(tot, logical_cols);
4087877fdebSMatt Macy 
4097877fdebSMatt Macy 	raidz_map_t *rm = kmem_zalloc(offsetof(raidz_map_t, rm_row[rows]),
4107877fdebSMatt Macy 	    KM_SLEEP);
4117877fdebSMatt Macy 	rm->rm_nrows = rows;
4127877fdebSMatt Macy 
4137877fdebSMatt Macy 	for (uint64_t row = 0; row < rows; row++) {
4147877fdebSMatt Macy 		raidz_row_t *rr = kmem_alloc(offsetof(raidz_row_t,
4157877fdebSMatt Macy 		    rr_col[cols]), KM_SLEEP);
4167877fdebSMatt Macy 		rm->rm_row[row] = rr;
4177877fdebSMatt Macy 
4187877fdebSMatt Macy 		/* The starting RAIDZ (parent) vdev sector of the row. */
4197877fdebSMatt Macy 		uint64_t b = (offset >> ashift) + row * logical_cols;
4207877fdebSMatt Macy 
4217877fdebSMatt Macy 		/*
4227877fdebSMatt Macy 		 * If we are in the middle of a reflow, and any part of this
4237877fdebSMatt Macy 		 * row has not been copied, then use the old location of
4247877fdebSMatt Macy 		 * this row.
4257877fdebSMatt Macy 		 */
4267877fdebSMatt Macy 		int row_phys_cols = physical_cols;
4277877fdebSMatt Macy 		if (b + (logical_cols - nparity) > reflow_offset >> ashift)
4287877fdebSMatt Macy 			row_phys_cols--;
4297877fdebSMatt Macy 
4307877fdebSMatt Macy 		/* starting child of this row */
4317877fdebSMatt Macy 		uint64_t child_id = b % row_phys_cols;
4327877fdebSMatt Macy 		/* The starting byte offset on each child vdev. */
4337877fdebSMatt Macy 		uint64_t child_offset = (b / row_phys_cols) << ashift;
4347877fdebSMatt Macy 
4357877fdebSMatt Macy 		/*
4367877fdebSMatt Macy 		 * We set cols to the entire width of the block, even
4377877fdebSMatt Macy 		 * if this row is shorter.  This is needed because parity
4387877fdebSMatt Macy 		 * generation (for Q and R) needs to know the entire width,
4397877fdebSMatt Macy 		 * because it treats the short row as though it was
4407877fdebSMatt Macy 		 * full-width (and the "phantom" sectors were zero-filled).
4417877fdebSMatt Macy 		 *
4427877fdebSMatt Macy 		 * Another approach to this would be to set cols shorter
4437877fdebSMatt Macy 		 * (to just the number of columns that we might do i/o to)
4447877fdebSMatt Macy 		 * and have another mechanism to tell the parity generation
4457877fdebSMatt Macy 		 * about the "entire width".  Reconstruction (at least
4467877fdebSMatt Macy 		 * vdev_raidz_reconstruct_general()) would also need to
4477877fdebSMatt Macy 		 * know about the "entire width".
4487877fdebSMatt Macy 		 */
4497877fdebSMatt Macy 		rr->rr_cols = cols;
4507877fdebSMatt Macy 		rr->rr_bigcols = bc;
4517877fdebSMatt Macy 		rr->rr_missingdata = 0;
4527877fdebSMatt Macy 		rr->rr_missingparity = 0;
4537877fdebSMatt Macy 		rr->rr_firstdatacol = nparity;
4547877fdebSMatt Macy 		rr->rr_abd_empty = NULL;
4557877fdebSMatt Macy 		rr->rr_nempty = 0;
4567877fdebSMatt Macy 
4577877fdebSMatt Macy 		for (int c = 0; c < rr->rr_cols; c++, child_id++) {
4587877fdebSMatt Macy 			if (child_id >= row_phys_cols) {
4597877fdebSMatt Macy 				child_id -= row_phys_cols;
4607877fdebSMatt Macy 				child_offset += 1ULL << ashift;
4617877fdebSMatt Macy 			}
4627877fdebSMatt Macy 			rr->rr_col[c].rc_devidx = child_id;
4637877fdebSMatt Macy 			rr->rr_col[c].rc_offset = child_offset;
4647877fdebSMatt Macy 			rr->rr_col[c].rc_orig_data = NULL;
4657877fdebSMatt Macy 			rr->rr_col[c].rc_error = 0;
4667877fdebSMatt Macy 			rr->rr_col[c].rc_tried = 0;
4677877fdebSMatt Macy 			rr->rr_col[c].rc_skipped = 0;
4687877fdebSMatt Macy 			rr->rr_col[c].rc_need_orig_restore = B_FALSE;
4697877fdebSMatt Macy 
4707877fdebSMatt Macy 			uint64_t dc = c - rr->rr_firstdatacol;
4717877fdebSMatt Macy 			if (c < rr->rr_firstdatacol) {
4727877fdebSMatt Macy 				rr->rr_col[c].rc_size = 1ULL << ashift;
4737877fdebSMatt Macy 				rr->rr_col[c].rc_abd =
4747877fdebSMatt Macy 				    abd_alloc_linear(rr->rr_col[c].rc_size,
4757877fdebSMatt Macy 				    B_TRUE);
4767877fdebSMatt Macy 			} else if (row == rows - 1 && bc != 0 && c >= bc) {
4777877fdebSMatt Macy 				/*
4787877fdebSMatt Macy 				 * Past the end, this for parity generation.
4797877fdebSMatt Macy 				 */
4807877fdebSMatt Macy 				rr->rr_col[c].rc_size = 0;
4817877fdebSMatt Macy 				rr->rr_col[c].rc_abd = NULL;
4827877fdebSMatt Macy 			} else {
4837877fdebSMatt Macy 				/*
4847877fdebSMatt Macy 				 * "data column" (col excluding parity)
4857877fdebSMatt Macy 				 * Add an ASCII art diagram here
4867877fdebSMatt Macy 				 */
4877877fdebSMatt Macy 				uint64_t off;
4887877fdebSMatt Macy 
4897877fdebSMatt Macy 				if (c < bc || r == 0) {
4907877fdebSMatt Macy 					off = dc * rows + row;
4917877fdebSMatt Macy 				} else {
4927877fdebSMatt Macy 					off = r * rows +
4937877fdebSMatt Macy 					    (dc - r) * (rows - 1) + row;
4947877fdebSMatt Macy 				}
4957877fdebSMatt Macy 				rr->rr_col[c].rc_size = 1ULL << ashift;
496184c1b94SMartin Matuska 				rr->rr_col[c].rc_abd = abd_get_offset_struct(
497184c1b94SMartin Matuska 				    &rr->rr_col[c].rc_abdstruct,
498184c1b94SMartin Matuska 				    abd, off << ashift, 1 << ashift);
4997877fdebSMatt Macy 			}
5007877fdebSMatt Macy 
5017877fdebSMatt Macy 			asize += rr->rr_col[c].rc_size;
5027877fdebSMatt Macy 		}
5037877fdebSMatt Macy 		/*
5047877fdebSMatt Macy 		 * If all data stored spans all columns, there's a danger that
5057877fdebSMatt Macy 		 * parity will always be on the same device and, since parity
5067877fdebSMatt Macy 		 * isn't read during normal operation, that that device's I/O
5077877fdebSMatt Macy 		 * bandwidth won't be used effectively. We therefore switch
5087877fdebSMatt Macy 		 * the parity every 1MB.
5097877fdebSMatt Macy 		 *
5107877fdebSMatt Macy 		 * ...at least that was, ostensibly, the theory. As a practical
5117877fdebSMatt Macy 		 * matter unless we juggle the parity between all devices
5127877fdebSMatt Macy 		 * evenly, we won't see any benefit. Further, occasional writes
5137877fdebSMatt Macy 		 * that aren't a multiple of the LCM of the number of children
5147877fdebSMatt Macy 		 * and the minimum stripe width are sufficient to avoid pessimal
5157877fdebSMatt Macy 		 * behavior. Unfortunately, this decision created an implicit
5167877fdebSMatt Macy 		 * on-disk format requirement that we need to support for all
5177877fdebSMatt Macy 		 * eternity, but only for single-parity RAID-Z.
5187877fdebSMatt Macy 		 *
5197877fdebSMatt Macy 		 * If we intend to skip a sector in the zeroth column for
5207877fdebSMatt Macy 		 * padding we must make sure to note this swap. We will never
5217877fdebSMatt Macy 		 * intend to skip the first column since at least one data and
5227877fdebSMatt Macy 		 * one parity column must appear in each row.
5237877fdebSMatt Macy 		 */
5247877fdebSMatt Macy 		if (rr->rr_firstdatacol == 1 && rr->rr_cols > 1 &&
5257877fdebSMatt Macy 		    (offset & (1ULL << 20))) {
5267877fdebSMatt Macy 			ASSERT(rr->rr_cols >= 2);
5277877fdebSMatt Macy 			ASSERT(rr->rr_col[0].rc_size == rr->rr_col[1].rc_size);
5287877fdebSMatt Macy 			devidx = rr->rr_col[0].rc_devidx;
5297877fdebSMatt Macy 			uint64_t o = rr->rr_col[0].rc_offset;
5307877fdebSMatt Macy 			rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx;
5317877fdebSMatt Macy 			rr->rr_col[0].rc_offset = rr->rr_col[1].rc_offset;
5327877fdebSMatt Macy 			rr->rr_col[1].rc_devidx = devidx;
5337877fdebSMatt Macy 			rr->rr_col[1].rc_offset = o;
5347877fdebSMatt Macy 		}
5357877fdebSMatt Macy 
5367877fdebSMatt Macy 	}
5377877fdebSMatt Macy 	ASSERT3U(asize, ==, tot << ashift);
5387877fdebSMatt Macy 
5397877fdebSMatt Macy 	/* init RAIDZ parity ops */
5407877fdebSMatt Macy 	rm->rm_ops = vdev_raidz_math_get_ops();
5417877fdebSMatt Macy 
5427877fdebSMatt Macy 	return (rm);
5437877fdebSMatt Macy }
5447877fdebSMatt Macy 
545eda14cbcSMatt Macy static raidz_map_t *
546eda14cbcSMatt Macy init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity)
547eda14cbcSMatt Macy {
548eda14cbcSMatt Macy 	raidz_map_t *rm = NULL;
549eda14cbcSMatt Macy 	const size_t alloc_dsize = opts->rto_dsize;
550eda14cbcSMatt Macy 	const size_t total_ncols = opts->rto_dcols + parity;
551eda14cbcSMatt Macy 	const int ccols[] = { 0, 1, 2 };
552eda14cbcSMatt Macy 
553eda14cbcSMatt Macy 	VERIFY(zio);
554eda14cbcSMatt Macy 	VERIFY(parity <= 3 && parity >= 1);
555eda14cbcSMatt Macy 
556eda14cbcSMatt Macy 	*zio = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL);
557eda14cbcSMatt Macy 
558eda14cbcSMatt Macy 	(*zio)->io_offset = 0;
559eda14cbcSMatt Macy 	(*zio)->io_size = alloc_dsize;
560eda14cbcSMatt Macy 	(*zio)->io_abd = raidz_alloc(alloc_dsize);
561eda14cbcSMatt Macy 	init_zio_abd(*zio);
562eda14cbcSMatt Macy 
5637877fdebSMatt Macy 	if (opts->rto_expand) {
5647877fdebSMatt Macy 		rm = vdev_raidz_map_alloc_expanded((*zio)->io_abd,
5657877fdebSMatt Macy 		    (*zio)->io_size, (*zio)->io_offset,
5667877fdebSMatt Macy 		    opts->rto_ashift, total_ncols+1, total_ncols,
5677877fdebSMatt Macy 		    parity, opts->rto_expand_offset);
5687877fdebSMatt Macy 	} else {
569eda14cbcSMatt Macy 		rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift,
570eda14cbcSMatt Macy 		    total_ncols, parity);
5717877fdebSMatt Macy 	}
572eda14cbcSMatt Macy 	VERIFY(rm);
573eda14cbcSMatt Macy 
574eda14cbcSMatt Macy 	/* Make sure code columns are destroyed */
575eda14cbcSMatt Macy 	corrupt_colums(rm, ccols, parity);
576eda14cbcSMatt Macy 
577eda14cbcSMatt Macy 	return (rm);
578eda14cbcSMatt Macy }
579eda14cbcSMatt Macy 
580eda14cbcSMatt Macy static int
581eda14cbcSMatt Macy run_gen_check(raidz_test_opts_t *opts)
582eda14cbcSMatt Macy {
583eda14cbcSMatt Macy 	char **impl_name;
584eda14cbcSMatt Macy 	int fn, err = 0;
585eda14cbcSMatt Macy 	zio_t *zio_test;
586eda14cbcSMatt Macy 	raidz_map_t *rm_test;
587eda14cbcSMatt Macy 
588eda14cbcSMatt Macy 	err = init_raidz_golden_map(opts, PARITY_PQR);
589eda14cbcSMatt Macy 	if (0 != err)
590eda14cbcSMatt Macy 		return (err);
591eda14cbcSMatt Macy 
592eda14cbcSMatt Macy 	LOG(D_INFO, DBLSEP);
593eda14cbcSMatt Macy 	LOG(D_INFO, "Testing parity generation...\n");
594eda14cbcSMatt Macy 
595eda14cbcSMatt Macy 	for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
596eda14cbcSMatt Macy 	    impl_name++) {
597eda14cbcSMatt Macy 
598eda14cbcSMatt Macy 		LOG(D_INFO, SEP);
599eda14cbcSMatt Macy 		LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);
600eda14cbcSMatt Macy 
601eda14cbcSMatt Macy 		if (0 != vdev_raidz_impl_set(*impl_name)) {
602eda14cbcSMatt Macy 			LOG(D_INFO, "[SKIP]\n");
603eda14cbcSMatt Macy 			continue;
604eda14cbcSMatt Macy 		} else {
605eda14cbcSMatt Macy 			LOG(D_INFO, "[SUPPORTED]\n");
606eda14cbcSMatt Macy 		}
607eda14cbcSMatt Macy 
608eda14cbcSMatt Macy 		for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
609eda14cbcSMatt Macy 
610eda14cbcSMatt Macy 			/* Check if should stop */
611eda14cbcSMatt Macy 			if (rto_opts.rto_should_stop)
612eda14cbcSMatt Macy 				return (err);
613eda14cbcSMatt Macy 
614eda14cbcSMatt Macy 			/* create suitable raidz_map */
615eda14cbcSMatt Macy 			rm_test = init_raidz_map(opts, &zio_test, fn+1);
616eda14cbcSMatt Macy 			VERIFY(rm_test);
617eda14cbcSMatt Macy 
618eda14cbcSMatt Macy 			LOG(D_INFO, "\t\tTesting method [%s] ...",
619eda14cbcSMatt Macy 			    raidz_gen_name[fn]);
620eda14cbcSMatt Macy 
621eda14cbcSMatt Macy 			if (!opts->rto_sanity)
622eda14cbcSMatt Macy 				vdev_raidz_generate_parity(rm_test);
623eda14cbcSMatt Macy 
624eda14cbcSMatt Macy 			if (cmp_code(opts, rm_test, fn+1) != 0) {
625eda14cbcSMatt Macy 				LOG(D_INFO, "[FAIL]\n");
626eda14cbcSMatt Macy 				err++;
627eda14cbcSMatt Macy 			} else
628eda14cbcSMatt Macy 				LOG(D_INFO, "[PASS]\n");
629eda14cbcSMatt Macy 
630eda14cbcSMatt Macy 			fini_raidz_map(&zio_test, &rm_test);
631eda14cbcSMatt Macy 		}
632eda14cbcSMatt Macy 	}
633eda14cbcSMatt Macy 
634eda14cbcSMatt Macy 	fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
635eda14cbcSMatt Macy 
636eda14cbcSMatt Macy 	return (err);
637eda14cbcSMatt Macy }
638eda14cbcSMatt Macy 
639eda14cbcSMatt Macy static int
640eda14cbcSMatt Macy run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn)
641eda14cbcSMatt Macy {
642eda14cbcSMatt Macy 	int x0, x1, x2;
643eda14cbcSMatt Macy 	int tgtidx[3];
644eda14cbcSMatt Macy 	int err = 0;
645eda14cbcSMatt Macy 	static const int rec_tgts[7][3] = {
646eda14cbcSMatt Macy 		{1, 2, 3},	/* rec_p:   bad QR & D[0]	*/
647eda14cbcSMatt Macy 		{0, 2, 3},	/* rec_q:   bad PR & D[0]	*/
648eda14cbcSMatt Macy 		{0, 1, 3},	/* rec_r:   bad PQ & D[0]	*/
649eda14cbcSMatt Macy 		{2, 3, 4},	/* rec_pq:  bad R  & D[0][1]	*/
650eda14cbcSMatt Macy 		{1, 3, 4},	/* rec_pr:  bad Q  & D[0][1]	*/
651eda14cbcSMatt Macy 		{0, 3, 4},	/* rec_qr:  bad P  & D[0][1]	*/
652eda14cbcSMatt Macy 		{3, 4, 5}	/* rec_pqr: bad    & D[0][1][2] */
653eda14cbcSMatt Macy 	};
654eda14cbcSMatt Macy 
655eda14cbcSMatt Macy 	memcpy(tgtidx, rec_tgts[fn], sizeof (tgtidx));
656eda14cbcSMatt Macy 
657eda14cbcSMatt Macy 	if (fn < RAIDZ_REC_PQ) {
658eda14cbcSMatt Macy 		/* can reconstruct 1 failed data disk */
659eda14cbcSMatt Macy 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
6607877fdebSMatt Macy 			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
661eda14cbcSMatt Macy 				continue;
662eda14cbcSMatt Macy 
663eda14cbcSMatt Macy 			/* Check if should stop */
664eda14cbcSMatt Macy 			if (rto_opts.rto_should_stop)
665eda14cbcSMatt Macy 				return (err);
666eda14cbcSMatt Macy 
667eda14cbcSMatt Macy 			LOG(D_DEBUG, "[%d] ", x0);
668eda14cbcSMatt Macy 
669eda14cbcSMatt Macy 			tgtidx[2] = x0 + raidz_parity(rm);
670eda14cbcSMatt Macy 
671eda14cbcSMatt Macy 			corrupt_colums(rm, tgtidx+2, 1);
672eda14cbcSMatt Macy 
673eda14cbcSMatt Macy 			if (!opts->rto_sanity)
674eda14cbcSMatt Macy 				vdev_raidz_reconstruct(rm, tgtidx, 3);
675eda14cbcSMatt Macy 
676eda14cbcSMatt Macy 			if (cmp_data(opts, rm) != 0) {
677eda14cbcSMatt Macy 				err++;
678eda14cbcSMatt Macy 				LOG(D_DEBUG, "\nREC D[%d]... [FAIL]\n", x0);
679eda14cbcSMatt Macy 			}
680eda14cbcSMatt Macy 		}
681eda14cbcSMatt Macy 
682eda14cbcSMatt Macy 	} else if (fn < RAIDZ_REC_PQR) {
683eda14cbcSMatt Macy 		/* can reconstruct 2 failed data disk */
684eda14cbcSMatt Macy 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
6857877fdebSMatt Macy 			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
686eda14cbcSMatt Macy 				continue;
687eda14cbcSMatt Macy 			for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
6887877fdebSMatt Macy 				if (x1 >= rm->rm_row[0]->rr_cols -
6897877fdebSMatt Macy 				    raidz_parity(rm))
690eda14cbcSMatt Macy 					continue;
691eda14cbcSMatt Macy 
692eda14cbcSMatt Macy 				/* Check if should stop */
693eda14cbcSMatt Macy 				if (rto_opts.rto_should_stop)
694eda14cbcSMatt Macy 					return (err);
695eda14cbcSMatt Macy 
696eda14cbcSMatt Macy 				LOG(D_DEBUG, "[%d %d] ", x0, x1);
697eda14cbcSMatt Macy 
698eda14cbcSMatt Macy 				tgtidx[1] = x0 + raidz_parity(rm);
699eda14cbcSMatt Macy 				tgtidx[2] = x1 + raidz_parity(rm);
700eda14cbcSMatt Macy 
701eda14cbcSMatt Macy 				corrupt_colums(rm, tgtidx+1, 2);
702eda14cbcSMatt Macy 
703eda14cbcSMatt Macy 				if (!opts->rto_sanity)
704eda14cbcSMatt Macy 					vdev_raidz_reconstruct(rm, tgtidx, 3);
705eda14cbcSMatt Macy 
706eda14cbcSMatt Macy 				if (cmp_data(opts, rm) != 0) {
707eda14cbcSMatt Macy 					err++;
708eda14cbcSMatt Macy 					LOG(D_DEBUG, "\nREC D[%d %d]... "
709eda14cbcSMatt Macy 					    "[FAIL]\n", x0, x1);
710eda14cbcSMatt Macy 				}
711eda14cbcSMatt Macy 			}
712eda14cbcSMatt Macy 		}
713eda14cbcSMatt Macy 	} else {
714eda14cbcSMatt Macy 		/* can reconstruct 3 failed data disk */
715eda14cbcSMatt Macy 		for (x0 = 0; x0 < opts->rto_dcols; x0++) {
7167877fdebSMatt Macy 			if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm))
717eda14cbcSMatt Macy 				continue;
718eda14cbcSMatt Macy 			for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) {
7197877fdebSMatt Macy 				if (x1 >= rm->rm_row[0]->rr_cols -
7207877fdebSMatt Macy 				    raidz_parity(rm))
721eda14cbcSMatt Macy 					continue;
722eda14cbcSMatt Macy 				for (x2 = x1 + 1; x2 < opts->rto_dcols; x2++) {
7237877fdebSMatt Macy 					if (x2 >= rm->rm_row[0]->rr_cols -
7247877fdebSMatt Macy 					    raidz_parity(rm))
725eda14cbcSMatt Macy 						continue;
726eda14cbcSMatt Macy 
727eda14cbcSMatt Macy 					/* Check if should stop */
728eda14cbcSMatt Macy 					if (rto_opts.rto_should_stop)
729eda14cbcSMatt Macy 						return (err);
730eda14cbcSMatt Macy 
731eda14cbcSMatt Macy 					LOG(D_DEBUG, "[%d %d %d]", x0, x1, x2);
732eda14cbcSMatt Macy 
733eda14cbcSMatt Macy 					tgtidx[0] = x0 + raidz_parity(rm);
734eda14cbcSMatt Macy 					tgtidx[1] = x1 + raidz_parity(rm);
735eda14cbcSMatt Macy 					tgtidx[2] = x2 + raidz_parity(rm);
736eda14cbcSMatt Macy 
737eda14cbcSMatt Macy 					corrupt_colums(rm, tgtidx, 3);
738eda14cbcSMatt Macy 
739eda14cbcSMatt Macy 					if (!opts->rto_sanity)
740eda14cbcSMatt Macy 						vdev_raidz_reconstruct(rm,
741eda14cbcSMatt Macy 						    tgtidx, 3);
742eda14cbcSMatt Macy 
743eda14cbcSMatt Macy 					if (cmp_data(opts, rm) != 0) {
744eda14cbcSMatt Macy 						err++;
745eda14cbcSMatt Macy 						LOG(D_DEBUG,
746eda14cbcSMatt Macy 						    "\nREC D[%d %d %d]... "
747eda14cbcSMatt Macy 						    "[FAIL]\n", x0, x1, x2);
748eda14cbcSMatt Macy 					}
749eda14cbcSMatt Macy 				}
750eda14cbcSMatt Macy 			}
751eda14cbcSMatt Macy 		}
752eda14cbcSMatt Macy 	}
753eda14cbcSMatt Macy 	return (err);
754eda14cbcSMatt Macy }
755eda14cbcSMatt Macy 
756eda14cbcSMatt Macy static int
757eda14cbcSMatt Macy run_rec_check(raidz_test_opts_t *opts)
758eda14cbcSMatt Macy {
759eda14cbcSMatt Macy 	char **impl_name;
760eda14cbcSMatt Macy 	unsigned fn, err = 0;
761eda14cbcSMatt Macy 	zio_t *zio_test;
762eda14cbcSMatt Macy 	raidz_map_t *rm_test;
763eda14cbcSMatt Macy 
764eda14cbcSMatt Macy 	err = init_raidz_golden_map(opts, PARITY_PQR);
765eda14cbcSMatt Macy 	if (0 != err)
766eda14cbcSMatt Macy 		return (err);
767eda14cbcSMatt Macy 
768eda14cbcSMatt Macy 	LOG(D_INFO, DBLSEP);
769eda14cbcSMatt Macy 	LOG(D_INFO, "Testing data reconstruction...\n");
770eda14cbcSMatt Macy 
771eda14cbcSMatt Macy 	for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL;
772eda14cbcSMatt Macy 	    impl_name++) {
773eda14cbcSMatt Macy 
774eda14cbcSMatt Macy 		LOG(D_INFO, SEP);
775eda14cbcSMatt Macy 		LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name);
776eda14cbcSMatt Macy 
777eda14cbcSMatt Macy 		if (vdev_raidz_impl_set(*impl_name) != 0) {
778eda14cbcSMatt Macy 			LOG(D_INFO, "[SKIP]\n");
779eda14cbcSMatt Macy 			continue;
780eda14cbcSMatt Macy 		} else
781eda14cbcSMatt Macy 			LOG(D_INFO, "[SUPPORTED]\n");
782eda14cbcSMatt Macy 
783eda14cbcSMatt Macy 
784eda14cbcSMatt Macy 		/* create suitable raidz_map */
785eda14cbcSMatt Macy 		rm_test = init_raidz_map(opts, &zio_test, PARITY_PQR);
786eda14cbcSMatt Macy 		/* generate parity */
787eda14cbcSMatt Macy 		vdev_raidz_generate_parity(rm_test);
788eda14cbcSMatt Macy 
789eda14cbcSMatt Macy 		for (fn = 0; fn < RAIDZ_REC_NUM; fn++) {
790eda14cbcSMatt Macy 
791eda14cbcSMatt Macy 			LOG(D_INFO, "\t\tTesting method [%s] ...",
792eda14cbcSMatt Macy 			    raidz_rec_name[fn]);
793eda14cbcSMatt Macy 
794eda14cbcSMatt Macy 			if (run_rec_check_impl(opts, rm_test, fn) != 0) {
795eda14cbcSMatt Macy 				LOG(D_INFO, "[FAIL]\n");
796eda14cbcSMatt Macy 				err++;
797eda14cbcSMatt Macy 
798eda14cbcSMatt Macy 			} else
799eda14cbcSMatt Macy 				LOG(D_INFO, "[PASS]\n");
800eda14cbcSMatt Macy 
801eda14cbcSMatt Macy 		}
802eda14cbcSMatt Macy 		/* tear down test raidz_map */
803eda14cbcSMatt Macy 		fini_raidz_map(&zio_test, &rm_test);
804eda14cbcSMatt Macy 	}
805eda14cbcSMatt Macy 
806eda14cbcSMatt Macy 	fini_raidz_map(&opts->zio_golden, &opts->rm_golden);
807eda14cbcSMatt Macy 
808eda14cbcSMatt Macy 	return (err);
809eda14cbcSMatt Macy }
810eda14cbcSMatt Macy 
811eda14cbcSMatt Macy static int
812eda14cbcSMatt Macy run_test(raidz_test_opts_t *opts)
813eda14cbcSMatt Macy {
814eda14cbcSMatt Macy 	int err = 0;
815eda14cbcSMatt Macy 
816eda14cbcSMatt Macy 	if (opts == NULL)
817eda14cbcSMatt Macy 		opts = &rto_opts;
818eda14cbcSMatt Macy 
819eda14cbcSMatt Macy 	print_opts(opts, B_FALSE);
820eda14cbcSMatt Macy 
821eda14cbcSMatt Macy 	err |= run_gen_check(opts);
822eda14cbcSMatt Macy 	err |= run_rec_check(opts);
823eda14cbcSMatt Macy 
824eda14cbcSMatt Macy 	return (err);
825eda14cbcSMatt Macy }
826eda14cbcSMatt Macy 
827eda14cbcSMatt Macy #define	SWEEP_RUNNING	0
828eda14cbcSMatt Macy #define	SWEEP_FINISHED	1
829eda14cbcSMatt Macy #define	SWEEP_ERROR	2
830eda14cbcSMatt Macy #define	SWEEP_TIMEOUT	3
831eda14cbcSMatt Macy 
832eda14cbcSMatt Macy static int sweep_state = 0;
833eda14cbcSMatt Macy static raidz_test_opts_t failed_opts;
834eda14cbcSMatt Macy 
835eda14cbcSMatt Macy static kmutex_t sem_mtx;
836eda14cbcSMatt Macy static kcondvar_t sem_cv;
837eda14cbcSMatt Macy static int max_free_slots;
838eda14cbcSMatt Macy static int free_slots;
839eda14cbcSMatt Macy 
840da5137abSMartin Matuska static __attribute__((noreturn)) void
841eda14cbcSMatt Macy sweep_thread(void *arg)
842eda14cbcSMatt Macy {
843eda14cbcSMatt Macy 	int err = 0;
844eda14cbcSMatt Macy 	raidz_test_opts_t *opts = (raidz_test_opts_t *)arg;
845eda14cbcSMatt Macy 	VERIFY(opts != NULL);
846eda14cbcSMatt Macy 
847eda14cbcSMatt Macy 	err = run_test(opts);
848eda14cbcSMatt Macy 
849eda14cbcSMatt Macy 	if (rto_opts.rto_sanity) {
850eda14cbcSMatt Macy 		/* 25% chance that a sweep test fails */
851eda14cbcSMatt Macy 		if (rand() < (RAND_MAX/4))
852eda14cbcSMatt Macy 			err = 1;
853eda14cbcSMatt Macy 	}
854eda14cbcSMatt Macy 
855eda14cbcSMatt Macy 	if (0 != err) {
856eda14cbcSMatt Macy 		mutex_enter(&sem_mtx);
857eda14cbcSMatt Macy 		memcpy(&failed_opts, opts, sizeof (raidz_test_opts_t));
858eda14cbcSMatt Macy 		sweep_state = SWEEP_ERROR;
859eda14cbcSMatt Macy 		mutex_exit(&sem_mtx);
860eda14cbcSMatt Macy 	}
861eda14cbcSMatt Macy 
862eda14cbcSMatt Macy 	umem_free(opts, sizeof (raidz_test_opts_t));
863eda14cbcSMatt Macy 
864eda14cbcSMatt Macy 	/* signal the next thread */
865eda14cbcSMatt Macy 	mutex_enter(&sem_mtx);
866eda14cbcSMatt Macy 	free_slots++;
867eda14cbcSMatt Macy 	cv_signal(&sem_cv);
868eda14cbcSMatt Macy 	mutex_exit(&sem_mtx);
869eda14cbcSMatt Macy 
870eda14cbcSMatt Macy 	thread_exit();
871eda14cbcSMatt Macy }
872eda14cbcSMatt Macy 
873eda14cbcSMatt Macy static int
874eda14cbcSMatt Macy run_sweep(void)
875eda14cbcSMatt Macy {
876eda14cbcSMatt Macy 	static const size_t dcols_v[] = { 1, 2, 3, 4, 5, 6, 7, 8, 12, 15, 16 };
877eda14cbcSMatt Macy 	static const size_t ashift_v[] = { 9, 12, 14 };
878eda14cbcSMatt Macy 	static const size_t size_v[] = { 1 << 9, 21 * (1 << 9), 13 * (1 << 12),
879eda14cbcSMatt Macy 		1 << 17, (1 << 20) - (1 << 12), SPA_MAXBLOCKSIZE };
880eda14cbcSMatt Macy 
881eda14cbcSMatt Macy 	(void) setvbuf(stdout, NULL, _IONBF, 0);
882eda14cbcSMatt Macy 
883eda14cbcSMatt Macy 	ulong_t total_comb = ARRAY_SIZE(size_v) * ARRAY_SIZE(ashift_v) *
884eda14cbcSMatt Macy 	    ARRAY_SIZE(dcols_v);
885eda14cbcSMatt Macy 	ulong_t tried_comb = 0;
886eda14cbcSMatt Macy 	hrtime_t time_diff, start_time = gethrtime();
887eda14cbcSMatt Macy 	raidz_test_opts_t *opts;
888eda14cbcSMatt Macy 	int a, d, s;
889eda14cbcSMatt Macy 
890eda14cbcSMatt Macy 	max_free_slots = free_slots = MAX(2, boot_ncpus);
891eda14cbcSMatt Macy 
892eda14cbcSMatt Macy 	mutex_init(&sem_mtx, NULL, MUTEX_DEFAULT, NULL);
893eda14cbcSMatt Macy 	cv_init(&sem_cv, NULL, CV_DEFAULT, NULL);
894eda14cbcSMatt Macy 
895eda14cbcSMatt Macy 	for (s = 0; s < ARRAY_SIZE(size_v); s++)
896eda14cbcSMatt Macy 	for (a = 0; a < ARRAY_SIZE(ashift_v); a++)
897eda14cbcSMatt Macy 	for (d = 0; d < ARRAY_SIZE(dcols_v); d++) {
898eda14cbcSMatt Macy 
899eda14cbcSMatt Macy 		if (size_v[s] < (1 << ashift_v[a])) {
900eda14cbcSMatt Macy 			total_comb--;
901eda14cbcSMatt Macy 			continue;
902eda14cbcSMatt Macy 		}
903eda14cbcSMatt Macy 
904eda14cbcSMatt Macy 		if (++tried_comb % 20 == 0)
905eda14cbcSMatt Macy 			LOG(D_ALL, "%lu/%lu... ", tried_comb, total_comb);
906eda14cbcSMatt Macy 
907eda14cbcSMatt Macy 		/* wait for signal to start new thread */
908eda14cbcSMatt Macy 		mutex_enter(&sem_mtx);
909eda14cbcSMatt Macy 		while (cv_timedwait_sig(&sem_cv, &sem_mtx,
910eda14cbcSMatt Macy 		    ddi_get_lbolt() + hz)) {
911eda14cbcSMatt Macy 
912eda14cbcSMatt Macy 			/* check if should stop the test (timeout) */
913eda14cbcSMatt Macy 			time_diff = (gethrtime() - start_time) / NANOSEC;
914eda14cbcSMatt Macy 			if (rto_opts.rto_sweep_timeout > 0 &&
915eda14cbcSMatt Macy 			    time_diff >= rto_opts.rto_sweep_timeout) {
916eda14cbcSMatt Macy 				sweep_state = SWEEP_TIMEOUT;
917eda14cbcSMatt Macy 				rto_opts.rto_should_stop = B_TRUE;
918eda14cbcSMatt Macy 				mutex_exit(&sem_mtx);
919eda14cbcSMatt Macy 				goto exit;
920eda14cbcSMatt Macy 			}
921eda14cbcSMatt Macy 
922eda14cbcSMatt Macy 			/* check if should stop the test (error) */
923eda14cbcSMatt Macy 			if (sweep_state != SWEEP_RUNNING) {
924eda14cbcSMatt Macy 				mutex_exit(&sem_mtx);
925eda14cbcSMatt Macy 				goto exit;
926eda14cbcSMatt Macy 			}
927eda14cbcSMatt Macy 
928eda14cbcSMatt Macy 			/* exit loop if a slot is available */
929eda14cbcSMatt Macy 			if (free_slots > 0) {
930eda14cbcSMatt Macy 				break;
931eda14cbcSMatt Macy 			}
932eda14cbcSMatt Macy 		}
933eda14cbcSMatt Macy 
934eda14cbcSMatt Macy 		free_slots--;
935eda14cbcSMatt Macy 		mutex_exit(&sem_mtx);
936eda14cbcSMatt Macy 
937eda14cbcSMatt Macy 		opts = umem_zalloc(sizeof (raidz_test_opts_t), UMEM_NOFAIL);
938eda14cbcSMatt Macy 		opts->rto_ashift = ashift_v[a];
939eda14cbcSMatt Macy 		opts->rto_dcols = dcols_v[d];
940*dbd5678dSMartin Matuska 		opts->rto_offset = (1ULL << ashift_v[a]) * rand();
941eda14cbcSMatt Macy 		opts->rto_dsize = size_v[s];
9427877fdebSMatt Macy 		opts->rto_expand = rto_opts.rto_expand;
9437877fdebSMatt Macy 		opts->rto_expand_offset = rto_opts.rto_expand_offset;
944eda14cbcSMatt Macy 		opts->rto_v = 0; /* be quiet */
945eda14cbcSMatt Macy 
946eda14cbcSMatt Macy 		VERIFY3P(thread_create(NULL, 0, sweep_thread, (void *) opts,
947eda14cbcSMatt Macy 		    0, NULL, TS_RUN, defclsyspri), !=, NULL);
948eda14cbcSMatt Macy 	}
949eda14cbcSMatt Macy 
950eda14cbcSMatt Macy exit:
951eda14cbcSMatt Macy 	LOG(D_ALL, "\nWaiting for test threads to finish...\n");
952eda14cbcSMatt Macy 	mutex_enter(&sem_mtx);
953eda14cbcSMatt Macy 	VERIFY(free_slots <= max_free_slots);
954eda14cbcSMatt Macy 	while (free_slots < max_free_slots) {
955eda14cbcSMatt Macy 		(void) cv_wait(&sem_cv, &sem_mtx);
956eda14cbcSMatt Macy 	}
957eda14cbcSMatt Macy 	mutex_exit(&sem_mtx);
958eda14cbcSMatt Macy 
959eda14cbcSMatt Macy 	if (sweep_state == SWEEP_ERROR) {
960eda14cbcSMatt Macy 		ERR("Sweep test failed! Failed option: \n");
961eda14cbcSMatt Macy 		print_opts(&failed_opts, B_TRUE);
962eda14cbcSMatt Macy 	} else {
963eda14cbcSMatt Macy 		if (sweep_state == SWEEP_TIMEOUT)
964eda14cbcSMatt Macy 			LOG(D_ALL, "Test timeout (%lus). Stopping...\n",
965eda14cbcSMatt Macy 			    (ulong_t)rto_opts.rto_sweep_timeout);
966eda14cbcSMatt Macy 
967eda14cbcSMatt Macy 		LOG(D_ALL, "Sweep test succeeded on %lu raidz maps!\n",
968eda14cbcSMatt Macy 		    (ulong_t)tried_comb);
969eda14cbcSMatt Macy 	}
970eda14cbcSMatt Macy 
971eda14cbcSMatt Macy 	mutex_destroy(&sem_mtx);
972eda14cbcSMatt Macy 
973eda14cbcSMatt Macy 	return (sweep_state == SWEEP_ERROR ? SWEEP_ERROR : 0);
974eda14cbcSMatt Macy }
975eda14cbcSMatt Macy 
9767877fdebSMatt Macy 
977eda14cbcSMatt Macy int
978eda14cbcSMatt Macy main(int argc, char **argv)
979eda14cbcSMatt Macy {
980eda14cbcSMatt Macy 	size_t i;
981eda14cbcSMatt Macy 	struct sigaction action;
982eda14cbcSMatt Macy 	int err = 0;
983eda14cbcSMatt Macy 
98416038816SMartin Matuska 	/* init gdb pid string early */
98516038816SMartin Matuska 	(void) sprintf(pid_s, "%d", getpid());
986eda14cbcSMatt Macy 
987eda14cbcSMatt Macy 	action.sa_handler = sig_handler;
988eda14cbcSMatt Macy 	sigemptyset(&action.sa_mask);
989eda14cbcSMatt Macy 	action.sa_flags = 0;
990eda14cbcSMatt Macy 
991eda14cbcSMatt Macy 	if (sigaction(SIGSEGV, &action, NULL) < 0) {
992eda14cbcSMatt Macy 		ERR("raidz_test: cannot catch SIGSEGV: %s.\n", strerror(errno));
993eda14cbcSMatt Macy 		exit(EXIT_FAILURE);
994eda14cbcSMatt Macy 	}
995eda14cbcSMatt Macy 
996eda14cbcSMatt Macy 	(void) setvbuf(stdout, NULL, _IOLBF, 0);
997eda14cbcSMatt Macy 
998eda14cbcSMatt Macy 	dprintf_setup(&argc, argv);
999eda14cbcSMatt Macy 
1000eda14cbcSMatt Macy 	process_options(argc, argv);
1001eda14cbcSMatt Macy 
1002eda14cbcSMatt Macy 	kernel_init(SPA_MODE_READ);
1003eda14cbcSMatt Macy 
1004eda14cbcSMatt Macy 	/* setup random data because rand() is not reentrant */
1005eda14cbcSMatt Macy 	rand_data = (int *)umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL);
1006eda14cbcSMatt Macy 	srand((unsigned)time(NULL) * getpid());
1007eda14cbcSMatt Macy 	for (i = 0; i < SPA_MAXBLOCKSIZE / sizeof (int); i++)
1008eda14cbcSMatt Macy 		rand_data[i] = rand();
1009eda14cbcSMatt Macy 
1010eda14cbcSMatt Macy 	mprotect(rand_data, SPA_MAXBLOCKSIZE, PROT_READ);
1011eda14cbcSMatt Macy 
1012eda14cbcSMatt Macy 	if (rto_opts.rto_benchmark) {
1013eda14cbcSMatt Macy 		run_raidz_benchmark();
1014eda14cbcSMatt Macy 	} else if (rto_opts.rto_sweep) {
1015eda14cbcSMatt Macy 		err = run_sweep();
1016eda14cbcSMatt Macy 	} else {
1017eda14cbcSMatt Macy 		err = run_test(NULL);
1018eda14cbcSMatt Macy 	}
1019eda14cbcSMatt Macy 
1020eda14cbcSMatt Macy 	umem_free(rand_data, SPA_MAXBLOCKSIZE);
1021eda14cbcSMatt Macy 	kernel_fini();
1022eda14cbcSMatt Macy 
1023eda14cbcSMatt Macy 	return (err);
1024eda14cbcSMatt Macy }
1025