1eda14cbcSMatt Macy /* 2eda14cbcSMatt Macy * CDDL HEADER START 3eda14cbcSMatt Macy * 4eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7eda14cbcSMatt Macy * 8eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9271171e0SMartin Matuska * or https://opensource.org/licenses/CDDL-1.0. 10eda14cbcSMatt Macy * See the License for the specific language governing permissions 11eda14cbcSMatt Macy * and limitations under the License. 12eda14cbcSMatt Macy * 13eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18eda14cbcSMatt Macy * 19eda14cbcSMatt Macy * CDDL HEADER END 20eda14cbcSMatt Macy */ 21eda14cbcSMatt Macy 22eda14cbcSMatt Macy /* 23eda14cbcSMatt Macy * Copyright (C) 2016 Gvozden Nešković. All rights reserved. 24eda14cbcSMatt Macy */ 25eda14cbcSMatt Macy 26eda14cbcSMatt Macy #include <sys/zfs_context.h> 27eda14cbcSMatt Macy #include <sys/time.h> 28eda14cbcSMatt Macy #include <sys/wait.h> 29eda14cbcSMatt Macy #include <sys/zio.h> 30eda14cbcSMatt Macy #include <umem.h> 31eda14cbcSMatt Macy #include <sys/vdev_raidz.h> 32eda14cbcSMatt Macy #include <sys/vdev_raidz_impl.h> 33eda14cbcSMatt Macy #include <assert.h> 34eda14cbcSMatt Macy #include <stdio.h> 35eda14cbcSMatt Macy #include "raidz_test.h" 36eda14cbcSMatt Macy 37eda14cbcSMatt Macy static int *rand_data; 38eda14cbcSMatt Macy raidz_test_opts_t rto_opts; 39eda14cbcSMatt Macy 4016038816SMartin Matuska static char pid_s[16]; 41eda14cbcSMatt Macy 42eda14cbcSMatt Macy static void sig_handler(int signo) 43eda14cbcSMatt Macy { 4416038816SMartin Matuska int old_errno = errno; 45eda14cbcSMatt Macy struct sigaction action; 46eda14cbcSMatt Macy /* 47eda14cbcSMatt Macy * Restore default action and re-raise signal so SIGSEGV and 48eda14cbcSMatt Macy * SIGABRT can trigger a core dump. 49eda14cbcSMatt Macy */ 50eda14cbcSMatt Macy action.sa_handler = SIG_DFL; 51eda14cbcSMatt Macy sigemptyset(&action.sa_mask); 52eda14cbcSMatt Macy action.sa_flags = 0; 53eda14cbcSMatt Macy (void) sigaction(signo, &action, NULL); 54eda14cbcSMatt Macy 5516038816SMartin Matuska if (rto_opts.rto_gdb) { 5616038816SMartin Matuska pid_t pid = fork(); 5716038816SMartin Matuska if (pid == 0) { 5816038816SMartin Matuska execlp("gdb", "gdb", "-ex", "set pagination 0", 5916038816SMartin Matuska "-p", pid_s, NULL); 6016038816SMartin Matuska _exit(-1); 6116038816SMartin Matuska } else if (pid > 0) 6216038816SMartin Matuska while (waitpid(pid, NULL, 0) == -1 && errno == EINTR) 6316038816SMartin Matuska ; 6416038816SMartin Matuska } 65eda14cbcSMatt Macy 66eda14cbcSMatt Macy raise(signo); 6716038816SMartin Matuska errno = old_errno; 68eda14cbcSMatt Macy } 69eda14cbcSMatt Macy 70eda14cbcSMatt Macy static void print_opts(raidz_test_opts_t *opts, boolean_t force) 71eda14cbcSMatt Macy { 72a0b956f5SMartin Matuska const char *verbose; 73eda14cbcSMatt Macy switch (opts->rto_v) { 74c03c5b1cSMartin Matuska case D_ALL: 75eda14cbcSMatt Macy verbose = "no"; 76eda14cbcSMatt Macy break; 77c03c5b1cSMartin Matuska case D_INFO: 78eda14cbcSMatt Macy verbose = "info"; 79eda14cbcSMatt Macy break; 80c03c5b1cSMartin Matuska case D_DEBUG: 81eda14cbcSMatt Macy default: 82eda14cbcSMatt Macy verbose = "debug"; 83eda14cbcSMatt Macy break; 84eda14cbcSMatt Macy } 85eda14cbcSMatt Macy 86eda14cbcSMatt Macy if (force || opts->rto_v >= D_INFO) { 87eda14cbcSMatt Macy (void) fprintf(stdout, DBLSEP "Running with options:\n" 88eda14cbcSMatt Macy " (-a) zio ashift : %zu\n" 89eda14cbcSMatt Macy " (-o) zio offset : 1 << %zu\n" 907877fdebSMatt Macy " (-e) expanded map : %s\n" 917877fdebSMatt Macy " (-r) reflow offset : %llx\n" 92eda14cbcSMatt Macy " (-d) number of raidz data columns : %zu\n" 93eda14cbcSMatt Macy " (-s) size of DATA : 1 << %zu\n" 94eda14cbcSMatt Macy " (-S) sweep parameters : %s \n" 95eda14cbcSMatt Macy " (-v) verbose : %s \n\n", 96eda14cbcSMatt Macy opts->rto_ashift, /* -a */ 97eda14cbcSMatt Macy ilog2(opts->rto_offset), /* -o */ 987877fdebSMatt Macy opts->rto_expand ? "yes" : "no", /* -e */ 997877fdebSMatt Macy (u_longlong_t)opts->rto_expand_offset, /* -r */ 100eda14cbcSMatt Macy opts->rto_dcols, /* -d */ 101eda14cbcSMatt Macy ilog2(opts->rto_dsize), /* -s */ 102eda14cbcSMatt Macy opts->rto_sweep ? "yes" : "no", /* -S */ 103eda14cbcSMatt Macy verbose); /* -v */ 104eda14cbcSMatt Macy } 105eda14cbcSMatt Macy } 106eda14cbcSMatt Macy 107eda14cbcSMatt Macy static void usage(boolean_t requested) 108eda14cbcSMatt Macy { 109eda14cbcSMatt Macy const raidz_test_opts_t *o = &rto_opts_defaults; 110eda14cbcSMatt Macy 111eda14cbcSMatt Macy FILE *fp = requested ? stdout : stderr; 112eda14cbcSMatt Macy 113eda14cbcSMatt Macy (void) fprintf(fp, "Usage:\n" 114eda14cbcSMatt Macy "\t[-a zio ashift (default: %zu)]\n" 115eda14cbcSMatt Macy "\t[-o zio offset, exponent radix 2 (default: %zu)]\n" 116eda14cbcSMatt Macy "\t[-d number of raidz data columns (default: %zu)]\n" 117eda14cbcSMatt Macy "\t[-s zio size, exponent radix 2 (default: %zu)]\n" 118eda14cbcSMatt Macy "\t[-S parameter sweep (default: %s)]\n" 119eda14cbcSMatt Macy "\t[-t timeout for parameter sweep test]\n" 120eda14cbcSMatt Macy "\t[-B benchmark all raidz implementations]\n" 1217877fdebSMatt Macy "\t[-e use expanded raidz map (default: %s)]\n" 1227877fdebSMatt Macy "\t[-r expanded raidz map reflow offset (default: %llx)]\n" 123c03c5b1cSMartin Matuska "\t[-v increase verbosity (default: %d)]\n" 124eda14cbcSMatt Macy "\t[-h (print help)]\n" 125eda14cbcSMatt Macy "\t[-T test the test, see if failure would be detected]\n" 126eda14cbcSMatt Macy "\t[-D debug (attach gdb on SIGSEGV)]\n" 127eda14cbcSMatt Macy "", 128eda14cbcSMatt Macy o->rto_ashift, /* -a */ 129eda14cbcSMatt Macy ilog2(o->rto_offset), /* -o */ 130eda14cbcSMatt Macy o->rto_dcols, /* -d */ 131eda14cbcSMatt Macy ilog2(o->rto_dsize), /* -s */ 132eda14cbcSMatt Macy rto_opts.rto_sweep ? "yes" : "no", /* -S */ 1337877fdebSMatt Macy rto_opts.rto_expand ? "yes" : "no", /* -e */ 1347877fdebSMatt Macy (u_longlong_t)o->rto_expand_offset, /* -r */ 135c03c5b1cSMartin Matuska o->rto_v); /* -v */ 136eda14cbcSMatt Macy 137eda14cbcSMatt Macy exit(requested ? 0 : 1); 138eda14cbcSMatt Macy } 139eda14cbcSMatt Macy 140eda14cbcSMatt Macy static void process_options(int argc, char **argv) 141eda14cbcSMatt Macy { 142eda14cbcSMatt Macy size_t value; 143eda14cbcSMatt Macy int opt; 144eda14cbcSMatt Macy raidz_test_opts_t *o = &rto_opts; 145eda14cbcSMatt Macy 146da5137abSMartin Matuska memcpy(o, &rto_opts_defaults, sizeof (*o)); 147eda14cbcSMatt Macy 1487877fdebSMatt Macy while ((opt = getopt(argc, argv, "TDBSvha:er:o:d:s:t:")) != -1) { 149eda14cbcSMatt Macy switch (opt) { 150eda14cbcSMatt Macy case 'a': 151eda14cbcSMatt Macy value = strtoull(optarg, NULL, 0); 152eda14cbcSMatt Macy o->rto_ashift = MIN(13, MAX(9, value)); 153eda14cbcSMatt Macy break; 1547877fdebSMatt Macy case 'e': 1557877fdebSMatt Macy o->rto_expand = 1; 1567877fdebSMatt Macy break; 1577877fdebSMatt Macy case 'r': 1587877fdebSMatt Macy o->rto_expand_offset = strtoull(optarg, NULL, 0); 1597877fdebSMatt Macy break; 160eda14cbcSMatt Macy case 'o': 161eda14cbcSMatt Macy value = strtoull(optarg, NULL, 0); 162eda14cbcSMatt Macy o->rto_offset = ((1ULL << MIN(12, value)) >> 9) << 9; 163eda14cbcSMatt Macy break; 164eda14cbcSMatt Macy case 'd': 165eda14cbcSMatt Macy value = strtoull(optarg, NULL, 0); 166eda14cbcSMatt Macy o->rto_dcols = MIN(255, MAX(1, value)); 167eda14cbcSMatt Macy break; 168eda14cbcSMatt Macy case 's': 169eda14cbcSMatt Macy value = strtoull(optarg, NULL, 0); 170eda14cbcSMatt Macy o->rto_dsize = 1ULL << MIN(SPA_MAXBLOCKSHIFT, 171eda14cbcSMatt Macy MAX(SPA_MINBLOCKSHIFT, value)); 172eda14cbcSMatt Macy break; 173eda14cbcSMatt Macy case 't': 174eda14cbcSMatt Macy value = strtoull(optarg, NULL, 0); 175eda14cbcSMatt Macy o->rto_sweep_timeout = value; 176eda14cbcSMatt Macy break; 177eda14cbcSMatt Macy case 'v': 178eda14cbcSMatt Macy o->rto_v++; 179eda14cbcSMatt Macy break; 180eda14cbcSMatt Macy case 'S': 181eda14cbcSMatt Macy o->rto_sweep = 1; 182eda14cbcSMatt Macy break; 183eda14cbcSMatt Macy case 'B': 184eda14cbcSMatt Macy o->rto_benchmark = 1; 185eda14cbcSMatt Macy break; 186eda14cbcSMatt Macy case 'D': 187eda14cbcSMatt Macy o->rto_gdb = 1; 188eda14cbcSMatt Macy break; 189eda14cbcSMatt Macy case 'T': 190eda14cbcSMatt Macy o->rto_sanity = 1; 191eda14cbcSMatt Macy break; 192eda14cbcSMatt Macy case 'h': 193eda14cbcSMatt Macy usage(B_TRUE); 194eda14cbcSMatt Macy break; 195eda14cbcSMatt Macy case '?': 196eda14cbcSMatt Macy default: 197eda14cbcSMatt Macy usage(B_FALSE); 198eda14cbcSMatt Macy break; 199eda14cbcSMatt Macy } 200eda14cbcSMatt Macy } 201eda14cbcSMatt Macy } 202eda14cbcSMatt Macy 2037877fdebSMatt Macy #define DATA_COL(rr, i) ((rr)->rr_col[rr->rr_firstdatacol + (i)].rc_abd) 2047877fdebSMatt Macy #define DATA_COL_SIZE(rr, i) ((rr)->rr_col[rr->rr_firstdatacol + (i)].rc_size) 205eda14cbcSMatt Macy 2067877fdebSMatt Macy #define CODE_COL(rr, i) ((rr)->rr_col[(i)].rc_abd) 2077877fdebSMatt Macy #define CODE_COL_SIZE(rr, i) ((rr)->rr_col[(i)].rc_size) 208eda14cbcSMatt Macy 209eda14cbcSMatt Macy static int 210eda14cbcSMatt Macy cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity) 211eda14cbcSMatt Macy { 2127877fdebSMatt Macy int r, i, ret = 0; 213eda14cbcSMatt Macy 214eda14cbcSMatt Macy VERIFY(parity >= 1 && parity <= 3); 215eda14cbcSMatt Macy 2167877fdebSMatt Macy for (r = 0; r < rm->rm_nrows; r++) { 2177877fdebSMatt Macy raidz_row_t * const rr = rm->rm_row[r]; 2187877fdebSMatt Macy raidz_row_t * const rrg = opts->rm_golden->rm_row[r]; 219eda14cbcSMatt Macy for (i = 0; i < parity; i++) { 2207877fdebSMatt Macy if (CODE_COL_SIZE(rrg, i) == 0) { 2217877fdebSMatt Macy VERIFY0(CODE_COL_SIZE(rr, i)); 2227877fdebSMatt Macy continue; 2237877fdebSMatt Macy } 2247877fdebSMatt Macy 2257877fdebSMatt Macy if (abd_cmp(CODE_COL(rr, i), 2267877fdebSMatt Macy CODE_COL(rrg, i)) != 0) { 227eda14cbcSMatt Macy ret++; 228eda14cbcSMatt Macy LOG_OPT(D_DEBUG, opts, 229eda14cbcSMatt Macy "\nParity block [%d] different!\n", i); 230eda14cbcSMatt Macy } 231eda14cbcSMatt Macy } 2327877fdebSMatt Macy } 233eda14cbcSMatt Macy return (ret); 234eda14cbcSMatt Macy } 235eda14cbcSMatt Macy 236eda14cbcSMatt Macy static int 237eda14cbcSMatt Macy cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm) 238eda14cbcSMatt Macy { 2397877fdebSMatt Macy int r, i, dcols, ret = 0; 240eda14cbcSMatt Macy 2417877fdebSMatt Macy for (r = 0; r < rm->rm_nrows; r++) { 2427877fdebSMatt Macy raidz_row_t *rr = rm->rm_row[r]; 2437877fdebSMatt Macy raidz_row_t *rrg = opts->rm_golden->rm_row[r]; 2447877fdebSMatt Macy dcols = opts->rm_golden->rm_row[0]->rr_cols - 2457877fdebSMatt Macy raidz_parity(opts->rm_golden); 246eda14cbcSMatt Macy for (i = 0; i < dcols; i++) { 2477877fdebSMatt Macy if (DATA_COL_SIZE(rrg, i) == 0) { 2487877fdebSMatt Macy VERIFY0(DATA_COL_SIZE(rr, i)); 2497877fdebSMatt Macy continue; 2507877fdebSMatt Macy } 2517877fdebSMatt Macy 2527877fdebSMatt Macy if (abd_cmp(DATA_COL(rrg, i), 2537877fdebSMatt Macy DATA_COL(rr, i)) != 0) { 254eda14cbcSMatt Macy ret++; 255eda14cbcSMatt Macy 256eda14cbcSMatt Macy LOG_OPT(D_DEBUG, opts, 257eda14cbcSMatt Macy "\nData block [%d] different!\n", i); 258eda14cbcSMatt Macy } 259eda14cbcSMatt Macy } 2607877fdebSMatt Macy } 261eda14cbcSMatt Macy return (ret); 262eda14cbcSMatt Macy } 263eda14cbcSMatt Macy 264eda14cbcSMatt Macy static int 265eda14cbcSMatt Macy init_rand(void *data, size_t size, void *private) 266eda14cbcSMatt Macy { 267e92ffd9bSMartin Matuska (void) private; 268e92ffd9bSMartin Matuska memcpy(data, rand_data, size); 269eda14cbcSMatt Macy return (0); 270eda14cbcSMatt Macy } 271eda14cbcSMatt Macy 272eda14cbcSMatt Macy static void 273eda14cbcSMatt Macy corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt) 274eda14cbcSMatt Macy { 2757877fdebSMatt Macy for (int r = 0; r < rm->rm_nrows; r++) { 2767877fdebSMatt Macy raidz_row_t *rr = rm->rm_row[r]; 2777877fdebSMatt Macy for (int i = 0; i < cnt; i++) { 2787877fdebSMatt Macy raidz_col_t *col = &rr->rr_col[tgts[i]]; 2797877fdebSMatt Macy abd_iterate_func(col->rc_abd, 0, col->rc_size, 2807877fdebSMatt Macy init_rand, NULL); 2817877fdebSMatt Macy } 282eda14cbcSMatt Macy } 283eda14cbcSMatt Macy } 284eda14cbcSMatt Macy 285eda14cbcSMatt Macy void 286eda14cbcSMatt Macy init_zio_abd(zio_t *zio) 287eda14cbcSMatt Macy { 288eda14cbcSMatt Macy abd_iterate_func(zio->io_abd, 0, zio->io_size, init_rand, NULL); 289eda14cbcSMatt Macy } 290eda14cbcSMatt Macy 291eda14cbcSMatt Macy static void 292eda14cbcSMatt Macy fini_raidz_map(zio_t **zio, raidz_map_t **rm) 293eda14cbcSMatt Macy { 294eda14cbcSMatt Macy vdev_raidz_map_free(*rm); 295eda14cbcSMatt Macy raidz_free((*zio)->io_abd, (*zio)->io_size); 296eda14cbcSMatt Macy umem_free(*zio, sizeof (zio_t)); 297eda14cbcSMatt Macy 298eda14cbcSMatt Macy *zio = NULL; 299eda14cbcSMatt Macy *rm = NULL; 300eda14cbcSMatt Macy } 301eda14cbcSMatt Macy 302eda14cbcSMatt Macy static int 303eda14cbcSMatt Macy init_raidz_golden_map(raidz_test_opts_t *opts, const int parity) 304eda14cbcSMatt Macy { 305eda14cbcSMatt Macy int err = 0; 306eda14cbcSMatt Macy zio_t *zio_test; 307eda14cbcSMatt Macy raidz_map_t *rm_test; 308eda14cbcSMatt Macy const size_t total_ncols = opts->rto_dcols + parity; 309eda14cbcSMatt Macy 310eda14cbcSMatt Macy if (opts->rm_golden) { 311eda14cbcSMatt Macy fini_raidz_map(&opts->zio_golden, &opts->rm_golden); 312eda14cbcSMatt Macy } 313eda14cbcSMatt Macy 314eda14cbcSMatt Macy opts->zio_golden = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL); 315eda14cbcSMatt Macy zio_test = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL); 316eda14cbcSMatt Macy 317eda14cbcSMatt Macy opts->zio_golden->io_offset = zio_test->io_offset = opts->rto_offset; 318eda14cbcSMatt Macy opts->zio_golden->io_size = zio_test->io_size = opts->rto_dsize; 319eda14cbcSMatt Macy 320eda14cbcSMatt Macy opts->zio_golden->io_abd = raidz_alloc(opts->rto_dsize); 321eda14cbcSMatt Macy zio_test->io_abd = raidz_alloc(opts->rto_dsize); 322eda14cbcSMatt Macy 323eda14cbcSMatt Macy init_zio_abd(opts->zio_golden); 324eda14cbcSMatt Macy init_zio_abd(zio_test); 325eda14cbcSMatt Macy 326eda14cbcSMatt Macy VERIFY0(vdev_raidz_impl_set("original")); 327eda14cbcSMatt Macy 3287877fdebSMatt Macy if (opts->rto_expand) { 3297877fdebSMatt Macy opts->rm_golden = 3307877fdebSMatt Macy vdev_raidz_map_alloc_expanded(opts->zio_golden->io_abd, 3317877fdebSMatt Macy opts->zio_golden->io_size, opts->zio_golden->io_offset, 3327877fdebSMatt Macy opts->rto_ashift, total_ncols+1, total_ncols, 3337877fdebSMatt Macy parity, opts->rto_expand_offset); 3347877fdebSMatt Macy rm_test = vdev_raidz_map_alloc_expanded(zio_test->io_abd, 3357877fdebSMatt Macy zio_test->io_size, zio_test->io_offset, 3367877fdebSMatt Macy opts->rto_ashift, total_ncols+1, total_ncols, 3377877fdebSMatt Macy parity, opts->rto_expand_offset); 3387877fdebSMatt Macy } else { 339eda14cbcSMatt Macy opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden, 340eda14cbcSMatt Macy opts->rto_ashift, total_ncols, parity); 341eda14cbcSMatt Macy rm_test = vdev_raidz_map_alloc(zio_test, 342eda14cbcSMatt Macy opts->rto_ashift, total_ncols, parity); 3437877fdebSMatt Macy } 344eda14cbcSMatt Macy 345eda14cbcSMatt Macy VERIFY(opts->zio_golden); 346eda14cbcSMatt Macy VERIFY(opts->rm_golden); 347eda14cbcSMatt Macy 348eda14cbcSMatt Macy vdev_raidz_generate_parity(opts->rm_golden); 349eda14cbcSMatt Macy vdev_raidz_generate_parity(rm_test); 350eda14cbcSMatt Macy 351eda14cbcSMatt Macy /* sanity check */ 352eda14cbcSMatt Macy err |= cmp_data(opts, rm_test); 353eda14cbcSMatt Macy err |= cmp_code(opts, rm_test, parity); 354eda14cbcSMatt Macy 355eda14cbcSMatt Macy if (err) 356eda14cbcSMatt Macy ERR("initializing the golden copy ... [FAIL]!\n"); 357eda14cbcSMatt Macy 358eda14cbcSMatt Macy /* tear down raidz_map of test zio */ 359eda14cbcSMatt Macy fini_raidz_map(&zio_test, &rm_test); 360eda14cbcSMatt Macy 361eda14cbcSMatt Macy return (err); 362eda14cbcSMatt Macy } 363eda14cbcSMatt Macy 3647877fdebSMatt Macy /* 3657877fdebSMatt Macy * If reflow is not in progress, reflow_offset should be UINT64_MAX. 3667877fdebSMatt Macy * For each row, if the row is entirely before reflow_offset, it will 3677877fdebSMatt Macy * come from the new location. Otherwise this row will come from the 3687877fdebSMatt Macy * old location. Therefore, rows that straddle the reflow_offset will 3697877fdebSMatt Macy * come from the old location. 3707877fdebSMatt Macy * 3717877fdebSMatt Macy * NOTE: Until raidz expansion is implemented this function is only 3727877fdebSMatt Macy * needed by raidz_test.c to the multi-row raid_map_t functionality. 3737877fdebSMatt Macy */ 3747877fdebSMatt Macy raidz_map_t * 3757877fdebSMatt Macy vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset, 3767877fdebSMatt Macy uint64_t ashift, uint64_t physical_cols, uint64_t logical_cols, 3777877fdebSMatt Macy uint64_t nparity, uint64_t reflow_offset) 3787877fdebSMatt Macy { 3797877fdebSMatt Macy /* The zio's size in units of the vdev's minimum sector size. */ 3807877fdebSMatt Macy uint64_t s = size >> ashift; 3817877fdebSMatt Macy uint64_t q, r, bc, devidx, asize = 0, tot; 3827877fdebSMatt Macy 3837877fdebSMatt Macy /* 3847877fdebSMatt Macy * "Quotient": The number of data sectors for this stripe on all but 3857877fdebSMatt Macy * the "big column" child vdevs that also contain "remainder" data. 3867877fdebSMatt Macy * AKA "full rows" 3877877fdebSMatt Macy */ 3887877fdebSMatt Macy q = s / (logical_cols - nparity); 3897877fdebSMatt Macy 3907877fdebSMatt Macy /* 3917877fdebSMatt Macy * "Remainder": The number of partial stripe data sectors in this I/O. 3927877fdebSMatt Macy * This will add a sector to some, but not all, child vdevs. 3937877fdebSMatt Macy */ 3947877fdebSMatt Macy r = s - q * (logical_cols - nparity); 3957877fdebSMatt Macy 3967877fdebSMatt Macy /* The number of "big columns" - those which contain remainder data. */ 3977877fdebSMatt Macy bc = (r == 0 ? 0 : r + nparity); 3987877fdebSMatt Macy 3997877fdebSMatt Macy /* 4007877fdebSMatt Macy * The total number of data and parity sectors associated with 4017877fdebSMatt Macy * this I/O. 4027877fdebSMatt Macy */ 4037877fdebSMatt Macy tot = s + nparity * (q + (r == 0 ? 0 : 1)); 4047877fdebSMatt Macy 4057877fdebSMatt Macy /* How many rows contain data (not skip) */ 4067877fdebSMatt Macy uint64_t rows = howmany(tot, logical_cols); 4077877fdebSMatt Macy int cols = MIN(tot, logical_cols); 4087877fdebSMatt Macy 4097877fdebSMatt Macy raidz_map_t *rm = kmem_zalloc(offsetof(raidz_map_t, rm_row[rows]), 4107877fdebSMatt Macy KM_SLEEP); 4117877fdebSMatt Macy rm->rm_nrows = rows; 4127877fdebSMatt Macy 4137877fdebSMatt Macy for (uint64_t row = 0; row < rows; row++) { 4147877fdebSMatt Macy raidz_row_t *rr = kmem_alloc(offsetof(raidz_row_t, 4157877fdebSMatt Macy rr_col[cols]), KM_SLEEP); 4167877fdebSMatt Macy rm->rm_row[row] = rr; 4177877fdebSMatt Macy 4187877fdebSMatt Macy /* The starting RAIDZ (parent) vdev sector of the row. */ 4197877fdebSMatt Macy uint64_t b = (offset >> ashift) + row * logical_cols; 4207877fdebSMatt Macy 4217877fdebSMatt Macy /* 4227877fdebSMatt Macy * If we are in the middle of a reflow, and any part of this 4237877fdebSMatt Macy * row has not been copied, then use the old location of 4247877fdebSMatt Macy * this row. 4257877fdebSMatt Macy */ 4267877fdebSMatt Macy int row_phys_cols = physical_cols; 4277877fdebSMatt Macy if (b + (logical_cols - nparity) > reflow_offset >> ashift) 4287877fdebSMatt Macy row_phys_cols--; 4297877fdebSMatt Macy 4307877fdebSMatt Macy /* starting child of this row */ 4317877fdebSMatt Macy uint64_t child_id = b % row_phys_cols; 4327877fdebSMatt Macy /* The starting byte offset on each child vdev. */ 4337877fdebSMatt Macy uint64_t child_offset = (b / row_phys_cols) << ashift; 4347877fdebSMatt Macy 4357877fdebSMatt Macy /* 4367877fdebSMatt Macy * We set cols to the entire width of the block, even 4377877fdebSMatt Macy * if this row is shorter. This is needed because parity 4387877fdebSMatt Macy * generation (for Q and R) needs to know the entire width, 4397877fdebSMatt Macy * because it treats the short row as though it was 4407877fdebSMatt Macy * full-width (and the "phantom" sectors were zero-filled). 4417877fdebSMatt Macy * 4427877fdebSMatt Macy * Another approach to this would be to set cols shorter 4437877fdebSMatt Macy * (to just the number of columns that we might do i/o to) 4447877fdebSMatt Macy * and have another mechanism to tell the parity generation 4457877fdebSMatt Macy * about the "entire width". Reconstruction (at least 4467877fdebSMatt Macy * vdev_raidz_reconstruct_general()) would also need to 4477877fdebSMatt Macy * know about the "entire width". 4487877fdebSMatt Macy */ 4497877fdebSMatt Macy rr->rr_cols = cols; 4507877fdebSMatt Macy rr->rr_bigcols = bc; 4517877fdebSMatt Macy rr->rr_missingdata = 0; 4527877fdebSMatt Macy rr->rr_missingparity = 0; 4537877fdebSMatt Macy rr->rr_firstdatacol = nparity; 4547877fdebSMatt Macy rr->rr_abd_empty = NULL; 4557877fdebSMatt Macy rr->rr_nempty = 0; 4567877fdebSMatt Macy 4577877fdebSMatt Macy for (int c = 0; c < rr->rr_cols; c++, child_id++) { 4587877fdebSMatt Macy if (child_id >= row_phys_cols) { 4597877fdebSMatt Macy child_id -= row_phys_cols; 4607877fdebSMatt Macy child_offset += 1ULL << ashift; 4617877fdebSMatt Macy } 4627877fdebSMatt Macy rr->rr_col[c].rc_devidx = child_id; 4637877fdebSMatt Macy rr->rr_col[c].rc_offset = child_offset; 4647877fdebSMatt Macy rr->rr_col[c].rc_orig_data = NULL; 4657877fdebSMatt Macy rr->rr_col[c].rc_error = 0; 4667877fdebSMatt Macy rr->rr_col[c].rc_tried = 0; 4677877fdebSMatt Macy rr->rr_col[c].rc_skipped = 0; 4687877fdebSMatt Macy rr->rr_col[c].rc_need_orig_restore = B_FALSE; 4697877fdebSMatt Macy 4707877fdebSMatt Macy uint64_t dc = c - rr->rr_firstdatacol; 4717877fdebSMatt Macy if (c < rr->rr_firstdatacol) { 4727877fdebSMatt Macy rr->rr_col[c].rc_size = 1ULL << ashift; 4737877fdebSMatt Macy rr->rr_col[c].rc_abd = 4747877fdebSMatt Macy abd_alloc_linear(rr->rr_col[c].rc_size, 4757877fdebSMatt Macy B_TRUE); 4767877fdebSMatt Macy } else if (row == rows - 1 && bc != 0 && c >= bc) { 4777877fdebSMatt Macy /* 4787877fdebSMatt Macy * Past the end, this for parity generation. 4797877fdebSMatt Macy */ 4807877fdebSMatt Macy rr->rr_col[c].rc_size = 0; 4817877fdebSMatt Macy rr->rr_col[c].rc_abd = NULL; 4827877fdebSMatt Macy } else { 4837877fdebSMatt Macy /* 4847877fdebSMatt Macy * "data column" (col excluding parity) 4857877fdebSMatt Macy * Add an ASCII art diagram here 4867877fdebSMatt Macy */ 4877877fdebSMatt Macy uint64_t off; 4887877fdebSMatt Macy 4897877fdebSMatt Macy if (c < bc || r == 0) { 4907877fdebSMatt Macy off = dc * rows + row; 4917877fdebSMatt Macy } else { 4927877fdebSMatt Macy off = r * rows + 4937877fdebSMatt Macy (dc - r) * (rows - 1) + row; 4947877fdebSMatt Macy } 4957877fdebSMatt Macy rr->rr_col[c].rc_size = 1ULL << ashift; 496184c1b94SMartin Matuska rr->rr_col[c].rc_abd = abd_get_offset_struct( 497184c1b94SMartin Matuska &rr->rr_col[c].rc_abdstruct, 498184c1b94SMartin Matuska abd, off << ashift, 1 << ashift); 4997877fdebSMatt Macy } 5007877fdebSMatt Macy 5017877fdebSMatt Macy asize += rr->rr_col[c].rc_size; 5027877fdebSMatt Macy } 5037877fdebSMatt Macy /* 5047877fdebSMatt Macy * If all data stored spans all columns, there's a danger that 5057877fdebSMatt Macy * parity will always be on the same device and, since parity 5067877fdebSMatt Macy * isn't read during normal operation, that that device's I/O 5077877fdebSMatt Macy * bandwidth won't be used effectively. We therefore switch 5087877fdebSMatt Macy * the parity every 1MB. 5097877fdebSMatt Macy * 5107877fdebSMatt Macy * ...at least that was, ostensibly, the theory. As a practical 5117877fdebSMatt Macy * matter unless we juggle the parity between all devices 5127877fdebSMatt Macy * evenly, we won't see any benefit. Further, occasional writes 5137877fdebSMatt Macy * that aren't a multiple of the LCM of the number of children 5147877fdebSMatt Macy * and the minimum stripe width are sufficient to avoid pessimal 5157877fdebSMatt Macy * behavior. Unfortunately, this decision created an implicit 5167877fdebSMatt Macy * on-disk format requirement that we need to support for all 5177877fdebSMatt Macy * eternity, but only for single-parity RAID-Z. 5187877fdebSMatt Macy * 5197877fdebSMatt Macy * If we intend to skip a sector in the zeroth column for 5207877fdebSMatt Macy * padding we must make sure to note this swap. We will never 5217877fdebSMatt Macy * intend to skip the first column since at least one data and 5227877fdebSMatt Macy * one parity column must appear in each row. 5237877fdebSMatt Macy */ 5247877fdebSMatt Macy if (rr->rr_firstdatacol == 1 && rr->rr_cols > 1 && 5257877fdebSMatt Macy (offset & (1ULL << 20))) { 5267877fdebSMatt Macy ASSERT(rr->rr_cols >= 2); 5277877fdebSMatt Macy ASSERT(rr->rr_col[0].rc_size == rr->rr_col[1].rc_size); 5287877fdebSMatt Macy devidx = rr->rr_col[0].rc_devidx; 5297877fdebSMatt Macy uint64_t o = rr->rr_col[0].rc_offset; 5307877fdebSMatt Macy rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx; 5317877fdebSMatt Macy rr->rr_col[0].rc_offset = rr->rr_col[1].rc_offset; 5327877fdebSMatt Macy rr->rr_col[1].rc_devidx = devidx; 5337877fdebSMatt Macy rr->rr_col[1].rc_offset = o; 5347877fdebSMatt Macy } 5357877fdebSMatt Macy 5367877fdebSMatt Macy } 5377877fdebSMatt Macy ASSERT3U(asize, ==, tot << ashift); 5387877fdebSMatt Macy 5397877fdebSMatt Macy /* init RAIDZ parity ops */ 5407877fdebSMatt Macy rm->rm_ops = vdev_raidz_math_get_ops(); 5417877fdebSMatt Macy 5427877fdebSMatt Macy return (rm); 5437877fdebSMatt Macy } 5447877fdebSMatt Macy 545eda14cbcSMatt Macy static raidz_map_t * 546eda14cbcSMatt Macy init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity) 547eda14cbcSMatt Macy { 548eda14cbcSMatt Macy raidz_map_t *rm = NULL; 549eda14cbcSMatt Macy const size_t alloc_dsize = opts->rto_dsize; 550eda14cbcSMatt Macy const size_t total_ncols = opts->rto_dcols + parity; 551eda14cbcSMatt Macy const int ccols[] = { 0, 1, 2 }; 552eda14cbcSMatt Macy 553eda14cbcSMatt Macy VERIFY(zio); 554eda14cbcSMatt Macy VERIFY(parity <= 3 && parity >= 1); 555eda14cbcSMatt Macy 556eda14cbcSMatt Macy *zio = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL); 557eda14cbcSMatt Macy 558eda14cbcSMatt Macy (*zio)->io_offset = 0; 559eda14cbcSMatt Macy (*zio)->io_size = alloc_dsize; 560eda14cbcSMatt Macy (*zio)->io_abd = raidz_alloc(alloc_dsize); 561eda14cbcSMatt Macy init_zio_abd(*zio); 562eda14cbcSMatt Macy 5637877fdebSMatt Macy if (opts->rto_expand) { 5647877fdebSMatt Macy rm = vdev_raidz_map_alloc_expanded((*zio)->io_abd, 5657877fdebSMatt Macy (*zio)->io_size, (*zio)->io_offset, 5667877fdebSMatt Macy opts->rto_ashift, total_ncols+1, total_ncols, 5677877fdebSMatt Macy parity, opts->rto_expand_offset); 5687877fdebSMatt Macy } else { 569eda14cbcSMatt Macy rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift, 570eda14cbcSMatt Macy total_ncols, parity); 5717877fdebSMatt Macy } 572eda14cbcSMatt Macy VERIFY(rm); 573eda14cbcSMatt Macy 574eda14cbcSMatt Macy /* Make sure code columns are destroyed */ 575eda14cbcSMatt Macy corrupt_colums(rm, ccols, parity); 576eda14cbcSMatt Macy 577eda14cbcSMatt Macy return (rm); 578eda14cbcSMatt Macy } 579eda14cbcSMatt Macy 580eda14cbcSMatt Macy static int 581eda14cbcSMatt Macy run_gen_check(raidz_test_opts_t *opts) 582eda14cbcSMatt Macy { 583eda14cbcSMatt Macy char **impl_name; 584eda14cbcSMatt Macy int fn, err = 0; 585eda14cbcSMatt Macy zio_t *zio_test; 586eda14cbcSMatt Macy raidz_map_t *rm_test; 587eda14cbcSMatt Macy 588eda14cbcSMatt Macy err = init_raidz_golden_map(opts, PARITY_PQR); 589eda14cbcSMatt Macy if (0 != err) 590eda14cbcSMatt Macy return (err); 591eda14cbcSMatt Macy 592eda14cbcSMatt Macy LOG(D_INFO, DBLSEP); 593eda14cbcSMatt Macy LOG(D_INFO, "Testing parity generation...\n"); 594eda14cbcSMatt Macy 595eda14cbcSMatt Macy for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL; 596eda14cbcSMatt Macy impl_name++) { 597eda14cbcSMatt Macy 598eda14cbcSMatt Macy LOG(D_INFO, SEP); 599eda14cbcSMatt Macy LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name); 600eda14cbcSMatt Macy 601eda14cbcSMatt Macy if (0 != vdev_raidz_impl_set(*impl_name)) { 602eda14cbcSMatt Macy LOG(D_INFO, "[SKIP]\n"); 603eda14cbcSMatt Macy continue; 604eda14cbcSMatt Macy } else { 605eda14cbcSMatt Macy LOG(D_INFO, "[SUPPORTED]\n"); 606eda14cbcSMatt Macy } 607eda14cbcSMatt Macy 608eda14cbcSMatt Macy for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) { 609eda14cbcSMatt Macy 610eda14cbcSMatt Macy /* Check if should stop */ 611eda14cbcSMatt Macy if (rto_opts.rto_should_stop) 612eda14cbcSMatt Macy return (err); 613eda14cbcSMatt Macy 614eda14cbcSMatt Macy /* create suitable raidz_map */ 615eda14cbcSMatt Macy rm_test = init_raidz_map(opts, &zio_test, fn+1); 616eda14cbcSMatt Macy VERIFY(rm_test); 617eda14cbcSMatt Macy 618eda14cbcSMatt Macy LOG(D_INFO, "\t\tTesting method [%s] ...", 619eda14cbcSMatt Macy raidz_gen_name[fn]); 620eda14cbcSMatt Macy 621eda14cbcSMatt Macy if (!opts->rto_sanity) 622eda14cbcSMatt Macy vdev_raidz_generate_parity(rm_test); 623eda14cbcSMatt Macy 624eda14cbcSMatt Macy if (cmp_code(opts, rm_test, fn+1) != 0) { 625eda14cbcSMatt Macy LOG(D_INFO, "[FAIL]\n"); 626eda14cbcSMatt Macy err++; 627eda14cbcSMatt Macy } else 628eda14cbcSMatt Macy LOG(D_INFO, "[PASS]\n"); 629eda14cbcSMatt Macy 630eda14cbcSMatt Macy fini_raidz_map(&zio_test, &rm_test); 631eda14cbcSMatt Macy } 632eda14cbcSMatt Macy } 633eda14cbcSMatt Macy 634eda14cbcSMatt Macy fini_raidz_map(&opts->zio_golden, &opts->rm_golden); 635eda14cbcSMatt Macy 636eda14cbcSMatt Macy return (err); 637eda14cbcSMatt Macy } 638eda14cbcSMatt Macy 639eda14cbcSMatt Macy static int 640eda14cbcSMatt Macy run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn) 641eda14cbcSMatt Macy { 642eda14cbcSMatt Macy int x0, x1, x2; 643eda14cbcSMatt Macy int tgtidx[3]; 644eda14cbcSMatt Macy int err = 0; 645eda14cbcSMatt Macy static const int rec_tgts[7][3] = { 646eda14cbcSMatt Macy {1, 2, 3}, /* rec_p: bad QR & D[0] */ 647eda14cbcSMatt Macy {0, 2, 3}, /* rec_q: bad PR & D[0] */ 648eda14cbcSMatt Macy {0, 1, 3}, /* rec_r: bad PQ & D[0] */ 649eda14cbcSMatt Macy {2, 3, 4}, /* rec_pq: bad R & D[0][1] */ 650eda14cbcSMatt Macy {1, 3, 4}, /* rec_pr: bad Q & D[0][1] */ 651eda14cbcSMatt Macy {0, 3, 4}, /* rec_qr: bad P & D[0][1] */ 652eda14cbcSMatt Macy {3, 4, 5} /* rec_pqr: bad & D[0][1][2] */ 653eda14cbcSMatt Macy }; 654eda14cbcSMatt Macy 655eda14cbcSMatt Macy memcpy(tgtidx, rec_tgts[fn], sizeof (tgtidx)); 656eda14cbcSMatt Macy 657eda14cbcSMatt Macy if (fn < RAIDZ_REC_PQ) { 658eda14cbcSMatt Macy /* can reconstruct 1 failed data disk */ 659eda14cbcSMatt Macy for (x0 = 0; x0 < opts->rto_dcols; x0++) { 6607877fdebSMatt Macy if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm)) 661eda14cbcSMatt Macy continue; 662eda14cbcSMatt Macy 663eda14cbcSMatt Macy /* Check if should stop */ 664eda14cbcSMatt Macy if (rto_opts.rto_should_stop) 665eda14cbcSMatt Macy return (err); 666eda14cbcSMatt Macy 667eda14cbcSMatt Macy LOG(D_DEBUG, "[%d] ", x0); 668eda14cbcSMatt Macy 669eda14cbcSMatt Macy tgtidx[2] = x0 + raidz_parity(rm); 670eda14cbcSMatt Macy 671eda14cbcSMatt Macy corrupt_colums(rm, tgtidx+2, 1); 672eda14cbcSMatt Macy 673eda14cbcSMatt Macy if (!opts->rto_sanity) 674eda14cbcSMatt Macy vdev_raidz_reconstruct(rm, tgtidx, 3); 675eda14cbcSMatt Macy 676eda14cbcSMatt Macy if (cmp_data(opts, rm) != 0) { 677eda14cbcSMatt Macy err++; 678eda14cbcSMatt Macy LOG(D_DEBUG, "\nREC D[%d]... [FAIL]\n", x0); 679eda14cbcSMatt Macy } 680eda14cbcSMatt Macy } 681eda14cbcSMatt Macy 682eda14cbcSMatt Macy } else if (fn < RAIDZ_REC_PQR) { 683eda14cbcSMatt Macy /* can reconstruct 2 failed data disk */ 684eda14cbcSMatt Macy for (x0 = 0; x0 < opts->rto_dcols; x0++) { 6857877fdebSMatt Macy if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm)) 686eda14cbcSMatt Macy continue; 687eda14cbcSMatt Macy for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) { 6887877fdebSMatt Macy if (x1 >= rm->rm_row[0]->rr_cols - 6897877fdebSMatt Macy raidz_parity(rm)) 690eda14cbcSMatt Macy continue; 691eda14cbcSMatt Macy 692eda14cbcSMatt Macy /* Check if should stop */ 693eda14cbcSMatt Macy if (rto_opts.rto_should_stop) 694eda14cbcSMatt Macy return (err); 695eda14cbcSMatt Macy 696eda14cbcSMatt Macy LOG(D_DEBUG, "[%d %d] ", x0, x1); 697eda14cbcSMatt Macy 698eda14cbcSMatt Macy tgtidx[1] = x0 + raidz_parity(rm); 699eda14cbcSMatt Macy tgtidx[2] = x1 + raidz_parity(rm); 700eda14cbcSMatt Macy 701eda14cbcSMatt Macy corrupt_colums(rm, tgtidx+1, 2); 702eda14cbcSMatt Macy 703eda14cbcSMatt Macy if (!opts->rto_sanity) 704eda14cbcSMatt Macy vdev_raidz_reconstruct(rm, tgtidx, 3); 705eda14cbcSMatt Macy 706eda14cbcSMatt Macy if (cmp_data(opts, rm) != 0) { 707eda14cbcSMatt Macy err++; 708eda14cbcSMatt Macy LOG(D_DEBUG, "\nREC D[%d %d]... " 709eda14cbcSMatt Macy "[FAIL]\n", x0, x1); 710eda14cbcSMatt Macy } 711eda14cbcSMatt Macy } 712eda14cbcSMatt Macy } 713eda14cbcSMatt Macy } else { 714eda14cbcSMatt Macy /* can reconstruct 3 failed data disk */ 715eda14cbcSMatt Macy for (x0 = 0; x0 < opts->rto_dcols; x0++) { 7167877fdebSMatt Macy if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm)) 717eda14cbcSMatt Macy continue; 718eda14cbcSMatt Macy for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) { 7197877fdebSMatt Macy if (x1 >= rm->rm_row[0]->rr_cols - 7207877fdebSMatt Macy raidz_parity(rm)) 721eda14cbcSMatt Macy continue; 722eda14cbcSMatt Macy for (x2 = x1 + 1; x2 < opts->rto_dcols; x2++) { 7237877fdebSMatt Macy if (x2 >= rm->rm_row[0]->rr_cols - 7247877fdebSMatt Macy raidz_parity(rm)) 725eda14cbcSMatt Macy continue; 726eda14cbcSMatt Macy 727eda14cbcSMatt Macy /* Check if should stop */ 728eda14cbcSMatt Macy if (rto_opts.rto_should_stop) 729eda14cbcSMatt Macy return (err); 730eda14cbcSMatt Macy 731eda14cbcSMatt Macy LOG(D_DEBUG, "[%d %d %d]", x0, x1, x2); 732eda14cbcSMatt Macy 733eda14cbcSMatt Macy tgtidx[0] = x0 + raidz_parity(rm); 734eda14cbcSMatt Macy tgtidx[1] = x1 + raidz_parity(rm); 735eda14cbcSMatt Macy tgtidx[2] = x2 + raidz_parity(rm); 736eda14cbcSMatt Macy 737eda14cbcSMatt Macy corrupt_colums(rm, tgtidx, 3); 738eda14cbcSMatt Macy 739eda14cbcSMatt Macy if (!opts->rto_sanity) 740eda14cbcSMatt Macy vdev_raidz_reconstruct(rm, 741eda14cbcSMatt Macy tgtidx, 3); 742eda14cbcSMatt Macy 743eda14cbcSMatt Macy if (cmp_data(opts, rm) != 0) { 744eda14cbcSMatt Macy err++; 745eda14cbcSMatt Macy LOG(D_DEBUG, 746eda14cbcSMatt Macy "\nREC D[%d %d %d]... " 747eda14cbcSMatt Macy "[FAIL]\n", x0, x1, x2); 748eda14cbcSMatt Macy } 749eda14cbcSMatt Macy } 750eda14cbcSMatt Macy } 751eda14cbcSMatt Macy } 752eda14cbcSMatt Macy } 753eda14cbcSMatt Macy return (err); 754eda14cbcSMatt Macy } 755eda14cbcSMatt Macy 756eda14cbcSMatt Macy static int 757eda14cbcSMatt Macy run_rec_check(raidz_test_opts_t *opts) 758eda14cbcSMatt Macy { 759eda14cbcSMatt Macy char **impl_name; 760eda14cbcSMatt Macy unsigned fn, err = 0; 761eda14cbcSMatt Macy zio_t *zio_test; 762eda14cbcSMatt Macy raidz_map_t *rm_test; 763eda14cbcSMatt Macy 764eda14cbcSMatt Macy err = init_raidz_golden_map(opts, PARITY_PQR); 765eda14cbcSMatt Macy if (0 != err) 766eda14cbcSMatt Macy return (err); 767eda14cbcSMatt Macy 768eda14cbcSMatt Macy LOG(D_INFO, DBLSEP); 769eda14cbcSMatt Macy LOG(D_INFO, "Testing data reconstruction...\n"); 770eda14cbcSMatt Macy 771eda14cbcSMatt Macy for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL; 772eda14cbcSMatt Macy impl_name++) { 773eda14cbcSMatt Macy 774eda14cbcSMatt Macy LOG(D_INFO, SEP); 775eda14cbcSMatt Macy LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name); 776eda14cbcSMatt Macy 777eda14cbcSMatt Macy if (vdev_raidz_impl_set(*impl_name) != 0) { 778eda14cbcSMatt Macy LOG(D_INFO, "[SKIP]\n"); 779eda14cbcSMatt Macy continue; 780eda14cbcSMatt Macy } else 781eda14cbcSMatt Macy LOG(D_INFO, "[SUPPORTED]\n"); 782eda14cbcSMatt Macy 783eda14cbcSMatt Macy 784eda14cbcSMatt Macy /* create suitable raidz_map */ 785eda14cbcSMatt Macy rm_test = init_raidz_map(opts, &zio_test, PARITY_PQR); 786eda14cbcSMatt Macy /* generate parity */ 787eda14cbcSMatt Macy vdev_raidz_generate_parity(rm_test); 788eda14cbcSMatt Macy 789eda14cbcSMatt Macy for (fn = 0; fn < RAIDZ_REC_NUM; fn++) { 790eda14cbcSMatt Macy 791eda14cbcSMatt Macy LOG(D_INFO, "\t\tTesting method [%s] ...", 792eda14cbcSMatt Macy raidz_rec_name[fn]); 793eda14cbcSMatt Macy 794eda14cbcSMatt Macy if (run_rec_check_impl(opts, rm_test, fn) != 0) { 795eda14cbcSMatt Macy LOG(D_INFO, "[FAIL]\n"); 796eda14cbcSMatt Macy err++; 797eda14cbcSMatt Macy 798eda14cbcSMatt Macy } else 799eda14cbcSMatt Macy LOG(D_INFO, "[PASS]\n"); 800eda14cbcSMatt Macy 801eda14cbcSMatt Macy } 802eda14cbcSMatt Macy /* tear down test raidz_map */ 803eda14cbcSMatt Macy fini_raidz_map(&zio_test, &rm_test); 804eda14cbcSMatt Macy } 805eda14cbcSMatt Macy 806eda14cbcSMatt Macy fini_raidz_map(&opts->zio_golden, &opts->rm_golden); 807eda14cbcSMatt Macy 808eda14cbcSMatt Macy return (err); 809eda14cbcSMatt Macy } 810eda14cbcSMatt Macy 811eda14cbcSMatt Macy static int 812eda14cbcSMatt Macy run_test(raidz_test_opts_t *opts) 813eda14cbcSMatt Macy { 814eda14cbcSMatt Macy int err = 0; 815eda14cbcSMatt Macy 816eda14cbcSMatt Macy if (opts == NULL) 817eda14cbcSMatt Macy opts = &rto_opts; 818eda14cbcSMatt Macy 819eda14cbcSMatt Macy print_opts(opts, B_FALSE); 820eda14cbcSMatt Macy 821eda14cbcSMatt Macy err |= run_gen_check(opts); 822eda14cbcSMatt Macy err |= run_rec_check(opts); 823eda14cbcSMatt Macy 824eda14cbcSMatt Macy return (err); 825eda14cbcSMatt Macy } 826eda14cbcSMatt Macy 827eda14cbcSMatt Macy #define SWEEP_RUNNING 0 828eda14cbcSMatt Macy #define SWEEP_FINISHED 1 829eda14cbcSMatt Macy #define SWEEP_ERROR 2 830eda14cbcSMatt Macy #define SWEEP_TIMEOUT 3 831eda14cbcSMatt Macy 832eda14cbcSMatt Macy static int sweep_state = 0; 833eda14cbcSMatt Macy static raidz_test_opts_t failed_opts; 834eda14cbcSMatt Macy 835eda14cbcSMatt Macy static kmutex_t sem_mtx; 836eda14cbcSMatt Macy static kcondvar_t sem_cv; 837eda14cbcSMatt Macy static int max_free_slots; 838eda14cbcSMatt Macy static int free_slots; 839eda14cbcSMatt Macy 840da5137abSMartin Matuska static __attribute__((noreturn)) void 841eda14cbcSMatt Macy sweep_thread(void *arg) 842eda14cbcSMatt Macy { 843eda14cbcSMatt Macy int err = 0; 844eda14cbcSMatt Macy raidz_test_opts_t *opts = (raidz_test_opts_t *)arg; 845eda14cbcSMatt Macy VERIFY(opts != NULL); 846eda14cbcSMatt Macy 847eda14cbcSMatt Macy err = run_test(opts); 848eda14cbcSMatt Macy 849eda14cbcSMatt Macy if (rto_opts.rto_sanity) { 850eda14cbcSMatt Macy /* 25% chance that a sweep test fails */ 851eda14cbcSMatt Macy if (rand() < (RAND_MAX/4)) 852eda14cbcSMatt Macy err = 1; 853eda14cbcSMatt Macy } 854eda14cbcSMatt Macy 855eda14cbcSMatt Macy if (0 != err) { 856eda14cbcSMatt Macy mutex_enter(&sem_mtx); 857eda14cbcSMatt Macy memcpy(&failed_opts, opts, sizeof (raidz_test_opts_t)); 858eda14cbcSMatt Macy sweep_state = SWEEP_ERROR; 859eda14cbcSMatt Macy mutex_exit(&sem_mtx); 860eda14cbcSMatt Macy } 861eda14cbcSMatt Macy 862eda14cbcSMatt Macy umem_free(opts, sizeof (raidz_test_opts_t)); 863eda14cbcSMatt Macy 864eda14cbcSMatt Macy /* signal the next thread */ 865eda14cbcSMatt Macy mutex_enter(&sem_mtx); 866eda14cbcSMatt Macy free_slots++; 867eda14cbcSMatt Macy cv_signal(&sem_cv); 868eda14cbcSMatt Macy mutex_exit(&sem_mtx); 869eda14cbcSMatt Macy 870eda14cbcSMatt Macy thread_exit(); 871eda14cbcSMatt Macy } 872eda14cbcSMatt Macy 873eda14cbcSMatt Macy static int 874eda14cbcSMatt Macy run_sweep(void) 875eda14cbcSMatt Macy { 876eda14cbcSMatt Macy static const size_t dcols_v[] = { 1, 2, 3, 4, 5, 6, 7, 8, 12, 15, 16 }; 877eda14cbcSMatt Macy static const size_t ashift_v[] = { 9, 12, 14 }; 878eda14cbcSMatt Macy static const size_t size_v[] = { 1 << 9, 21 * (1 << 9), 13 * (1 << 12), 879eda14cbcSMatt Macy 1 << 17, (1 << 20) - (1 << 12), SPA_MAXBLOCKSIZE }; 880eda14cbcSMatt Macy 881eda14cbcSMatt Macy (void) setvbuf(stdout, NULL, _IONBF, 0); 882eda14cbcSMatt Macy 883eda14cbcSMatt Macy ulong_t total_comb = ARRAY_SIZE(size_v) * ARRAY_SIZE(ashift_v) * 884eda14cbcSMatt Macy ARRAY_SIZE(dcols_v); 885eda14cbcSMatt Macy ulong_t tried_comb = 0; 886eda14cbcSMatt Macy hrtime_t time_diff, start_time = gethrtime(); 887eda14cbcSMatt Macy raidz_test_opts_t *opts; 888eda14cbcSMatt Macy int a, d, s; 889eda14cbcSMatt Macy 890eda14cbcSMatt Macy max_free_slots = free_slots = MAX(2, boot_ncpus); 891eda14cbcSMatt Macy 892eda14cbcSMatt Macy mutex_init(&sem_mtx, NULL, MUTEX_DEFAULT, NULL); 893eda14cbcSMatt Macy cv_init(&sem_cv, NULL, CV_DEFAULT, NULL); 894eda14cbcSMatt Macy 895eda14cbcSMatt Macy for (s = 0; s < ARRAY_SIZE(size_v); s++) 896eda14cbcSMatt Macy for (a = 0; a < ARRAY_SIZE(ashift_v); a++) 897eda14cbcSMatt Macy for (d = 0; d < ARRAY_SIZE(dcols_v); d++) { 898eda14cbcSMatt Macy 899eda14cbcSMatt Macy if (size_v[s] < (1 << ashift_v[a])) { 900eda14cbcSMatt Macy total_comb--; 901eda14cbcSMatt Macy continue; 902eda14cbcSMatt Macy } 903eda14cbcSMatt Macy 904eda14cbcSMatt Macy if (++tried_comb % 20 == 0) 905eda14cbcSMatt Macy LOG(D_ALL, "%lu/%lu... ", tried_comb, total_comb); 906eda14cbcSMatt Macy 907eda14cbcSMatt Macy /* wait for signal to start new thread */ 908eda14cbcSMatt Macy mutex_enter(&sem_mtx); 909eda14cbcSMatt Macy while (cv_timedwait_sig(&sem_cv, &sem_mtx, 910eda14cbcSMatt Macy ddi_get_lbolt() + hz)) { 911eda14cbcSMatt Macy 912eda14cbcSMatt Macy /* check if should stop the test (timeout) */ 913eda14cbcSMatt Macy time_diff = (gethrtime() - start_time) / NANOSEC; 914eda14cbcSMatt Macy if (rto_opts.rto_sweep_timeout > 0 && 915eda14cbcSMatt Macy time_diff >= rto_opts.rto_sweep_timeout) { 916eda14cbcSMatt Macy sweep_state = SWEEP_TIMEOUT; 917eda14cbcSMatt Macy rto_opts.rto_should_stop = B_TRUE; 918eda14cbcSMatt Macy mutex_exit(&sem_mtx); 919eda14cbcSMatt Macy goto exit; 920eda14cbcSMatt Macy } 921eda14cbcSMatt Macy 922eda14cbcSMatt Macy /* check if should stop the test (error) */ 923eda14cbcSMatt Macy if (sweep_state != SWEEP_RUNNING) { 924eda14cbcSMatt Macy mutex_exit(&sem_mtx); 925eda14cbcSMatt Macy goto exit; 926eda14cbcSMatt Macy } 927eda14cbcSMatt Macy 928eda14cbcSMatt Macy /* exit loop if a slot is available */ 929eda14cbcSMatt Macy if (free_slots > 0) { 930eda14cbcSMatt Macy break; 931eda14cbcSMatt Macy } 932eda14cbcSMatt Macy } 933eda14cbcSMatt Macy 934eda14cbcSMatt Macy free_slots--; 935eda14cbcSMatt Macy mutex_exit(&sem_mtx); 936eda14cbcSMatt Macy 937eda14cbcSMatt Macy opts = umem_zalloc(sizeof (raidz_test_opts_t), UMEM_NOFAIL); 938eda14cbcSMatt Macy opts->rto_ashift = ashift_v[a]; 939eda14cbcSMatt Macy opts->rto_dcols = dcols_v[d]; 940*dbd5678dSMartin Matuska opts->rto_offset = (1ULL << ashift_v[a]) * rand(); 941eda14cbcSMatt Macy opts->rto_dsize = size_v[s]; 9427877fdebSMatt Macy opts->rto_expand = rto_opts.rto_expand; 9437877fdebSMatt Macy opts->rto_expand_offset = rto_opts.rto_expand_offset; 944eda14cbcSMatt Macy opts->rto_v = 0; /* be quiet */ 945eda14cbcSMatt Macy 946eda14cbcSMatt Macy VERIFY3P(thread_create(NULL, 0, sweep_thread, (void *) opts, 947eda14cbcSMatt Macy 0, NULL, TS_RUN, defclsyspri), !=, NULL); 948eda14cbcSMatt Macy } 949eda14cbcSMatt Macy 950eda14cbcSMatt Macy exit: 951eda14cbcSMatt Macy LOG(D_ALL, "\nWaiting for test threads to finish...\n"); 952eda14cbcSMatt Macy mutex_enter(&sem_mtx); 953eda14cbcSMatt Macy VERIFY(free_slots <= max_free_slots); 954eda14cbcSMatt Macy while (free_slots < max_free_slots) { 955eda14cbcSMatt Macy (void) cv_wait(&sem_cv, &sem_mtx); 956eda14cbcSMatt Macy } 957eda14cbcSMatt Macy mutex_exit(&sem_mtx); 958eda14cbcSMatt Macy 959eda14cbcSMatt Macy if (sweep_state == SWEEP_ERROR) { 960eda14cbcSMatt Macy ERR("Sweep test failed! Failed option: \n"); 961eda14cbcSMatt Macy print_opts(&failed_opts, B_TRUE); 962eda14cbcSMatt Macy } else { 963eda14cbcSMatt Macy if (sweep_state == SWEEP_TIMEOUT) 964eda14cbcSMatt Macy LOG(D_ALL, "Test timeout (%lus). Stopping...\n", 965eda14cbcSMatt Macy (ulong_t)rto_opts.rto_sweep_timeout); 966eda14cbcSMatt Macy 967eda14cbcSMatt Macy LOG(D_ALL, "Sweep test succeeded on %lu raidz maps!\n", 968eda14cbcSMatt Macy (ulong_t)tried_comb); 969eda14cbcSMatt Macy } 970eda14cbcSMatt Macy 971eda14cbcSMatt Macy mutex_destroy(&sem_mtx); 972eda14cbcSMatt Macy 973eda14cbcSMatt Macy return (sweep_state == SWEEP_ERROR ? SWEEP_ERROR : 0); 974eda14cbcSMatt Macy } 975eda14cbcSMatt Macy 9767877fdebSMatt Macy 977eda14cbcSMatt Macy int 978eda14cbcSMatt Macy main(int argc, char **argv) 979eda14cbcSMatt Macy { 980eda14cbcSMatt Macy size_t i; 981eda14cbcSMatt Macy struct sigaction action; 982eda14cbcSMatt Macy int err = 0; 983eda14cbcSMatt Macy 98416038816SMartin Matuska /* init gdb pid string early */ 98516038816SMartin Matuska (void) sprintf(pid_s, "%d", getpid()); 986eda14cbcSMatt Macy 987eda14cbcSMatt Macy action.sa_handler = sig_handler; 988eda14cbcSMatt Macy sigemptyset(&action.sa_mask); 989eda14cbcSMatt Macy action.sa_flags = 0; 990eda14cbcSMatt Macy 991eda14cbcSMatt Macy if (sigaction(SIGSEGV, &action, NULL) < 0) { 992eda14cbcSMatt Macy ERR("raidz_test: cannot catch SIGSEGV: %s.\n", strerror(errno)); 993eda14cbcSMatt Macy exit(EXIT_FAILURE); 994eda14cbcSMatt Macy } 995eda14cbcSMatt Macy 996eda14cbcSMatt Macy (void) setvbuf(stdout, NULL, _IOLBF, 0); 997eda14cbcSMatt Macy 998eda14cbcSMatt Macy dprintf_setup(&argc, argv); 999eda14cbcSMatt Macy 1000eda14cbcSMatt Macy process_options(argc, argv); 1001eda14cbcSMatt Macy 1002eda14cbcSMatt Macy kernel_init(SPA_MODE_READ); 1003eda14cbcSMatt Macy 1004eda14cbcSMatt Macy /* setup random data because rand() is not reentrant */ 1005eda14cbcSMatt Macy rand_data = (int *)umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); 1006eda14cbcSMatt Macy srand((unsigned)time(NULL) * getpid()); 1007eda14cbcSMatt Macy for (i = 0; i < SPA_MAXBLOCKSIZE / sizeof (int); i++) 1008eda14cbcSMatt Macy rand_data[i] = rand(); 1009eda14cbcSMatt Macy 1010eda14cbcSMatt Macy mprotect(rand_data, SPA_MAXBLOCKSIZE, PROT_READ); 1011eda14cbcSMatt Macy 1012eda14cbcSMatt Macy if (rto_opts.rto_benchmark) { 1013eda14cbcSMatt Macy run_raidz_benchmark(); 1014eda14cbcSMatt Macy } else if (rto_opts.rto_sweep) { 1015eda14cbcSMatt Macy err = run_sweep(); 1016eda14cbcSMatt Macy } else { 1017eda14cbcSMatt Macy err = run_test(NULL); 1018eda14cbcSMatt Macy } 1019eda14cbcSMatt Macy 1020eda14cbcSMatt Macy umem_free(rand_data, SPA_MAXBLOCKSIZE); 1021eda14cbcSMatt Macy kernel_fini(); 1022eda14cbcSMatt Macy 1023eda14cbcSMatt Macy return (err); 1024eda14cbcSMatt Macy } 1025