1eda14cbcSMatt Macy /* 2eda14cbcSMatt Macy * CDDL HEADER START 3eda14cbcSMatt Macy * 4eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7eda14cbcSMatt Macy * 8eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9eda14cbcSMatt Macy * or http://www.opensolaris.org/os/licensing. 10eda14cbcSMatt Macy * See the License for the specific language governing permissions 11eda14cbcSMatt Macy * and limitations under the License. 12eda14cbcSMatt Macy * 13eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18eda14cbcSMatt Macy * 19eda14cbcSMatt Macy * CDDL HEADER END 20eda14cbcSMatt Macy */ 21eda14cbcSMatt Macy 22eda14cbcSMatt Macy /* 23eda14cbcSMatt Macy * Copyright (C) 2016 Gvozden Nešković. All rights reserved. 24eda14cbcSMatt Macy */ 25eda14cbcSMatt Macy 26eda14cbcSMatt Macy #include <sys/zfs_context.h> 27eda14cbcSMatt Macy #include <sys/time.h> 28eda14cbcSMatt Macy #include <sys/wait.h> 29eda14cbcSMatt Macy #include <sys/zio.h> 30eda14cbcSMatt Macy #include <umem.h> 31eda14cbcSMatt Macy #include <sys/vdev_raidz.h> 32eda14cbcSMatt Macy #include <sys/vdev_raidz_impl.h> 33eda14cbcSMatt Macy #include <assert.h> 34eda14cbcSMatt Macy #include <stdio.h> 35eda14cbcSMatt Macy #include "raidz_test.h" 36eda14cbcSMatt Macy 37eda14cbcSMatt Macy static int *rand_data; 38eda14cbcSMatt Macy raidz_test_opts_t rto_opts; 39eda14cbcSMatt Macy 40eda14cbcSMatt Macy static char gdb[256]; 41eda14cbcSMatt Macy static const char gdb_tmpl[] = "gdb -ex \"set pagination 0\" -p %d"; 42eda14cbcSMatt Macy 43eda14cbcSMatt Macy static void sig_handler(int signo) 44eda14cbcSMatt Macy { 45eda14cbcSMatt Macy struct sigaction action; 46eda14cbcSMatt Macy /* 47eda14cbcSMatt Macy * Restore default action and re-raise signal so SIGSEGV and 48eda14cbcSMatt Macy * SIGABRT can trigger a core dump. 49eda14cbcSMatt Macy */ 50eda14cbcSMatt Macy action.sa_handler = SIG_DFL; 51eda14cbcSMatt Macy sigemptyset(&action.sa_mask); 52eda14cbcSMatt Macy action.sa_flags = 0; 53eda14cbcSMatt Macy (void) sigaction(signo, &action, NULL); 54eda14cbcSMatt Macy 55eda14cbcSMatt Macy if (rto_opts.rto_gdb) 56eda14cbcSMatt Macy if (system(gdb)) { } 57eda14cbcSMatt Macy 58eda14cbcSMatt Macy raise(signo); 59eda14cbcSMatt Macy } 60eda14cbcSMatt Macy 61eda14cbcSMatt Macy static void print_opts(raidz_test_opts_t *opts, boolean_t force) 62eda14cbcSMatt Macy { 63eda14cbcSMatt Macy char *verbose; 64eda14cbcSMatt Macy switch (opts->rto_v) { 65eda14cbcSMatt Macy case 0: 66eda14cbcSMatt Macy verbose = "no"; 67eda14cbcSMatt Macy break; 68eda14cbcSMatt Macy case 1: 69eda14cbcSMatt Macy verbose = "info"; 70eda14cbcSMatt Macy break; 71eda14cbcSMatt Macy default: 72eda14cbcSMatt Macy verbose = "debug"; 73eda14cbcSMatt Macy break; 74eda14cbcSMatt Macy } 75eda14cbcSMatt Macy 76eda14cbcSMatt Macy if (force || opts->rto_v >= D_INFO) { 77eda14cbcSMatt Macy (void) fprintf(stdout, DBLSEP "Running with options:\n" 78eda14cbcSMatt Macy " (-a) zio ashift : %zu\n" 79eda14cbcSMatt Macy " (-o) zio offset : 1 << %zu\n" 807877fdebSMatt Macy " (-e) expanded map : %s\n" 817877fdebSMatt Macy " (-r) reflow offset : %llx\n" 82eda14cbcSMatt Macy " (-d) number of raidz data columns : %zu\n" 83eda14cbcSMatt Macy " (-s) size of DATA : 1 << %zu\n" 84eda14cbcSMatt Macy " (-S) sweep parameters : %s \n" 85eda14cbcSMatt Macy " (-v) verbose : %s \n\n", 86eda14cbcSMatt Macy opts->rto_ashift, /* -a */ 87eda14cbcSMatt Macy ilog2(opts->rto_offset), /* -o */ 887877fdebSMatt Macy opts->rto_expand ? "yes" : "no", /* -e */ 897877fdebSMatt Macy (u_longlong_t)opts->rto_expand_offset, /* -r */ 90eda14cbcSMatt Macy opts->rto_dcols, /* -d */ 91eda14cbcSMatt Macy ilog2(opts->rto_dsize), /* -s */ 92eda14cbcSMatt Macy opts->rto_sweep ? "yes" : "no", /* -S */ 93eda14cbcSMatt Macy verbose); /* -v */ 94eda14cbcSMatt Macy } 95eda14cbcSMatt Macy } 96eda14cbcSMatt Macy 97eda14cbcSMatt Macy static void usage(boolean_t requested) 98eda14cbcSMatt Macy { 99eda14cbcSMatt Macy const raidz_test_opts_t *o = &rto_opts_defaults; 100eda14cbcSMatt Macy 101eda14cbcSMatt Macy FILE *fp = requested ? stdout : stderr; 102eda14cbcSMatt Macy 103eda14cbcSMatt Macy (void) fprintf(fp, "Usage:\n" 104eda14cbcSMatt Macy "\t[-a zio ashift (default: %zu)]\n" 105eda14cbcSMatt Macy "\t[-o zio offset, exponent radix 2 (default: %zu)]\n" 106eda14cbcSMatt Macy "\t[-d number of raidz data columns (default: %zu)]\n" 107eda14cbcSMatt Macy "\t[-s zio size, exponent radix 2 (default: %zu)]\n" 108eda14cbcSMatt Macy "\t[-S parameter sweep (default: %s)]\n" 109eda14cbcSMatt Macy "\t[-t timeout for parameter sweep test]\n" 110eda14cbcSMatt Macy "\t[-B benchmark all raidz implementations]\n" 1117877fdebSMatt Macy "\t[-e use expanded raidz map (default: %s)]\n" 1127877fdebSMatt Macy "\t[-r expanded raidz map reflow offset (default: %llx)]\n" 113eda14cbcSMatt Macy "\t[-v increase verbosity (default: %zu)]\n" 114eda14cbcSMatt Macy "\t[-h (print help)]\n" 115eda14cbcSMatt Macy "\t[-T test the test, see if failure would be detected]\n" 116eda14cbcSMatt Macy "\t[-D debug (attach gdb on SIGSEGV)]\n" 117eda14cbcSMatt Macy "", 118eda14cbcSMatt Macy o->rto_ashift, /* -a */ 119eda14cbcSMatt Macy ilog2(o->rto_offset), /* -o */ 120eda14cbcSMatt Macy o->rto_dcols, /* -d */ 121eda14cbcSMatt Macy ilog2(o->rto_dsize), /* -s */ 122eda14cbcSMatt Macy rto_opts.rto_sweep ? "yes" : "no", /* -S */ 1237877fdebSMatt Macy rto_opts.rto_expand ? "yes" : "no", /* -e */ 1247877fdebSMatt Macy (u_longlong_t)o->rto_expand_offset, /* -r */ 125eda14cbcSMatt Macy o->rto_v); /* -d */ 126eda14cbcSMatt Macy 127eda14cbcSMatt Macy exit(requested ? 0 : 1); 128eda14cbcSMatt Macy } 129eda14cbcSMatt Macy 130eda14cbcSMatt Macy static void process_options(int argc, char **argv) 131eda14cbcSMatt Macy { 132eda14cbcSMatt Macy size_t value; 133eda14cbcSMatt Macy int opt; 134eda14cbcSMatt Macy 135eda14cbcSMatt Macy raidz_test_opts_t *o = &rto_opts; 136eda14cbcSMatt Macy 137eda14cbcSMatt Macy bcopy(&rto_opts_defaults, o, sizeof (*o)); 138eda14cbcSMatt Macy 1397877fdebSMatt Macy while ((opt = getopt(argc, argv, "TDBSvha:er:o:d:s:t:")) != -1) { 140eda14cbcSMatt Macy value = 0; 141eda14cbcSMatt Macy 142eda14cbcSMatt Macy switch (opt) { 143eda14cbcSMatt Macy case 'a': 144eda14cbcSMatt Macy value = strtoull(optarg, NULL, 0); 145eda14cbcSMatt Macy o->rto_ashift = MIN(13, MAX(9, value)); 146eda14cbcSMatt Macy break; 1477877fdebSMatt Macy case 'e': 1487877fdebSMatt Macy o->rto_expand = 1; 1497877fdebSMatt Macy break; 1507877fdebSMatt Macy case 'r': 1517877fdebSMatt Macy o->rto_expand_offset = strtoull(optarg, NULL, 0); 1527877fdebSMatt Macy break; 153eda14cbcSMatt Macy case 'o': 154eda14cbcSMatt Macy value = strtoull(optarg, NULL, 0); 155eda14cbcSMatt Macy o->rto_offset = ((1ULL << MIN(12, value)) >> 9) << 9; 156eda14cbcSMatt Macy break; 157eda14cbcSMatt Macy case 'd': 158eda14cbcSMatt Macy value = strtoull(optarg, NULL, 0); 159eda14cbcSMatt Macy o->rto_dcols = MIN(255, MAX(1, value)); 160eda14cbcSMatt Macy break; 161eda14cbcSMatt Macy case 's': 162eda14cbcSMatt Macy value = strtoull(optarg, NULL, 0); 163eda14cbcSMatt Macy o->rto_dsize = 1ULL << MIN(SPA_MAXBLOCKSHIFT, 164eda14cbcSMatt Macy MAX(SPA_MINBLOCKSHIFT, value)); 165eda14cbcSMatt Macy break; 166eda14cbcSMatt Macy case 't': 167eda14cbcSMatt Macy value = strtoull(optarg, NULL, 0); 168eda14cbcSMatt Macy o->rto_sweep_timeout = value; 169eda14cbcSMatt Macy break; 170eda14cbcSMatt Macy case 'v': 171eda14cbcSMatt Macy o->rto_v++; 172eda14cbcSMatt Macy break; 173eda14cbcSMatt Macy case 'S': 174eda14cbcSMatt Macy o->rto_sweep = 1; 175eda14cbcSMatt Macy break; 176eda14cbcSMatt Macy case 'B': 177eda14cbcSMatt Macy o->rto_benchmark = 1; 178eda14cbcSMatt Macy break; 179eda14cbcSMatt Macy case 'D': 180eda14cbcSMatt Macy o->rto_gdb = 1; 181eda14cbcSMatt Macy break; 182eda14cbcSMatt Macy case 'T': 183eda14cbcSMatt Macy o->rto_sanity = 1; 184eda14cbcSMatt Macy break; 185eda14cbcSMatt Macy case 'h': 186eda14cbcSMatt Macy usage(B_TRUE); 187eda14cbcSMatt Macy break; 188eda14cbcSMatt Macy case '?': 189eda14cbcSMatt Macy default: 190eda14cbcSMatt Macy usage(B_FALSE); 191eda14cbcSMatt Macy break; 192eda14cbcSMatt Macy } 193eda14cbcSMatt Macy } 194eda14cbcSMatt Macy } 195eda14cbcSMatt Macy 1967877fdebSMatt Macy #define DATA_COL(rr, i) ((rr)->rr_col[rr->rr_firstdatacol + (i)].rc_abd) 1977877fdebSMatt Macy #define DATA_COL_SIZE(rr, i) ((rr)->rr_col[rr->rr_firstdatacol + (i)].rc_size) 198eda14cbcSMatt Macy 1997877fdebSMatt Macy #define CODE_COL(rr, i) ((rr)->rr_col[(i)].rc_abd) 2007877fdebSMatt Macy #define CODE_COL_SIZE(rr, i) ((rr)->rr_col[(i)].rc_size) 201eda14cbcSMatt Macy 202eda14cbcSMatt Macy static int 203eda14cbcSMatt Macy cmp_code(raidz_test_opts_t *opts, const raidz_map_t *rm, const int parity) 204eda14cbcSMatt Macy { 2057877fdebSMatt Macy int r, i, ret = 0; 206eda14cbcSMatt Macy 207eda14cbcSMatt Macy VERIFY(parity >= 1 && parity <= 3); 208eda14cbcSMatt Macy 2097877fdebSMatt Macy for (r = 0; r < rm->rm_nrows; r++) { 2107877fdebSMatt Macy raidz_row_t * const rr = rm->rm_row[r]; 2117877fdebSMatt Macy raidz_row_t * const rrg = opts->rm_golden->rm_row[r]; 212eda14cbcSMatt Macy for (i = 0; i < parity; i++) { 2137877fdebSMatt Macy if (CODE_COL_SIZE(rrg, i) == 0) { 2147877fdebSMatt Macy VERIFY0(CODE_COL_SIZE(rr, i)); 2157877fdebSMatt Macy continue; 2167877fdebSMatt Macy } 2177877fdebSMatt Macy 2187877fdebSMatt Macy if (abd_cmp(CODE_COL(rr, i), 2197877fdebSMatt Macy CODE_COL(rrg, i)) != 0) { 220eda14cbcSMatt Macy ret++; 221eda14cbcSMatt Macy LOG_OPT(D_DEBUG, opts, 222eda14cbcSMatt Macy "\nParity block [%d] different!\n", i); 223eda14cbcSMatt Macy } 224eda14cbcSMatt Macy } 2257877fdebSMatt Macy } 226eda14cbcSMatt Macy return (ret); 227eda14cbcSMatt Macy } 228eda14cbcSMatt Macy 229eda14cbcSMatt Macy static int 230eda14cbcSMatt Macy cmp_data(raidz_test_opts_t *opts, raidz_map_t *rm) 231eda14cbcSMatt Macy { 2327877fdebSMatt Macy int r, i, dcols, ret = 0; 233eda14cbcSMatt Macy 2347877fdebSMatt Macy for (r = 0; r < rm->rm_nrows; r++) { 2357877fdebSMatt Macy raidz_row_t *rr = rm->rm_row[r]; 2367877fdebSMatt Macy raidz_row_t *rrg = opts->rm_golden->rm_row[r]; 2377877fdebSMatt Macy dcols = opts->rm_golden->rm_row[0]->rr_cols - 2387877fdebSMatt Macy raidz_parity(opts->rm_golden); 239eda14cbcSMatt Macy for (i = 0; i < dcols; i++) { 2407877fdebSMatt Macy if (DATA_COL_SIZE(rrg, i) == 0) { 2417877fdebSMatt Macy VERIFY0(DATA_COL_SIZE(rr, i)); 2427877fdebSMatt Macy continue; 2437877fdebSMatt Macy } 2447877fdebSMatt Macy 2457877fdebSMatt Macy if (abd_cmp(DATA_COL(rrg, i), 2467877fdebSMatt Macy DATA_COL(rr, i)) != 0) { 247eda14cbcSMatt Macy ret++; 248eda14cbcSMatt Macy 249eda14cbcSMatt Macy LOG_OPT(D_DEBUG, opts, 250eda14cbcSMatt Macy "\nData block [%d] different!\n", i); 251eda14cbcSMatt Macy } 252eda14cbcSMatt Macy } 2537877fdebSMatt Macy } 254eda14cbcSMatt Macy return (ret); 255eda14cbcSMatt Macy } 256eda14cbcSMatt Macy 257eda14cbcSMatt Macy static int 258eda14cbcSMatt Macy init_rand(void *data, size_t size, void *private) 259eda14cbcSMatt Macy { 260eda14cbcSMatt Macy int i; 261eda14cbcSMatt Macy int *dst = (int *)data; 262eda14cbcSMatt Macy 263eda14cbcSMatt Macy for (i = 0; i < size / sizeof (int); i++) 264eda14cbcSMatt Macy dst[i] = rand_data[i]; 265eda14cbcSMatt Macy 266eda14cbcSMatt Macy return (0); 267eda14cbcSMatt Macy } 268eda14cbcSMatt Macy 269eda14cbcSMatt Macy static void 270eda14cbcSMatt Macy corrupt_colums(raidz_map_t *rm, const int *tgts, const int cnt) 271eda14cbcSMatt Macy { 2727877fdebSMatt Macy for (int r = 0; r < rm->rm_nrows; r++) { 2737877fdebSMatt Macy raidz_row_t *rr = rm->rm_row[r]; 2747877fdebSMatt Macy for (int i = 0; i < cnt; i++) { 2757877fdebSMatt Macy raidz_col_t *col = &rr->rr_col[tgts[i]]; 2767877fdebSMatt Macy abd_iterate_func(col->rc_abd, 0, col->rc_size, 2777877fdebSMatt Macy init_rand, NULL); 2787877fdebSMatt Macy } 279eda14cbcSMatt Macy } 280eda14cbcSMatt Macy } 281eda14cbcSMatt Macy 282eda14cbcSMatt Macy void 283eda14cbcSMatt Macy init_zio_abd(zio_t *zio) 284eda14cbcSMatt Macy { 285eda14cbcSMatt Macy abd_iterate_func(zio->io_abd, 0, zio->io_size, init_rand, NULL); 286eda14cbcSMatt Macy } 287eda14cbcSMatt Macy 288eda14cbcSMatt Macy static void 289eda14cbcSMatt Macy fini_raidz_map(zio_t **zio, raidz_map_t **rm) 290eda14cbcSMatt Macy { 291eda14cbcSMatt Macy vdev_raidz_map_free(*rm); 292eda14cbcSMatt Macy raidz_free((*zio)->io_abd, (*zio)->io_size); 293eda14cbcSMatt Macy umem_free(*zio, sizeof (zio_t)); 294eda14cbcSMatt Macy 295eda14cbcSMatt Macy *zio = NULL; 296eda14cbcSMatt Macy *rm = NULL; 297eda14cbcSMatt Macy } 298eda14cbcSMatt Macy 299eda14cbcSMatt Macy static int 300eda14cbcSMatt Macy init_raidz_golden_map(raidz_test_opts_t *opts, const int parity) 301eda14cbcSMatt Macy { 302eda14cbcSMatt Macy int err = 0; 303eda14cbcSMatt Macy zio_t *zio_test; 304eda14cbcSMatt Macy raidz_map_t *rm_test; 305eda14cbcSMatt Macy const size_t total_ncols = opts->rto_dcols + parity; 306eda14cbcSMatt Macy 307eda14cbcSMatt Macy if (opts->rm_golden) { 308eda14cbcSMatt Macy fini_raidz_map(&opts->zio_golden, &opts->rm_golden); 309eda14cbcSMatt Macy } 310eda14cbcSMatt Macy 311eda14cbcSMatt Macy opts->zio_golden = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL); 312eda14cbcSMatt Macy zio_test = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL); 313eda14cbcSMatt Macy 314eda14cbcSMatt Macy opts->zio_golden->io_offset = zio_test->io_offset = opts->rto_offset; 315eda14cbcSMatt Macy opts->zio_golden->io_size = zio_test->io_size = opts->rto_dsize; 316eda14cbcSMatt Macy 317eda14cbcSMatt Macy opts->zio_golden->io_abd = raidz_alloc(opts->rto_dsize); 318eda14cbcSMatt Macy zio_test->io_abd = raidz_alloc(opts->rto_dsize); 319eda14cbcSMatt Macy 320eda14cbcSMatt Macy init_zio_abd(opts->zio_golden); 321eda14cbcSMatt Macy init_zio_abd(zio_test); 322eda14cbcSMatt Macy 323eda14cbcSMatt Macy VERIFY0(vdev_raidz_impl_set("original")); 324eda14cbcSMatt Macy 3257877fdebSMatt Macy if (opts->rto_expand) { 3267877fdebSMatt Macy opts->rm_golden = 3277877fdebSMatt Macy vdev_raidz_map_alloc_expanded(opts->zio_golden->io_abd, 3287877fdebSMatt Macy opts->zio_golden->io_size, opts->zio_golden->io_offset, 3297877fdebSMatt Macy opts->rto_ashift, total_ncols+1, total_ncols, 3307877fdebSMatt Macy parity, opts->rto_expand_offset); 3317877fdebSMatt Macy rm_test = vdev_raidz_map_alloc_expanded(zio_test->io_abd, 3327877fdebSMatt Macy zio_test->io_size, zio_test->io_offset, 3337877fdebSMatt Macy opts->rto_ashift, total_ncols+1, total_ncols, 3347877fdebSMatt Macy parity, opts->rto_expand_offset); 3357877fdebSMatt Macy } else { 336eda14cbcSMatt Macy opts->rm_golden = vdev_raidz_map_alloc(opts->zio_golden, 337eda14cbcSMatt Macy opts->rto_ashift, total_ncols, parity); 338eda14cbcSMatt Macy rm_test = vdev_raidz_map_alloc(zio_test, 339eda14cbcSMatt Macy opts->rto_ashift, total_ncols, parity); 3407877fdebSMatt Macy } 341eda14cbcSMatt Macy 342eda14cbcSMatt Macy VERIFY(opts->zio_golden); 343eda14cbcSMatt Macy VERIFY(opts->rm_golden); 344eda14cbcSMatt Macy 345eda14cbcSMatt Macy vdev_raidz_generate_parity(opts->rm_golden); 346eda14cbcSMatt Macy vdev_raidz_generate_parity(rm_test); 347eda14cbcSMatt Macy 348eda14cbcSMatt Macy /* sanity check */ 349eda14cbcSMatt Macy err |= cmp_data(opts, rm_test); 350eda14cbcSMatt Macy err |= cmp_code(opts, rm_test, parity); 351eda14cbcSMatt Macy 352eda14cbcSMatt Macy if (err) 353eda14cbcSMatt Macy ERR("initializing the golden copy ... [FAIL]!\n"); 354eda14cbcSMatt Macy 355eda14cbcSMatt Macy /* tear down raidz_map of test zio */ 356eda14cbcSMatt Macy fini_raidz_map(&zio_test, &rm_test); 357eda14cbcSMatt Macy 358eda14cbcSMatt Macy return (err); 359eda14cbcSMatt Macy } 360eda14cbcSMatt Macy 3617877fdebSMatt Macy /* 3627877fdebSMatt Macy * If reflow is not in progress, reflow_offset should be UINT64_MAX. 3637877fdebSMatt Macy * For each row, if the row is entirely before reflow_offset, it will 3647877fdebSMatt Macy * come from the new location. Otherwise this row will come from the 3657877fdebSMatt Macy * old location. Therefore, rows that straddle the reflow_offset will 3667877fdebSMatt Macy * come from the old location. 3677877fdebSMatt Macy * 3687877fdebSMatt Macy * NOTE: Until raidz expansion is implemented this function is only 3697877fdebSMatt Macy * needed by raidz_test.c to the multi-row raid_map_t functionality. 3707877fdebSMatt Macy */ 3717877fdebSMatt Macy raidz_map_t * 3727877fdebSMatt Macy vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset, 3737877fdebSMatt Macy uint64_t ashift, uint64_t physical_cols, uint64_t logical_cols, 3747877fdebSMatt Macy uint64_t nparity, uint64_t reflow_offset) 3757877fdebSMatt Macy { 3767877fdebSMatt Macy /* The zio's size in units of the vdev's minimum sector size. */ 3777877fdebSMatt Macy uint64_t s = size >> ashift; 3787877fdebSMatt Macy uint64_t q, r, bc, devidx, asize = 0, tot; 3797877fdebSMatt Macy 3807877fdebSMatt Macy /* 3817877fdebSMatt Macy * "Quotient": The number of data sectors for this stripe on all but 3827877fdebSMatt Macy * the "big column" child vdevs that also contain "remainder" data. 3837877fdebSMatt Macy * AKA "full rows" 3847877fdebSMatt Macy */ 3857877fdebSMatt Macy q = s / (logical_cols - nparity); 3867877fdebSMatt Macy 3877877fdebSMatt Macy /* 3887877fdebSMatt Macy * "Remainder": The number of partial stripe data sectors in this I/O. 3897877fdebSMatt Macy * This will add a sector to some, but not all, child vdevs. 3907877fdebSMatt Macy */ 3917877fdebSMatt Macy r = s - q * (logical_cols - nparity); 3927877fdebSMatt Macy 3937877fdebSMatt Macy /* The number of "big columns" - those which contain remainder data. */ 3947877fdebSMatt Macy bc = (r == 0 ? 0 : r + nparity); 3957877fdebSMatt Macy 3967877fdebSMatt Macy /* 3977877fdebSMatt Macy * The total number of data and parity sectors associated with 3987877fdebSMatt Macy * this I/O. 3997877fdebSMatt Macy */ 4007877fdebSMatt Macy tot = s + nparity * (q + (r == 0 ? 0 : 1)); 4017877fdebSMatt Macy 4027877fdebSMatt Macy /* How many rows contain data (not skip) */ 4037877fdebSMatt Macy uint64_t rows = howmany(tot, logical_cols); 4047877fdebSMatt Macy int cols = MIN(tot, logical_cols); 4057877fdebSMatt Macy 4067877fdebSMatt Macy raidz_map_t *rm = kmem_zalloc(offsetof(raidz_map_t, rm_row[rows]), 4077877fdebSMatt Macy KM_SLEEP); 4087877fdebSMatt Macy rm->rm_nrows = rows; 4097877fdebSMatt Macy 4107877fdebSMatt Macy for (uint64_t row = 0; row < rows; row++) { 4117877fdebSMatt Macy raidz_row_t *rr = kmem_alloc(offsetof(raidz_row_t, 4127877fdebSMatt Macy rr_col[cols]), KM_SLEEP); 4137877fdebSMatt Macy rm->rm_row[row] = rr; 4147877fdebSMatt Macy 4157877fdebSMatt Macy /* The starting RAIDZ (parent) vdev sector of the row. */ 4167877fdebSMatt Macy uint64_t b = (offset >> ashift) + row * logical_cols; 4177877fdebSMatt Macy 4187877fdebSMatt Macy /* 4197877fdebSMatt Macy * If we are in the middle of a reflow, and any part of this 4207877fdebSMatt Macy * row has not been copied, then use the old location of 4217877fdebSMatt Macy * this row. 4227877fdebSMatt Macy */ 4237877fdebSMatt Macy int row_phys_cols = physical_cols; 4247877fdebSMatt Macy if (b + (logical_cols - nparity) > reflow_offset >> ashift) 4257877fdebSMatt Macy row_phys_cols--; 4267877fdebSMatt Macy 4277877fdebSMatt Macy /* starting child of this row */ 4287877fdebSMatt Macy uint64_t child_id = b % row_phys_cols; 4297877fdebSMatt Macy /* The starting byte offset on each child vdev. */ 4307877fdebSMatt Macy uint64_t child_offset = (b / row_phys_cols) << ashift; 4317877fdebSMatt Macy 4327877fdebSMatt Macy /* 4337877fdebSMatt Macy * We set cols to the entire width of the block, even 4347877fdebSMatt Macy * if this row is shorter. This is needed because parity 4357877fdebSMatt Macy * generation (for Q and R) needs to know the entire width, 4367877fdebSMatt Macy * because it treats the short row as though it was 4377877fdebSMatt Macy * full-width (and the "phantom" sectors were zero-filled). 4387877fdebSMatt Macy * 4397877fdebSMatt Macy * Another approach to this would be to set cols shorter 4407877fdebSMatt Macy * (to just the number of columns that we might do i/o to) 4417877fdebSMatt Macy * and have another mechanism to tell the parity generation 4427877fdebSMatt Macy * about the "entire width". Reconstruction (at least 4437877fdebSMatt Macy * vdev_raidz_reconstruct_general()) would also need to 4447877fdebSMatt Macy * know about the "entire width". 4457877fdebSMatt Macy */ 4467877fdebSMatt Macy rr->rr_cols = cols; 4477877fdebSMatt Macy rr->rr_bigcols = bc; 4487877fdebSMatt Macy rr->rr_missingdata = 0; 4497877fdebSMatt Macy rr->rr_missingparity = 0; 4507877fdebSMatt Macy rr->rr_firstdatacol = nparity; 4517877fdebSMatt Macy rr->rr_abd_copy = NULL; 4527877fdebSMatt Macy rr->rr_abd_empty = NULL; 4537877fdebSMatt Macy rr->rr_nempty = 0; 4547877fdebSMatt Macy 4557877fdebSMatt Macy for (int c = 0; c < rr->rr_cols; c++, child_id++) { 4567877fdebSMatt Macy if (child_id >= row_phys_cols) { 4577877fdebSMatt Macy child_id -= row_phys_cols; 4587877fdebSMatt Macy child_offset += 1ULL << ashift; 4597877fdebSMatt Macy } 4607877fdebSMatt Macy rr->rr_col[c].rc_devidx = child_id; 4617877fdebSMatt Macy rr->rr_col[c].rc_offset = child_offset; 4627877fdebSMatt Macy rr->rr_col[c].rc_gdata = NULL; 4637877fdebSMatt Macy rr->rr_col[c].rc_orig_data = NULL; 4647877fdebSMatt Macy rr->rr_col[c].rc_error = 0; 4657877fdebSMatt Macy rr->rr_col[c].rc_tried = 0; 4667877fdebSMatt Macy rr->rr_col[c].rc_skipped = 0; 4677877fdebSMatt Macy rr->rr_col[c].rc_need_orig_restore = B_FALSE; 4687877fdebSMatt Macy 4697877fdebSMatt Macy uint64_t dc = c - rr->rr_firstdatacol; 4707877fdebSMatt Macy if (c < rr->rr_firstdatacol) { 4717877fdebSMatt Macy rr->rr_col[c].rc_size = 1ULL << ashift; 4727877fdebSMatt Macy rr->rr_col[c].rc_abd = 4737877fdebSMatt Macy abd_alloc_linear(rr->rr_col[c].rc_size, 4747877fdebSMatt Macy B_TRUE); 4757877fdebSMatt Macy } else if (row == rows - 1 && bc != 0 && c >= bc) { 4767877fdebSMatt Macy /* 4777877fdebSMatt Macy * Past the end, this for parity generation. 4787877fdebSMatt Macy */ 4797877fdebSMatt Macy rr->rr_col[c].rc_size = 0; 4807877fdebSMatt Macy rr->rr_col[c].rc_abd = NULL; 4817877fdebSMatt Macy } else { 4827877fdebSMatt Macy /* 4837877fdebSMatt Macy * "data column" (col excluding parity) 4847877fdebSMatt Macy * Add an ASCII art diagram here 4857877fdebSMatt Macy */ 4867877fdebSMatt Macy uint64_t off; 4877877fdebSMatt Macy 4887877fdebSMatt Macy if (c < bc || r == 0) { 4897877fdebSMatt Macy off = dc * rows + row; 4907877fdebSMatt Macy } else { 4917877fdebSMatt Macy off = r * rows + 4927877fdebSMatt Macy (dc - r) * (rows - 1) + row; 4937877fdebSMatt Macy } 4947877fdebSMatt Macy rr->rr_col[c].rc_size = 1ULL << ashift; 495*184c1b94SMartin Matuska rr->rr_col[c].rc_abd = abd_get_offset_struct( 496*184c1b94SMartin Matuska &rr->rr_col[c].rc_abdstruct, 497*184c1b94SMartin Matuska abd, off << ashift, 1 << ashift); 4987877fdebSMatt Macy } 4997877fdebSMatt Macy 5007877fdebSMatt Macy asize += rr->rr_col[c].rc_size; 5017877fdebSMatt Macy } 5027877fdebSMatt Macy /* 5037877fdebSMatt Macy * If all data stored spans all columns, there's a danger that 5047877fdebSMatt Macy * parity will always be on the same device and, since parity 5057877fdebSMatt Macy * isn't read during normal operation, that that device's I/O 5067877fdebSMatt Macy * bandwidth won't be used effectively. We therefore switch 5077877fdebSMatt Macy * the parity every 1MB. 5087877fdebSMatt Macy * 5097877fdebSMatt Macy * ...at least that was, ostensibly, the theory. As a practical 5107877fdebSMatt Macy * matter unless we juggle the parity between all devices 5117877fdebSMatt Macy * evenly, we won't see any benefit. Further, occasional writes 5127877fdebSMatt Macy * that aren't a multiple of the LCM of the number of children 5137877fdebSMatt Macy * and the minimum stripe width are sufficient to avoid pessimal 5147877fdebSMatt Macy * behavior. Unfortunately, this decision created an implicit 5157877fdebSMatt Macy * on-disk format requirement that we need to support for all 5167877fdebSMatt Macy * eternity, but only for single-parity RAID-Z. 5177877fdebSMatt Macy * 5187877fdebSMatt Macy * If we intend to skip a sector in the zeroth column for 5197877fdebSMatt Macy * padding we must make sure to note this swap. We will never 5207877fdebSMatt Macy * intend to skip the first column since at least one data and 5217877fdebSMatt Macy * one parity column must appear in each row. 5227877fdebSMatt Macy */ 5237877fdebSMatt Macy if (rr->rr_firstdatacol == 1 && rr->rr_cols > 1 && 5247877fdebSMatt Macy (offset & (1ULL << 20))) { 5257877fdebSMatt Macy ASSERT(rr->rr_cols >= 2); 5267877fdebSMatt Macy ASSERT(rr->rr_col[0].rc_size == rr->rr_col[1].rc_size); 5277877fdebSMatt Macy devidx = rr->rr_col[0].rc_devidx; 5287877fdebSMatt Macy uint64_t o = rr->rr_col[0].rc_offset; 5297877fdebSMatt Macy rr->rr_col[0].rc_devidx = rr->rr_col[1].rc_devidx; 5307877fdebSMatt Macy rr->rr_col[0].rc_offset = rr->rr_col[1].rc_offset; 5317877fdebSMatt Macy rr->rr_col[1].rc_devidx = devidx; 5327877fdebSMatt Macy rr->rr_col[1].rc_offset = o; 5337877fdebSMatt Macy } 5347877fdebSMatt Macy 5357877fdebSMatt Macy } 5367877fdebSMatt Macy ASSERT3U(asize, ==, tot << ashift); 5377877fdebSMatt Macy 5387877fdebSMatt Macy /* init RAIDZ parity ops */ 5397877fdebSMatt Macy rm->rm_ops = vdev_raidz_math_get_ops(); 5407877fdebSMatt Macy 5417877fdebSMatt Macy return (rm); 5427877fdebSMatt Macy } 5437877fdebSMatt Macy 544eda14cbcSMatt Macy static raidz_map_t * 545eda14cbcSMatt Macy init_raidz_map(raidz_test_opts_t *opts, zio_t **zio, const int parity) 546eda14cbcSMatt Macy { 547eda14cbcSMatt Macy raidz_map_t *rm = NULL; 548eda14cbcSMatt Macy const size_t alloc_dsize = opts->rto_dsize; 549eda14cbcSMatt Macy const size_t total_ncols = opts->rto_dcols + parity; 550eda14cbcSMatt Macy const int ccols[] = { 0, 1, 2 }; 551eda14cbcSMatt Macy 552eda14cbcSMatt Macy VERIFY(zio); 553eda14cbcSMatt Macy VERIFY(parity <= 3 && parity >= 1); 554eda14cbcSMatt Macy 555eda14cbcSMatt Macy *zio = umem_zalloc(sizeof (zio_t), UMEM_NOFAIL); 556eda14cbcSMatt Macy 557eda14cbcSMatt Macy (*zio)->io_offset = 0; 558eda14cbcSMatt Macy (*zio)->io_size = alloc_dsize; 559eda14cbcSMatt Macy (*zio)->io_abd = raidz_alloc(alloc_dsize); 560eda14cbcSMatt Macy init_zio_abd(*zio); 561eda14cbcSMatt Macy 5627877fdebSMatt Macy if (opts->rto_expand) { 5637877fdebSMatt Macy rm = vdev_raidz_map_alloc_expanded((*zio)->io_abd, 5647877fdebSMatt Macy (*zio)->io_size, (*zio)->io_offset, 5657877fdebSMatt Macy opts->rto_ashift, total_ncols+1, total_ncols, 5667877fdebSMatt Macy parity, opts->rto_expand_offset); 5677877fdebSMatt Macy } else { 568eda14cbcSMatt Macy rm = vdev_raidz_map_alloc(*zio, opts->rto_ashift, 569eda14cbcSMatt Macy total_ncols, parity); 5707877fdebSMatt Macy } 571eda14cbcSMatt Macy VERIFY(rm); 572eda14cbcSMatt Macy 573eda14cbcSMatt Macy /* Make sure code columns are destroyed */ 574eda14cbcSMatt Macy corrupt_colums(rm, ccols, parity); 575eda14cbcSMatt Macy 576eda14cbcSMatt Macy return (rm); 577eda14cbcSMatt Macy } 578eda14cbcSMatt Macy 579eda14cbcSMatt Macy static int 580eda14cbcSMatt Macy run_gen_check(raidz_test_opts_t *opts) 581eda14cbcSMatt Macy { 582eda14cbcSMatt Macy char **impl_name; 583eda14cbcSMatt Macy int fn, err = 0; 584eda14cbcSMatt Macy zio_t *zio_test; 585eda14cbcSMatt Macy raidz_map_t *rm_test; 586eda14cbcSMatt Macy 587eda14cbcSMatt Macy err = init_raidz_golden_map(opts, PARITY_PQR); 588eda14cbcSMatt Macy if (0 != err) 589eda14cbcSMatt Macy return (err); 590eda14cbcSMatt Macy 591eda14cbcSMatt Macy LOG(D_INFO, DBLSEP); 592eda14cbcSMatt Macy LOG(D_INFO, "Testing parity generation...\n"); 593eda14cbcSMatt Macy 594eda14cbcSMatt Macy for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL; 595eda14cbcSMatt Macy impl_name++) { 596eda14cbcSMatt Macy 597eda14cbcSMatt Macy LOG(D_INFO, SEP); 598eda14cbcSMatt Macy LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name); 599eda14cbcSMatt Macy 600eda14cbcSMatt Macy if (0 != vdev_raidz_impl_set(*impl_name)) { 601eda14cbcSMatt Macy LOG(D_INFO, "[SKIP]\n"); 602eda14cbcSMatt Macy continue; 603eda14cbcSMatt Macy } else { 604eda14cbcSMatt Macy LOG(D_INFO, "[SUPPORTED]\n"); 605eda14cbcSMatt Macy } 606eda14cbcSMatt Macy 607eda14cbcSMatt Macy for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) { 608eda14cbcSMatt Macy 609eda14cbcSMatt Macy /* Check if should stop */ 610eda14cbcSMatt Macy if (rto_opts.rto_should_stop) 611eda14cbcSMatt Macy return (err); 612eda14cbcSMatt Macy 613eda14cbcSMatt Macy /* create suitable raidz_map */ 614eda14cbcSMatt Macy rm_test = init_raidz_map(opts, &zio_test, fn+1); 615eda14cbcSMatt Macy VERIFY(rm_test); 616eda14cbcSMatt Macy 617eda14cbcSMatt Macy LOG(D_INFO, "\t\tTesting method [%s] ...", 618eda14cbcSMatt Macy raidz_gen_name[fn]); 619eda14cbcSMatt Macy 620eda14cbcSMatt Macy if (!opts->rto_sanity) 621eda14cbcSMatt Macy vdev_raidz_generate_parity(rm_test); 622eda14cbcSMatt Macy 623eda14cbcSMatt Macy if (cmp_code(opts, rm_test, fn+1) != 0) { 624eda14cbcSMatt Macy LOG(D_INFO, "[FAIL]\n"); 625eda14cbcSMatt Macy err++; 626eda14cbcSMatt Macy } else 627eda14cbcSMatt Macy LOG(D_INFO, "[PASS]\n"); 628eda14cbcSMatt Macy 629eda14cbcSMatt Macy fini_raidz_map(&zio_test, &rm_test); 630eda14cbcSMatt Macy } 631eda14cbcSMatt Macy } 632eda14cbcSMatt Macy 633eda14cbcSMatt Macy fini_raidz_map(&opts->zio_golden, &opts->rm_golden); 634eda14cbcSMatt Macy 635eda14cbcSMatt Macy return (err); 636eda14cbcSMatt Macy } 637eda14cbcSMatt Macy 638eda14cbcSMatt Macy static int 639eda14cbcSMatt Macy run_rec_check_impl(raidz_test_opts_t *opts, raidz_map_t *rm, const int fn) 640eda14cbcSMatt Macy { 641eda14cbcSMatt Macy int x0, x1, x2; 642eda14cbcSMatt Macy int tgtidx[3]; 643eda14cbcSMatt Macy int err = 0; 644eda14cbcSMatt Macy static const int rec_tgts[7][3] = { 645eda14cbcSMatt Macy {1, 2, 3}, /* rec_p: bad QR & D[0] */ 646eda14cbcSMatt Macy {0, 2, 3}, /* rec_q: bad PR & D[0] */ 647eda14cbcSMatt Macy {0, 1, 3}, /* rec_r: bad PQ & D[0] */ 648eda14cbcSMatt Macy {2, 3, 4}, /* rec_pq: bad R & D[0][1] */ 649eda14cbcSMatt Macy {1, 3, 4}, /* rec_pr: bad Q & D[0][1] */ 650eda14cbcSMatt Macy {0, 3, 4}, /* rec_qr: bad P & D[0][1] */ 651eda14cbcSMatt Macy {3, 4, 5} /* rec_pqr: bad & D[0][1][2] */ 652eda14cbcSMatt Macy }; 653eda14cbcSMatt Macy 654eda14cbcSMatt Macy memcpy(tgtidx, rec_tgts[fn], sizeof (tgtidx)); 655eda14cbcSMatt Macy 656eda14cbcSMatt Macy if (fn < RAIDZ_REC_PQ) { 657eda14cbcSMatt Macy /* can reconstruct 1 failed data disk */ 658eda14cbcSMatt Macy for (x0 = 0; x0 < opts->rto_dcols; x0++) { 6597877fdebSMatt Macy if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm)) 660eda14cbcSMatt Macy continue; 661eda14cbcSMatt Macy 662eda14cbcSMatt Macy /* Check if should stop */ 663eda14cbcSMatt Macy if (rto_opts.rto_should_stop) 664eda14cbcSMatt Macy return (err); 665eda14cbcSMatt Macy 666eda14cbcSMatt Macy LOG(D_DEBUG, "[%d] ", x0); 667eda14cbcSMatt Macy 668eda14cbcSMatt Macy tgtidx[2] = x0 + raidz_parity(rm); 669eda14cbcSMatt Macy 670eda14cbcSMatt Macy corrupt_colums(rm, tgtidx+2, 1); 671eda14cbcSMatt Macy 672eda14cbcSMatt Macy if (!opts->rto_sanity) 673eda14cbcSMatt Macy vdev_raidz_reconstruct(rm, tgtidx, 3); 674eda14cbcSMatt Macy 675eda14cbcSMatt Macy if (cmp_data(opts, rm) != 0) { 676eda14cbcSMatt Macy err++; 677eda14cbcSMatt Macy LOG(D_DEBUG, "\nREC D[%d]... [FAIL]\n", x0); 678eda14cbcSMatt Macy } 679eda14cbcSMatt Macy } 680eda14cbcSMatt Macy 681eda14cbcSMatt Macy } else if (fn < RAIDZ_REC_PQR) { 682eda14cbcSMatt Macy /* can reconstruct 2 failed data disk */ 683eda14cbcSMatt Macy for (x0 = 0; x0 < opts->rto_dcols; x0++) { 6847877fdebSMatt Macy if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm)) 685eda14cbcSMatt Macy continue; 686eda14cbcSMatt Macy for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) { 6877877fdebSMatt Macy if (x1 >= rm->rm_row[0]->rr_cols - 6887877fdebSMatt Macy raidz_parity(rm)) 689eda14cbcSMatt Macy continue; 690eda14cbcSMatt Macy 691eda14cbcSMatt Macy /* Check if should stop */ 692eda14cbcSMatt Macy if (rto_opts.rto_should_stop) 693eda14cbcSMatt Macy return (err); 694eda14cbcSMatt Macy 695eda14cbcSMatt Macy LOG(D_DEBUG, "[%d %d] ", x0, x1); 696eda14cbcSMatt Macy 697eda14cbcSMatt Macy tgtidx[1] = x0 + raidz_parity(rm); 698eda14cbcSMatt Macy tgtidx[2] = x1 + raidz_parity(rm); 699eda14cbcSMatt Macy 700eda14cbcSMatt Macy corrupt_colums(rm, tgtidx+1, 2); 701eda14cbcSMatt Macy 702eda14cbcSMatt Macy if (!opts->rto_sanity) 703eda14cbcSMatt Macy vdev_raidz_reconstruct(rm, tgtidx, 3); 704eda14cbcSMatt Macy 705eda14cbcSMatt Macy if (cmp_data(opts, rm) != 0) { 706eda14cbcSMatt Macy err++; 707eda14cbcSMatt Macy LOG(D_DEBUG, "\nREC D[%d %d]... " 708eda14cbcSMatt Macy "[FAIL]\n", x0, x1); 709eda14cbcSMatt Macy } 710eda14cbcSMatt Macy } 711eda14cbcSMatt Macy } 712eda14cbcSMatt Macy } else { 713eda14cbcSMatt Macy /* can reconstruct 3 failed data disk */ 714eda14cbcSMatt Macy for (x0 = 0; x0 < opts->rto_dcols; x0++) { 7157877fdebSMatt Macy if (x0 >= rm->rm_row[0]->rr_cols - raidz_parity(rm)) 716eda14cbcSMatt Macy continue; 717eda14cbcSMatt Macy for (x1 = x0 + 1; x1 < opts->rto_dcols; x1++) { 7187877fdebSMatt Macy if (x1 >= rm->rm_row[0]->rr_cols - 7197877fdebSMatt Macy raidz_parity(rm)) 720eda14cbcSMatt Macy continue; 721eda14cbcSMatt Macy for (x2 = x1 + 1; x2 < opts->rto_dcols; x2++) { 7227877fdebSMatt Macy if (x2 >= rm->rm_row[0]->rr_cols - 7237877fdebSMatt Macy raidz_parity(rm)) 724eda14cbcSMatt Macy continue; 725eda14cbcSMatt Macy 726eda14cbcSMatt Macy /* Check if should stop */ 727eda14cbcSMatt Macy if (rto_opts.rto_should_stop) 728eda14cbcSMatt Macy return (err); 729eda14cbcSMatt Macy 730eda14cbcSMatt Macy LOG(D_DEBUG, "[%d %d %d]", x0, x1, x2); 731eda14cbcSMatt Macy 732eda14cbcSMatt Macy tgtidx[0] = x0 + raidz_parity(rm); 733eda14cbcSMatt Macy tgtidx[1] = x1 + raidz_parity(rm); 734eda14cbcSMatt Macy tgtidx[2] = x2 + raidz_parity(rm); 735eda14cbcSMatt Macy 736eda14cbcSMatt Macy corrupt_colums(rm, tgtidx, 3); 737eda14cbcSMatt Macy 738eda14cbcSMatt Macy if (!opts->rto_sanity) 739eda14cbcSMatt Macy vdev_raidz_reconstruct(rm, 740eda14cbcSMatt Macy tgtidx, 3); 741eda14cbcSMatt Macy 742eda14cbcSMatt Macy if (cmp_data(opts, rm) != 0) { 743eda14cbcSMatt Macy err++; 744eda14cbcSMatt Macy LOG(D_DEBUG, 745eda14cbcSMatt Macy "\nREC D[%d %d %d]... " 746eda14cbcSMatt Macy "[FAIL]\n", x0, x1, x2); 747eda14cbcSMatt Macy } 748eda14cbcSMatt Macy } 749eda14cbcSMatt Macy } 750eda14cbcSMatt Macy } 751eda14cbcSMatt Macy } 752eda14cbcSMatt Macy return (err); 753eda14cbcSMatt Macy } 754eda14cbcSMatt Macy 755eda14cbcSMatt Macy static int 756eda14cbcSMatt Macy run_rec_check(raidz_test_opts_t *opts) 757eda14cbcSMatt Macy { 758eda14cbcSMatt Macy char **impl_name; 759eda14cbcSMatt Macy unsigned fn, err = 0; 760eda14cbcSMatt Macy zio_t *zio_test; 761eda14cbcSMatt Macy raidz_map_t *rm_test; 762eda14cbcSMatt Macy 763eda14cbcSMatt Macy err = init_raidz_golden_map(opts, PARITY_PQR); 764eda14cbcSMatt Macy if (0 != err) 765eda14cbcSMatt Macy return (err); 766eda14cbcSMatt Macy 767eda14cbcSMatt Macy LOG(D_INFO, DBLSEP); 768eda14cbcSMatt Macy LOG(D_INFO, "Testing data reconstruction...\n"); 769eda14cbcSMatt Macy 770eda14cbcSMatt Macy for (impl_name = (char **)raidz_impl_names+1; *impl_name != NULL; 771eda14cbcSMatt Macy impl_name++) { 772eda14cbcSMatt Macy 773eda14cbcSMatt Macy LOG(D_INFO, SEP); 774eda14cbcSMatt Macy LOG(D_INFO, "\tTesting [%s] implementation...", *impl_name); 775eda14cbcSMatt Macy 776eda14cbcSMatt Macy if (vdev_raidz_impl_set(*impl_name) != 0) { 777eda14cbcSMatt Macy LOG(D_INFO, "[SKIP]\n"); 778eda14cbcSMatt Macy continue; 779eda14cbcSMatt Macy } else 780eda14cbcSMatt Macy LOG(D_INFO, "[SUPPORTED]\n"); 781eda14cbcSMatt Macy 782eda14cbcSMatt Macy 783eda14cbcSMatt Macy /* create suitable raidz_map */ 784eda14cbcSMatt Macy rm_test = init_raidz_map(opts, &zio_test, PARITY_PQR); 785eda14cbcSMatt Macy /* generate parity */ 786eda14cbcSMatt Macy vdev_raidz_generate_parity(rm_test); 787eda14cbcSMatt Macy 788eda14cbcSMatt Macy for (fn = 0; fn < RAIDZ_REC_NUM; fn++) { 789eda14cbcSMatt Macy 790eda14cbcSMatt Macy LOG(D_INFO, "\t\tTesting method [%s] ...", 791eda14cbcSMatt Macy raidz_rec_name[fn]); 792eda14cbcSMatt Macy 793eda14cbcSMatt Macy if (run_rec_check_impl(opts, rm_test, fn) != 0) { 794eda14cbcSMatt Macy LOG(D_INFO, "[FAIL]\n"); 795eda14cbcSMatt Macy err++; 796eda14cbcSMatt Macy 797eda14cbcSMatt Macy } else 798eda14cbcSMatt Macy LOG(D_INFO, "[PASS]\n"); 799eda14cbcSMatt Macy 800eda14cbcSMatt Macy } 801eda14cbcSMatt Macy /* tear down test raidz_map */ 802eda14cbcSMatt Macy fini_raidz_map(&zio_test, &rm_test); 803eda14cbcSMatt Macy } 804eda14cbcSMatt Macy 805eda14cbcSMatt Macy fini_raidz_map(&opts->zio_golden, &opts->rm_golden); 806eda14cbcSMatt Macy 807eda14cbcSMatt Macy return (err); 808eda14cbcSMatt Macy } 809eda14cbcSMatt Macy 810eda14cbcSMatt Macy static int 811eda14cbcSMatt Macy run_test(raidz_test_opts_t *opts) 812eda14cbcSMatt Macy { 813eda14cbcSMatt Macy int err = 0; 814eda14cbcSMatt Macy 815eda14cbcSMatt Macy if (opts == NULL) 816eda14cbcSMatt Macy opts = &rto_opts; 817eda14cbcSMatt Macy 818eda14cbcSMatt Macy print_opts(opts, B_FALSE); 819eda14cbcSMatt Macy 820eda14cbcSMatt Macy err |= run_gen_check(opts); 821eda14cbcSMatt Macy err |= run_rec_check(opts); 822eda14cbcSMatt Macy 823eda14cbcSMatt Macy return (err); 824eda14cbcSMatt Macy } 825eda14cbcSMatt Macy 826eda14cbcSMatt Macy #define SWEEP_RUNNING 0 827eda14cbcSMatt Macy #define SWEEP_FINISHED 1 828eda14cbcSMatt Macy #define SWEEP_ERROR 2 829eda14cbcSMatt Macy #define SWEEP_TIMEOUT 3 830eda14cbcSMatt Macy 831eda14cbcSMatt Macy static int sweep_state = 0; 832eda14cbcSMatt Macy static raidz_test_opts_t failed_opts; 833eda14cbcSMatt Macy 834eda14cbcSMatt Macy static kmutex_t sem_mtx; 835eda14cbcSMatt Macy static kcondvar_t sem_cv; 836eda14cbcSMatt Macy static int max_free_slots; 837eda14cbcSMatt Macy static int free_slots; 838eda14cbcSMatt Macy 839eda14cbcSMatt Macy static void 840eda14cbcSMatt Macy sweep_thread(void *arg) 841eda14cbcSMatt Macy { 842eda14cbcSMatt Macy int err = 0; 843eda14cbcSMatt Macy raidz_test_opts_t *opts = (raidz_test_opts_t *)arg; 844eda14cbcSMatt Macy VERIFY(opts != NULL); 845eda14cbcSMatt Macy 846eda14cbcSMatt Macy err = run_test(opts); 847eda14cbcSMatt Macy 848eda14cbcSMatt Macy if (rto_opts.rto_sanity) { 849eda14cbcSMatt Macy /* 25% chance that a sweep test fails */ 850eda14cbcSMatt Macy if (rand() < (RAND_MAX/4)) 851eda14cbcSMatt Macy err = 1; 852eda14cbcSMatt Macy } 853eda14cbcSMatt Macy 854eda14cbcSMatt Macy if (0 != err) { 855eda14cbcSMatt Macy mutex_enter(&sem_mtx); 856eda14cbcSMatt Macy memcpy(&failed_opts, opts, sizeof (raidz_test_opts_t)); 857eda14cbcSMatt Macy sweep_state = SWEEP_ERROR; 858eda14cbcSMatt Macy mutex_exit(&sem_mtx); 859eda14cbcSMatt Macy } 860eda14cbcSMatt Macy 861eda14cbcSMatt Macy umem_free(opts, sizeof (raidz_test_opts_t)); 862eda14cbcSMatt Macy 863eda14cbcSMatt Macy /* signal the next thread */ 864eda14cbcSMatt Macy mutex_enter(&sem_mtx); 865eda14cbcSMatt Macy free_slots++; 866eda14cbcSMatt Macy cv_signal(&sem_cv); 867eda14cbcSMatt Macy mutex_exit(&sem_mtx); 868eda14cbcSMatt Macy 869eda14cbcSMatt Macy thread_exit(); 870eda14cbcSMatt Macy } 871eda14cbcSMatt Macy 872eda14cbcSMatt Macy static int 873eda14cbcSMatt Macy run_sweep(void) 874eda14cbcSMatt Macy { 875eda14cbcSMatt Macy static const size_t dcols_v[] = { 1, 2, 3, 4, 5, 6, 7, 8, 12, 15, 16 }; 876eda14cbcSMatt Macy static const size_t ashift_v[] = { 9, 12, 14 }; 877eda14cbcSMatt Macy static const size_t size_v[] = { 1 << 9, 21 * (1 << 9), 13 * (1 << 12), 878eda14cbcSMatt Macy 1 << 17, (1 << 20) - (1 << 12), SPA_MAXBLOCKSIZE }; 879eda14cbcSMatt Macy 880eda14cbcSMatt Macy (void) setvbuf(stdout, NULL, _IONBF, 0); 881eda14cbcSMatt Macy 882eda14cbcSMatt Macy ulong_t total_comb = ARRAY_SIZE(size_v) * ARRAY_SIZE(ashift_v) * 883eda14cbcSMatt Macy ARRAY_SIZE(dcols_v); 884eda14cbcSMatt Macy ulong_t tried_comb = 0; 885eda14cbcSMatt Macy hrtime_t time_diff, start_time = gethrtime(); 886eda14cbcSMatt Macy raidz_test_opts_t *opts; 887eda14cbcSMatt Macy int a, d, s; 888eda14cbcSMatt Macy 889eda14cbcSMatt Macy max_free_slots = free_slots = MAX(2, boot_ncpus); 890eda14cbcSMatt Macy 891eda14cbcSMatt Macy mutex_init(&sem_mtx, NULL, MUTEX_DEFAULT, NULL); 892eda14cbcSMatt Macy cv_init(&sem_cv, NULL, CV_DEFAULT, NULL); 893eda14cbcSMatt Macy 894eda14cbcSMatt Macy for (s = 0; s < ARRAY_SIZE(size_v); s++) 895eda14cbcSMatt Macy for (a = 0; a < ARRAY_SIZE(ashift_v); a++) 896eda14cbcSMatt Macy for (d = 0; d < ARRAY_SIZE(dcols_v); d++) { 897eda14cbcSMatt Macy 898eda14cbcSMatt Macy if (size_v[s] < (1 << ashift_v[a])) { 899eda14cbcSMatt Macy total_comb--; 900eda14cbcSMatt Macy continue; 901eda14cbcSMatt Macy } 902eda14cbcSMatt Macy 903eda14cbcSMatt Macy if (++tried_comb % 20 == 0) 904eda14cbcSMatt Macy LOG(D_ALL, "%lu/%lu... ", tried_comb, total_comb); 905eda14cbcSMatt Macy 906eda14cbcSMatt Macy /* wait for signal to start new thread */ 907eda14cbcSMatt Macy mutex_enter(&sem_mtx); 908eda14cbcSMatt Macy while (cv_timedwait_sig(&sem_cv, &sem_mtx, 909eda14cbcSMatt Macy ddi_get_lbolt() + hz)) { 910eda14cbcSMatt Macy 911eda14cbcSMatt Macy /* check if should stop the test (timeout) */ 912eda14cbcSMatt Macy time_diff = (gethrtime() - start_time) / NANOSEC; 913eda14cbcSMatt Macy if (rto_opts.rto_sweep_timeout > 0 && 914eda14cbcSMatt Macy time_diff >= rto_opts.rto_sweep_timeout) { 915eda14cbcSMatt Macy sweep_state = SWEEP_TIMEOUT; 916eda14cbcSMatt Macy rto_opts.rto_should_stop = B_TRUE; 917eda14cbcSMatt Macy mutex_exit(&sem_mtx); 918eda14cbcSMatt Macy goto exit; 919eda14cbcSMatt Macy } 920eda14cbcSMatt Macy 921eda14cbcSMatt Macy /* check if should stop the test (error) */ 922eda14cbcSMatt Macy if (sweep_state != SWEEP_RUNNING) { 923eda14cbcSMatt Macy mutex_exit(&sem_mtx); 924eda14cbcSMatt Macy goto exit; 925eda14cbcSMatt Macy } 926eda14cbcSMatt Macy 927eda14cbcSMatt Macy /* exit loop if a slot is available */ 928eda14cbcSMatt Macy if (free_slots > 0) { 929eda14cbcSMatt Macy break; 930eda14cbcSMatt Macy } 931eda14cbcSMatt Macy } 932eda14cbcSMatt Macy 933eda14cbcSMatt Macy free_slots--; 934eda14cbcSMatt Macy mutex_exit(&sem_mtx); 935eda14cbcSMatt Macy 936eda14cbcSMatt Macy opts = umem_zalloc(sizeof (raidz_test_opts_t), UMEM_NOFAIL); 937eda14cbcSMatt Macy opts->rto_ashift = ashift_v[a]; 938eda14cbcSMatt Macy opts->rto_dcols = dcols_v[d]; 939eda14cbcSMatt Macy opts->rto_offset = (1 << ashift_v[a]) * rand(); 940eda14cbcSMatt Macy opts->rto_dsize = size_v[s]; 9417877fdebSMatt Macy opts->rto_expand = rto_opts.rto_expand; 9427877fdebSMatt Macy opts->rto_expand_offset = rto_opts.rto_expand_offset; 943eda14cbcSMatt Macy opts->rto_v = 0; /* be quiet */ 944eda14cbcSMatt Macy 945eda14cbcSMatt Macy VERIFY3P(thread_create(NULL, 0, sweep_thread, (void *) opts, 946eda14cbcSMatt Macy 0, NULL, TS_RUN, defclsyspri), !=, NULL); 947eda14cbcSMatt Macy } 948eda14cbcSMatt Macy 949eda14cbcSMatt Macy exit: 950eda14cbcSMatt Macy LOG(D_ALL, "\nWaiting for test threads to finish...\n"); 951eda14cbcSMatt Macy mutex_enter(&sem_mtx); 952eda14cbcSMatt Macy VERIFY(free_slots <= max_free_slots); 953eda14cbcSMatt Macy while (free_slots < max_free_slots) { 954eda14cbcSMatt Macy (void) cv_wait(&sem_cv, &sem_mtx); 955eda14cbcSMatt Macy } 956eda14cbcSMatt Macy mutex_exit(&sem_mtx); 957eda14cbcSMatt Macy 958eda14cbcSMatt Macy if (sweep_state == SWEEP_ERROR) { 959eda14cbcSMatt Macy ERR("Sweep test failed! Failed option: \n"); 960eda14cbcSMatt Macy print_opts(&failed_opts, B_TRUE); 961eda14cbcSMatt Macy } else { 962eda14cbcSMatt Macy if (sweep_state == SWEEP_TIMEOUT) 963eda14cbcSMatt Macy LOG(D_ALL, "Test timeout (%lus). Stopping...\n", 964eda14cbcSMatt Macy (ulong_t)rto_opts.rto_sweep_timeout); 965eda14cbcSMatt Macy 966eda14cbcSMatt Macy LOG(D_ALL, "Sweep test succeeded on %lu raidz maps!\n", 967eda14cbcSMatt Macy (ulong_t)tried_comb); 968eda14cbcSMatt Macy } 969eda14cbcSMatt Macy 970eda14cbcSMatt Macy mutex_destroy(&sem_mtx); 971eda14cbcSMatt Macy 972eda14cbcSMatt Macy return (sweep_state == SWEEP_ERROR ? SWEEP_ERROR : 0); 973eda14cbcSMatt Macy } 974eda14cbcSMatt Macy 9757877fdebSMatt Macy 976eda14cbcSMatt Macy int 977eda14cbcSMatt Macy main(int argc, char **argv) 978eda14cbcSMatt Macy { 979eda14cbcSMatt Macy size_t i; 980eda14cbcSMatt Macy struct sigaction action; 981eda14cbcSMatt Macy int err = 0; 982eda14cbcSMatt Macy 983eda14cbcSMatt Macy /* init gdb string early */ 984eda14cbcSMatt Macy (void) sprintf(gdb, gdb_tmpl, getpid()); 985eda14cbcSMatt Macy 986eda14cbcSMatt Macy action.sa_handler = sig_handler; 987eda14cbcSMatt Macy sigemptyset(&action.sa_mask); 988eda14cbcSMatt Macy action.sa_flags = 0; 989eda14cbcSMatt Macy 990eda14cbcSMatt Macy if (sigaction(SIGSEGV, &action, NULL) < 0) { 991eda14cbcSMatt Macy ERR("raidz_test: cannot catch SIGSEGV: %s.\n", strerror(errno)); 992eda14cbcSMatt Macy exit(EXIT_FAILURE); 993eda14cbcSMatt Macy } 994eda14cbcSMatt Macy 995eda14cbcSMatt Macy (void) setvbuf(stdout, NULL, _IOLBF, 0); 996eda14cbcSMatt Macy 997eda14cbcSMatt Macy dprintf_setup(&argc, argv); 998eda14cbcSMatt Macy 999eda14cbcSMatt Macy process_options(argc, argv); 1000eda14cbcSMatt Macy 1001eda14cbcSMatt Macy kernel_init(SPA_MODE_READ); 1002eda14cbcSMatt Macy 1003eda14cbcSMatt Macy /* setup random data because rand() is not reentrant */ 1004eda14cbcSMatt Macy rand_data = (int *)umem_alloc(SPA_MAXBLOCKSIZE, UMEM_NOFAIL); 1005eda14cbcSMatt Macy srand((unsigned)time(NULL) * getpid()); 1006eda14cbcSMatt Macy for (i = 0; i < SPA_MAXBLOCKSIZE / sizeof (int); i++) 1007eda14cbcSMatt Macy rand_data[i] = rand(); 1008eda14cbcSMatt Macy 1009eda14cbcSMatt Macy mprotect(rand_data, SPA_MAXBLOCKSIZE, PROT_READ); 1010eda14cbcSMatt Macy 1011eda14cbcSMatt Macy if (rto_opts.rto_benchmark) { 1012eda14cbcSMatt Macy run_raidz_benchmark(); 1013eda14cbcSMatt Macy } else if (rto_opts.rto_sweep) { 1014eda14cbcSMatt Macy err = run_sweep(); 1015eda14cbcSMatt Macy } else { 1016eda14cbcSMatt Macy err = run_test(NULL); 1017eda14cbcSMatt Macy } 1018eda14cbcSMatt Macy 1019eda14cbcSMatt Macy umem_free(rand_data, SPA_MAXBLOCKSIZE); 1020eda14cbcSMatt Macy kernel_fini(); 1021eda14cbcSMatt Macy 1022eda14cbcSMatt Macy return (err); 1023eda14cbcSMatt Macy } 1024