/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2018 Intel Corporation.
 * Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
24*716fd348SMartin Matuska */ 25*716fd348SMartin Matuska 26*716fd348SMartin Matuska #include <stdio.h> 27*716fd348SMartin Matuska #include <zlib.h> 28*716fd348SMartin Matuska #include <zfs_fletcher.h> 29*716fd348SMartin Matuska #include <sys/vdev_draid.h> 30*716fd348SMartin Matuska #include <sys/nvpair.h> 31*716fd348SMartin Matuska #include <sys/stat.h> 32*716fd348SMartin Matuska 33*716fd348SMartin Matuska /* 34*716fd348SMartin Matuska * The number of rows to generate for new permutation maps. 35*716fd348SMartin Matuska */ 36*716fd348SMartin Matuska #define MAP_ROWS_DEFAULT 256 37*716fd348SMartin Matuska 38*716fd348SMartin Matuska /* 39*716fd348SMartin Matuska * Key values for dRAID maps when stored as nvlists. 40*716fd348SMartin Matuska */ 41*716fd348SMartin Matuska #define MAP_SEED "seed" 42*716fd348SMartin Matuska #define MAP_CHECKSUM "checksum" 43*716fd348SMartin Matuska #define MAP_WORST_RATIO "worst_ratio" 44*716fd348SMartin Matuska #define MAP_AVG_RATIO "avg_ratio" 45*716fd348SMartin Matuska #define MAP_CHILDREN "children" 46*716fd348SMartin Matuska #define MAP_NPERMS "nperms" 47*716fd348SMartin Matuska #define MAP_PERMS "perms" 48*716fd348SMartin Matuska 49*716fd348SMartin Matuska static void 50*716fd348SMartin Matuska draid_usage(void) 51*716fd348SMartin Matuska { 52*716fd348SMartin Matuska (void) fprintf(stderr, 53*716fd348SMartin Matuska "usage: draid command args ...\n" 54*716fd348SMartin Matuska "Available commands are:\n" 55*716fd348SMartin Matuska "\n" 56*716fd348SMartin Matuska "\tdraid generate [-cv] [-m min] [-n max] [-p passes] FILE\n" 57*716fd348SMartin Matuska "\tdraid verify [-rv] FILE\n" 58*716fd348SMartin Matuska "\tdraid dump [-v] [-m min] [-n max] FILE\n" 59*716fd348SMartin Matuska "\tdraid table FILE\n" 60*716fd348SMartin Matuska "\tdraid merge FILE SRC SRC...\n"); 61*716fd348SMartin Matuska exit(1); 62*716fd348SMartin Matuska } 63*716fd348SMartin Matuska 64*716fd348SMartin Matuska static int 65*716fd348SMartin Matuska read_map(const char 
*filename, nvlist_t **allcfgs) 66*716fd348SMartin Matuska { 67*716fd348SMartin Matuska int block_size = 131072; 68*716fd348SMartin Matuska int buf_size = 131072; 69*716fd348SMartin Matuska int tmp_size, error; 70*716fd348SMartin Matuska char *tmp_buf; 71*716fd348SMartin Matuska 72*716fd348SMartin Matuska struct stat64 stat; 73*716fd348SMartin Matuska if (lstat64(filename, &stat) != 0) 74*716fd348SMartin Matuska return (errno); 75*716fd348SMartin Matuska 76*716fd348SMartin Matuska if (stat.st_size == 0 || 77*716fd348SMartin Matuska !(S_ISREG(stat.st_mode) || S_ISLNK(stat.st_mode))) { 78*716fd348SMartin Matuska return (EINVAL); 79*716fd348SMartin Matuska } 80*716fd348SMartin Matuska 81*716fd348SMartin Matuska gzFile fp = gzopen(filename, "rb"); 82*716fd348SMartin Matuska if (fp == Z_NULL) 83*716fd348SMartin Matuska return (errno); 84*716fd348SMartin Matuska 85*716fd348SMartin Matuska char *buf = malloc(buf_size); 86*716fd348SMartin Matuska if (buf == NULL) { 87*716fd348SMartin Matuska (void) gzclose(fp); 88*716fd348SMartin Matuska return (ENOMEM); 89*716fd348SMartin Matuska } 90*716fd348SMartin Matuska 91*716fd348SMartin Matuska ssize_t rc, bytes = 0; 92*716fd348SMartin Matuska while (!gzeof(fp)) { 93*716fd348SMartin Matuska rc = gzread(fp, buf + bytes, block_size); 94*716fd348SMartin Matuska if ((rc < 0) || (rc == 0 && !gzeof(fp))) { 95*716fd348SMartin Matuska free(buf); 96*716fd348SMartin Matuska (void) gzclose(fp); 97*716fd348SMartin Matuska (void) gzerror(fp, &error); 98*716fd348SMartin Matuska return (error); 99*716fd348SMartin Matuska } else { 100*716fd348SMartin Matuska bytes += rc; 101*716fd348SMartin Matuska 102*716fd348SMartin Matuska if (bytes + block_size >= buf_size) { 103*716fd348SMartin Matuska tmp_size = 2 * buf_size; 104*716fd348SMartin Matuska tmp_buf = malloc(tmp_size); 105*716fd348SMartin Matuska if (tmp_buf == NULL) { 106*716fd348SMartin Matuska free(buf); 107*716fd348SMartin Matuska (void) gzclose(fp); 108*716fd348SMartin Matuska return 
(ENOMEM); 109*716fd348SMartin Matuska } 110*716fd348SMartin Matuska 111*716fd348SMartin Matuska memcpy(tmp_buf, buf, bytes); 112*716fd348SMartin Matuska free(buf); 113*716fd348SMartin Matuska buf = tmp_buf; 114*716fd348SMartin Matuska buf_size = tmp_size; 115*716fd348SMartin Matuska } 116*716fd348SMartin Matuska } 117*716fd348SMartin Matuska } 118*716fd348SMartin Matuska 119*716fd348SMartin Matuska (void) gzclose(fp); 120*716fd348SMartin Matuska 121*716fd348SMartin Matuska error = nvlist_unpack(buf, bytes, allcfgs, 0); 122*716fd348SMartin Matuska free(buf); 123*716fd348SMartin Matuska 124*716fd348SMartin Matuska return (error); 125*716fd348SMartin Matuska } 126*716fd348SMartin Matuska 127*716fd348SMartin Matuska /* 128*716fd348SMartin Matuska * Read a map from the specified filename. A file contains multiple maps 129*716fd348SMartin Matuska * which are indexed by the number of children. The caller is responsible 130*716fd348SMartin Matuska * for freeing the configuration returned. 131*716fd348SMartin Matuska */ 132*716fd348SMartin Matuska static int 133*716fd348SMartin Matuska read_map_key(const char *filename, char *key, nvlist_t **cfg) 134*716fd348SMartin Matuska { 135*716fd348SMartin Matuska nvlist_t *allcfgs, *foundcfg = NULL; 136*716fd348SMartin Matuska int error; 137*716fd348SMartin Matuska 138*716fd348SMartin Matuska error = read_map(filename, &allcfgs); 139*716fd348SMartin Matuska if (error != 0) 140*716fd348SMartin Matuska return (error); 141*716fd348SMartin Matuska 142*716fd348SMartin Matuska nvlist_lookup_nvlist(allcfgs, key, &foundcfg); 143*716fd348SMartin Matuska if (foundcfg != NULL) { 144*716fd348SMartin Matuska nvlist_dup(foundcfg, cfg, KM_SLEEP); 145*716fd348SMartin Matuska error = 0; 146*716fd348SMartin Matuska } else { 147*716fd348SMartin Matuska error = ENOENT; 148*716fd348SMartin Matuska } 149*716fd348SMartin Matuska 150*716fd348SMartin Matuska nvlist_free(allcfgs); 151*716fd348SMartin Matuska 152*716fd348SMartin Matuska return (error); 
153*716fd348SMartin Matuska } 154*716fd348SMartin Matuska 155*716fd348SMartin Matuska /* 156*716fd348SMartin Matuska * Write all mappings to the map file. 157*716fd348SMartin Matuska */ 158*716fd348SMartin Matuska static int 159*716fd348SMartin Matuska write_map(const char *filename, nvlist_t *allcfgs) 160*716fd348SMartin Matuska { 161*716fd348SMartin Matuska size_t buflen = 0; 162*716fd348SMartin Matuska int error; 163*716fd348SMartin Matuska 164*716fd348SMartin Matuska error = nvlist_size(allcfgs, &buflen, NV_ENCODE_XDR); 165*716fd348SMartin Matuska if (error) 166*716fd348SMartin Matuska return (error); 167*716fd348SMartin Matuska 168*716fd348SMartin Matuska char *buf = malloc(buflen); 169*716fd348SMartin Matuska if (buf == NULL) 170*716fd348SMartin Matuska return (ENOMEM); 171*716fd348SMartin Matuska 172*716fd348SMartin Matuska error = nvlist_pack(allcfgs, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP); 173*716fd348SMartin Matuska if (error) { 174*716fd348SMartin Matuska free(buf); 175*716fd348SMartin Matuska return (error); 176*716fd348SMartin Matuska } 177*716fd348SMartin Matuska 178*716fd348SMartin Matuska /* 179*716fd348SMartin Matuska * Atomically update the file using a temporary file and the 180*716fd348SMartin Matuska * traditional unlink then rename steps. This code provides 181*716fd348SMartin Matuska * no locking, it only guarantees the packed nvlist on disk 182*716fd348SMartin Matuska * is updated atomically and is internally consistent. 
183*716fd348SMartin Matuska */ 184*716fd348SMartin Matuska char *tmpname = calloc(1, MAXPATHLEN); 185*716fd348SMartin Matuska if (tmpname == NULL) { 186*716fd348SMartin Matuska free(buf); 187*716fd348SMartin Matuska return (ENOMEM); 188*716fd348SMartin Matuska } 189*716fd348SMartin Matuska 190*716fd348SMartin Matuska snprintf(tmpname, MAXPATHLEN - 1, "%s.XXXXXX", filename); 191*716fd348SMartin Matuska 192*716fd348SMartin Matuska int fd = mkstemp(tmpname); 193*716fd348SMartin Matuska if (fd < 0) { 194*716fd348SMartin Matuska error = errno; 195*716fd348SMartin Matuska free(buf); 196*716fd348SMartin Matuska free(tmpname); 197*716fd348SMartin Matuska return (error); 198*716fd348SMartin Matuska } 199*716fd348SMartin Matuska (void) close(fd); 200*716fd348SMartin Matuska 201*716fd348SMartin Matuska gzFile fp = gzopen(tmpname, "w9b"); 202*716fd348SMartin Matuska if (fp == Z_NULL) { 203*716fd348SMartin Matuska error = errno; 204*716fd348SMartin Matuska free(buf); 205*716fd348SMartin Matuska free(tmpname); 206*716fd348SMartin Matuska return (errno); 207*716fd348SMartin Matuska } 208*716fd348SMartin Matuska 209*716fd348SMartin Matuska ssize_t rc, bytes = 0; 210*716fd348SMartin Matuska while (bytes < buflen) { 211*716fd348SMartin Matuska size_t size = MIN(buflen - bytes, 131072); 212*716fd348SMartin Matuska rc = gzwrite(fp, buf + bytes, size); 213*716fd348SMartin Matuska if (rc < 0) { 214*716fd348SMartin Matuska free(buf); 215*716fd348SMartin Matuska (void) gzerror(fp, &error); 216*716fd348SMartin Matuska (void) gzclose(fp); 217*716fd348SMartin Matuska (void) unlink(tmpname); 218*716fd348SMartin Matuska free(tmpname); 219*716fd348SMartin Matuska return (error); 220*716fd348SMartin Matuska } else if (rc == 0) { 221*716fd348SMartin Matuska break; 222*716fd348SMartin Matuska } else { 223*716fd348SMartin Matuska bytes += rc; 224*716fd348SMartin Matuska } 225*716fd348SMartin Matuska } 226*716fd348SMartin Matuska 227*716fd348SMartin Matuska free(buf); 228*716fd348SMartin Matuska 
(void) gzclose(fp); 229*716fd348SMartin Matuska 230*716fd348SMartin Matuska if (bytes != buflen) { 231*716fd348SMartin Matuska (void) unlink(tmpname); 232*716fd348SMartin Matuska free(tmpname); 233*716fd348SMartin Matuska return (EIO); 234*716fd348SMartin Matuska } 235*716fd348SMartin Matuska 236*716fd348SMartin Matuska /* 237*716fd348SMartin Matuska * Unlink the previous config file and replace it with the updated 238*716fd348SMartin Matuska * version. If we're able to unlink the file then directory is 239*716fd348SMartin Matuska * writable by us and the subsequent rename should never fail. 240*716fd348SMartin Matuska */ 241*716fd348SMartin Matuska error = unlink(filename); 242*716fd348SMartin Matuska if (error != 0 && errno != ENOENT) { 243*716fd348SMartin Matuska error = errno; 244*716fd348SMartin Matuska (void) unlink(tmpname); 245*716fd348SMartin Matuska free(tmpname); 246*716fd348SMartin Matuska return (error); 247*716fd348SMartin Matuska } 248*716fd348SMartin Matuska 249*716fd348SMartin Matuska error = rename(tmpname, filename); 250*716fd348SMartin Matuska if (error != 0) { 251*716fd348SMartin Matuska error = errno; 252*716fd348SMartin Matuska (void) unlink(tmpname); 253*716fd348SMartin Matuska free(tmpname); 254*716fd348SMartin Matuska return (error); 255*716fd348SMartin Matuska } 256*716fd348SMartin Matuska 257*716fd348SMartin Matuska free(tmpname); 258*716fd348SMartin Matuska 259*716fd348SMartin Matuska return (0); 260*716fd348SMartin Matuska } 261*716fd348SMartin Matuska 262*716fd348SMartin Matuska /* 263*716fd348SMartin Matuska * Add the dRAID map to the file and write it out. 
264*716fd348SMartin Matuska */ 265*716fd348SMartin Matuska static int 266*716fd348SMartin Matuska write_map_key(const char *filename, char *key, draid_map_t *map, 267*716fd348SMartin Matuska double worst_ratio, double avg_ratio) 268*716fd348SMartin Matuska { 269*716fd348SMartin Matuska nvlist_t *nv_cfg, *allcfgs; 270*716fd348SMartin Matuska int error; 271*716fd348SMartin Matuska 272*716fd348SMartin Matuska /* 273*716fd348SMartin Matuska * Add the configuration to an existing or new file. The new 274*716fd348SMartin Matuska * configuration will replace an existing configuration with the 275*716fd348SMartin Matuska * same key if it has a lower ratio and is therefore better. 276*716fd348SMartin Matuska */ 277*716fd348SMartin Matuska error = read_map(filename, &allcfgs); 278*716fd348SMartin Matuska if (error == ENOENT) { 279*716fd348SMartin Matuska allcfgs = fnvlist_alloc(); 280*716fd348SMartin Matuska } else if (error != 0) { 281*716fd348SMartin Matuska return (error); 282*716fd348SMartin Matuska } 283*716fd348SMartin Matuska 284*716fd348SMartin Matuska error = nvlist_lookup_nvlist(allcfgs, key, &nv_cfg); 285*716fd348SMartin Matuska if (error == 0) { 286*716fd348SMartin Matuska uint64_t nv_cfg_worst_ratio = fnvlist_lookup_uint64(nv_cfg, 287*716fd348SMartin Matuska MAP_WORST_RATIO); 288*716fd348SMartin Matuska double nv_worst_ratio = (double)nv_cfg_worst_ratio / 1000.0; 289*716fd348SMartin Matuska 290*716fd348SMartin Matuska if (worst_ratio < nv_worst_ratio) { 291*716fd348SMartin Matuska /* Replace old map with the more balanced new map. */ 292*716fd348SMartin Matuska fnvlist_remove(allcfgs, key); 293*716fd348SMartin Matuska } else { 294*716fd348SMartin Matuska /* The old map is preferable, keep it. 
*/ 295*716fd348SMartin Matuska nvlist_free(allcfgs); 296*716fd348SMartin Matuska return (EEXIST); 297*716fd348SMartin Matuska } 298*716fd348SMartin Matuska } 299*716fd348SMartin Matuska 300*716fd348SMartin Matuska nvlist_t *cfg = fnvlist_alloc(); 301*716fd348SMartin Matuska fnvlist_add_uint64(cfg, MAP_SEED, map->dm_seed); 302*716fd348SMartin Matuska fnvlist_add_uint64(cfg, MAP_CHECKSUM, map->dm_checksum); 303*716fd348SMartin Matuska fnvlist_add_uint64(cfg, MAP_CHILDREN, map->dm_children); 304*716fd348SMartin Matuska fnvlist_add_uint64(cfg, MAP_NPERMS, map->dm_nperms); 305*716fd348SMartin Matuska fnvlist_add_uint8_array(cfg, MAP_PERMS, map->dm_perms, 306*716fd348SMartin Matuska map->dm_children * map->dm_nperms * sizeof (uint8_t)); 307*716fd348SMartin Matuska 308*716fd348SMartin Matuska fnvlist_add_uint64(cfg, MAP_WORST_RATIO, 309*716fd348SMartin Matuska (uint64_t)(worst_ratio * 1000.0)); 310*716fd348SMartin Matuska fnvlist_add_uint64(cfg, MAP_AVG_RATIO, 311*716fd348SMartin Matuska (uint64_t)(avg_ratio * 1000.0)); 312*716fd348SMartin Matuska 313*716fd348SMartin Matuska error = nvlist_add_nvlist(allcfgs, key, cfg); 314*716fd348SMartin Matuska if (error == 0) 315*716fd348SMartin Matuska error = write_map(filename, allcfgs); 316*716fd348SMartin Matuska 317*716fd348SMartin Matuska nvlist_free(cfg); 318*716fd348SMartin Matuska nvlist_free(allcfgs); 319*716fd348SMartin Matuska return (error); 320*716fd348SMartin Matuska } 321*716fd348SMartin Matuska 322*716fd348SMartin Matuska static void 323*716fd348SMartin Matuska dump_map(draid_map_t *map, char *key, double worst_ratio, double avg_ratio, 324*716fd348SMartin Matuska int verbose) 325*716fd348SMartin Matuska { 326*716fd348SMartin Matuska if (verbose == 0) { 327*716fd348SMartin Matuska return; 328*716fd348SMartin Matuska } else if (verbose == 1) { 329*716fd348SMartin Matuska printf(" \"%s\": seed: 0x%016llx worst_ratio: %2.03f " 330*716fd348SMartin Matuska "avg_ratio: %2.03f\n", key, (u_longlong_t)map->dm_seed, 
331*716fd348SMartin Matuska worst_ratio, avg_ratio); 332*716fd348SMartin Matuska return; 333*716fd348SMartin Matuska } else { 334*716fd348SMartin Matuska printf(" \"%s\":\n" 335*716fd348SMartin Matuska " seed: 0x%016llx\n" 336*716fd348SMartin Matuska " checksum: 0x%016llx\n" 337*716fd348SMartin Matuska " worst_ratio: %2.03f\n" 338*716fd348SMartin Matuska " avg_ratio: %2.03f\n" 339*716fd348SMartin Matuska " children: %llu\n" 340*716fd348SMartin Matuska " nperms: %llu\n", 341*716fd348SMartin Matuska key, (u_longlong_t)map->dm_seed, 342*716fd348SMartin Matuska (u_longlong_t)map->dm_checksum, worst_ratio, avg_ratio, 343*716fd348SMartin Matuska (u_longlong_t)map->dm_children, 344*716fd348SMartin Matuska (u_longlong_t)map->dm_nperms); 345*716fd348SMartin Matuska 346*716fd348SMartin Matuska if (verbose > 2) { 347*716fd348SMartin Matuska printf(" perms = {\n"); 348*716fd348SMartin Matuska for (int i = 0; i < map->dm_nperms; i++) { 349*716fd348SMartin Matuska printf(" { "); 350*716fd348SMartin Matuska for (int j = 0; j < map->dm_children; j++) { 351*716fd348SMartin Matuska printf("%3d%s ", map->dm_perms[ 352*716fd348SMartin Matuska i * map->dm_children + j], 353*716fd348SMartin Matuska j < map->dm_children - 1 ? 
354*716fd348SMartin Matuska "," : ""); 355*716fd348SMartin Matuska } 356*716fd348SMartin Matuska printf(" },\n"); 357*716fd348SMartin Matuska } 358*716fd348SMartin Matuska printf(" }\n"); 359*716fd348SMartin Matuska } else if (verbose == 2) { 360*716fd348SMartin Matuska printf(" draid_perms = <omitted>\n"); 361*716fd348SMartin Matuska } 362*716fd348SMartin Matuska } 363*716fd348SMartin Matuska } 364*716fd348SMartin Matuska 365*716fd348SMartin Matuska static void 366*716fd348SMartin Matuska dump_map_nv(char *key, nvlist_t *cfg, int verbose) 367*716fd348SMartin Matuska { 368*716fd348SMartin Matuska draid_map_t map; 369*716fd348SMartin Matuska uint_t c; 370*716fd348SMartin Matuska 371*716fd348SMartin Matuska uint64_t worst_ratio = fnvlist_lookup_uint64(cfg, MAP_WORST_RATIO); 372*716fd348SMartin Matuska uint64_t avg_ratio = fnvlist_lookup_uint64(cfg, MAP_AVG_RATIO); 373*716fd348SMartin Matuska 374*716fd348SMartin Matuska map.dm_seed = fnvlist_lookup_uint64(cfg, MAP_SEED); 375*716fd348SMartin Matuska map.dm_checksum = fnvlist_lookup_uint64(cfg, MAP_CHECKSUM); 376*716fd348SMartin Matuska map.dm_children = fnvlist_lookup_uint64(cfg, MAP_CHILDREN); 377*716fd348SMartin Matuska map.dm_nperms = fnvlist_lookup_uint64(cfg, MAP_NPERMS); 378*716fd348SMartin Matuska nvlist_lookup_uint8_array(cfg, MAP_PERMS, &map.dm_perms, &c); 379*716fd348SMartin Matuska 380*716fd348SMartin Matuska dump_map(&map, key, (double)worst_ratio / 1000.0, 381*716fd348SMartin Matuska avg_ratio / 1000.0, verbose); 382*716fd348SMartin Matuska } 383*716fd348SMartin Matuska 384*716fd348SMartin Matuska /* 385*716fd348SMartin Matuska * Print a summary of the mapping. 
386*716fd348SMartin Matuska */ 387*716fd348SMartin Matuska static int 388*716fd348SMartin Matuska dump_map_key(const char *filename, char *key, int verbose) 389*716fd348SMartin Matuska { 390*716fd348SMartin Matuska nvlist_t *cfg; 391*716fd348SMartin Matuska int error; 392*716fd348SMartin Matuska 393*716fd348SMartin Matuska error = read_map_key(filename, key, &cfg); 394*716fd348SMartin Matuska if (error != 0) 395*716fd348SMartin Matuska return (error); 396*716fd348SMartin Matuska 397*716fd348SMartin Matuska dump_map_nv(key, cfg, verbose); 398*716fd348SMartin Matuska 399*716fd348SMartin Matuska return (0); 400*716fd348SMartin Matuska } 401*716fd348SMartin Matuska 402*716fd348SMartin Matuska /* 403*716fd348SMartin Matuska * Allocate a new permutation map for evaluation. 404*716fd348SMartin Matuska */ 405*716fd348SMartin Matuska static int 406*716fd348SMartin Matuska alloc_new_map(uint64_t children, uint64_t nperms, uint64_t seed, 407*716fd348SMartin Matuska draid_map_t **mapp) 408*716fd348SMartin Matuska { 409*716fd348SMartin Matuska draid_map_t *map; 410*716fd348SMartin Matuska int error; 411*716fd348SMartin Matuska 412*716fd348SMartin Matuska map = malloc(sizeof (draid_map_t)); 413*716fd348SMartin Matuska if (map == NULL) 414*716fd348SMartin Matuska return (ENOMEM); 415*716fd348SMartin Matuska 416*716fd348SMartin Matuska map->dm_children = children; 417*716fd348SMartin Matuska map->dm_nperms = nperms; 418*716fd348SMartin Matuska map->dm_seed = seed; 419*716fd348SMartin Matuska map->dm_checksum = 0; 420*716fd348SMartin Matuska 421*716fd348SMartin Matuska error = vdev_draid_generate_perms(map, &map->dm_perms); 422*716fd348SMartin Matuska if (error) { 423*716fd348SMartin Matuska free(map); 424*716fd348SMartin Matuska return (error); 425*716fd348SMartin Matuska } 426*716fd348SMartin Matuska 427*716fd348SMartin Matuska *mapp = map; 428*716fd348SMartin Matuska 429*716fd348SMartin Matuska return (0); 430*716fd348SMartin Matuska } 431*716fd348SMartin Matuska 
432*716fd348SMartin Matuska /* 433*716fd348SMartin Matuska * Allocate the fixed permutation map for N children. 434*716fd348SMartin Matuska */ 435*716fd348SMartin Matuska static int 436*716fd348SMartin Matuska alloc_fixed_map(uint64_t children, draid_map_t **mapp) 437*716fd348SMartin Matuska { 438*716fd348SMartin Matuska const draid_map_t *fixed_map; 439*716fd348SMartin Matuska draid_map_t *map; 440*716fd348SMartin Matuska int error; 441*716fd348SMartin Matuska 442*716fd348SMartin Matuska error = vdev_draid_lookup_map(children, &fixed_map); 443*716fd348SMartin Matuska if (error) 444*716fd348SMartin Matuska return (error); 445*716fd348SMartin Matuska 446*716fd348SMartin Matuska map = malloc(sizeof (draid_map_t)); 447*716fd348SMartin Matuska if (map == NULL) 448*716fd348SMartin Matuska return (ENOMEM); 449*716fd348SMartin Matuska 450*716fd348SMartin Matuska memcpy(map, fixed_map, sizeof (draid_map_t)); 451*716fd348SMartin Matuska VERIFY3U(map->dm_checksum, !=, 0); 452*716fd348SMartin Matuska 453*716fd348SMartin Matuska error = vdev_draid_generate_perms(map, &map->dm_perms); 454*716fd348SMartin Matuska if (error) { 455*716fd348SMartin Matuska free(map); 456*716fd348SMartin Matuska return (error); 457*716fd348SMartin Matuska } 458*716fd348SMartin Matuska 459*716fd348SMartin Matuska *mapp = map; 460*716fd348SMartin Matuska 461*716fd348SMartin Matuska return (0); 462*716fd348SMartin Matuska } 463*716fd348SMartin Matuska 464*716fd348SMartin Matuska /* 465*716fd348SMartin Matuska * Free a permutation map. 466*716fd348SMartin Matuska */ 467*716fd348SMartin Matuska static void 468*716fd348SMartin Matuska free_map(draid_map_t *map) 469*716fd348SMartin Matuska { 470*716fd348SMartin Matuska free(map->dm_perms); 471*716fd348SMartin Matuska free(map); 472*716fd348SMartin Matuska } 473*716fd348SMartin Matuska 474*716fd348SMartin Matuska /* 475*716fd348SMartin Matuska * Check if dev is in the provided list of faulted devices. 
476*716fd348SMartin Matuska */ 477*716fd348SMartin Matuska static inline boolean_t 478*716fd348SMartin Matuska is_faulted(int *faulted_devs, int nfaulted, int dev) 479*716fd348SMartin Matuska { 480*716fd348SMartin Matuska for (int i = 0; i < nfaulted; i++) 481*716fd348SMartin Matuska if (faulted_devs[i] == dev) 482*716fd348SMartin Matuska return (B_TRUE); 483*716fd348SMartin Matuska 484*716fd348SMartin Matuska return (B_FALSE); 485*716fd348SMartin Matuska } 486*716fd348SMartin Matuska 487*716fd348SMartin Matuska /* 488*716fd348SMartin Matuska * Evaluate how resilvering I/O will be distributed given a list of faulted 489*716fd348SMartin Matuska * vdevs. As a simplification we assume one IO is sufficient to repair each 490*716fd348SMartin Matuska * damaged device in a group. 491*716fd348SMartin Matuska */ 492*716fd348SMartin Matuska static double 493*716fd348SMartin Matuska eval_resilver(draid_map_t *map, uint64_t groupwidth, uint64_t nspares, 494*716fd348SMartin Matuska int *faulted_devs, int nfaulted, int *min_child_ios, int *max_child_ios) 495*716fd348SMartin Matuska { 496*716fd348SMartin Matuska uint64_t children = map->dm_children; 497*716fd348SMartin Matuska uint64_t ngroups = 1; 498*716fd348SMartin Matuska uint64_t ndisks = children - nspares; 499*716fd348SMartin Matuska 500*716fd348SMartin Matuska /* 501*716fd348SMartin Matuska * Calculate the minimum number of groups required to fill a slice. 
502*716fd348SMartin Matuska */ 503*716fd348SMartin Matuska while (ngroups * (groupwidth) % (children - nspares) != 0) 504*716fd348SMartin Matuska ngroups++; 505*716fd348SMartin Matuska 506*716fd348SMartin Matuska int *ios = calloc(map->dm_children, sizeof (uint64_t)); 507*716fd348SMartin Matuska 508*716fd348SMartin Matuska /* Resilver all rows */ 509*716fd348SMartin Matuska for (int i = 0; i < map->dm_nperms; i++) { 510*716fd348SMartin Matuska uint8_t *row = &map->dm_perms[i * map->dm_children]; 511*716fd348SMartin Matuska 512*716fd348SMartin Matuska /* Resilver all groups with faulted drives */ 513*716fd348SMartin Matuska for (int j = 0; j < ngroups; j++) { 514*716fd348SMartin Matuska uint64_t spareidx = map->dm_children - nspares; 515*716fd348SMartin Matuska boolean_t repair_needed = B_FALSE; 516*716fd348SMartin Matuska 517*716fd348SMartin Matuska /* See if any devices in this group are faulted */ 518*716fd348SMartin Matuska uint64_t groupstart = (j * groupwidth) % ndisks; 519*716fd348SMartin Matuska 520*716fd348SMartin Matuska for (int k = 0; k < groupwidth; k++) { 521*716fd348SMartin Matuska uint64_t groupidx = (groupstart + k) % ndisks; 522*716fd348SMartin Matuska 523*716fd348SMartin Matuska repair_needed = is_faulted(faulted_devs, 524*716fd348SMartin Matuska nfaulted, row[groupidx]); 525*716fd348SMartin Matuska if (repair_needed) 526*716fd348SMartin Matuska break; 527*716fd348SMartin Matuska } 528*716fd348SMartin Matuska 529*716fd348SMartin Matuska if (repair_needed == B_FALSE) 530*716fd348SMartin Matuska continue; 531*716fd348SMartin Matuska 532*716fd348SMartin Matuska /* 533*716fd348SMartin Matuska * This group is degraded. Calculate the number of 534*716fd348SMartin Matuska * reads the non-faulted drives require and the number 535*716fd348SMartin Matuska * of writes to the distributed hot spare for this row. 
536*716fd348SMartin Matuska */ 537*716fd348SMartin Matuska for (int k = 0; k < groupwidth; k++) { 538*716fd348SMartin Matuska uint64_t groupidx = (groupstart + k) % ndisks; 539*716fd348SMartin Matuska 540*716fd348SMartin Matuska if (!is_faulted(faulted_devs, nfaulted, 541*716fd348SMartin Matuska row[groupidx])) { 542*716fd348SMartin Matuska ios[row[groupidx]]++; 543*716fd348SMartin Matuska } else if (nspares > 0) { 544*716fd348SMartin Matuska while (is_faulted(faulted_devs, 545*716fd348SMartin Matuska nfaulted, row[spareidx])) { 546*716fd348SMartin Matuska spareidx++; 547*716fd348SMartin Matuska } 548*716fd348SMartin Matuska 549*716fd348SMartin Matuska ASSERT3U(spareidx, <, map->dm_children); 550*716fd348SMartin Matuska ios[row[spareidx]]++; 551*716fd348SMartin Matuska spareidx++; 552*716fd348SMartin Matuska } 553*716fd348SMartin Matuska } 554*716fd348SMartin Matuska } 555*716fd348SMartin Matuska } 556*716fd348SMartin Matuska 557*716fd348SMartin Matuska *min_child_ios = INT_MAX; 558*716fd348SMartin Matuska *max_child_ios = 0; 559*716fd348SMartin Matuska 560*716fd348SMartin Matuska /* 561*716fd348SMartin Matuska * Find the drives with fewest and most required I/O. These values 562*716fd348SMartin Matuska * are used to calculate the imbalance ratio. To avoid returning an 563*716fd348SMartin Matuska * infinite value for permutations which have children that perform 564*716fd348SMartin Matuska * no IO a floor of 1 IO per child is set. This ensures a meaningful 565*716fd348SMartin Matuska * ratio is returned for comparison and it is not an uncommon when 566*716fd348SMartin Matuska * there are a large number of children. 
567*716fd348SMartin Matuska */ 568*716fd348SMartin Matuska for (int i = 0; i < map->dm_children; i++) { 569*716fd348SMartin Matuska 570*716fd348SMartin Matuska if (is_faulted(faulted_devs, nfaulted, i)) { 571*716fd348SMartin Matuska ASSERT0(ios[i]); 572*716fd348SMartin Matuska continue; 573*716fd348SMartin Matuska } 574*716fd348SMartin Matuska 575*716fd348SMartin Matuska if (ios[i] == 0) 576*716fd348SMartin Matuska ios[i] = 1; 577*716fd348SMartin Matuska 578*716fd348SMartin Matuska if (ios[i] < *min_child_ios) 579*716fd348SMartin Matuska *min_child_ios = ios[i]; 580*716fd348SMartin Matuska 581*716fd348SMartin Matuska if (ios[i] > *max_child_ios) 582*716fd348SMartin Matuska *max_child_ios = ios[i]; 583*716fd348SMartin Matuska } 584*716fd348SMartin Matuska 585*716fd348SMartin Matuska ASSERT3S(*min_child_ios, !=, INT_MAX); 586*716fd348SMartin Matuska ASSERT3S(*max_child_ios, !=, 0); 587*716fd348SMartin Matuska 588*716fd348SMartin Matuska double ratio = (double)(*max_child_ios) / (double)(*min_child_ios); 589*716fd348SMartin Matuska 590*716fd348SMartin Matuska free(ios); 591*716fd348SMartin Matuska 592*716fd348SMartin Matuska return (ratio); 593*716fd348SMartin Matuska } 594*716fd348SMartin Matuska 595*716fd348SMartin Matuska /* 596*716fd348SMartin Matuska * Evaluate the quality of the permutation mapping by considering possible 597*716fd348SMartin Matuska * device failures. Returns the imbalance ratio for the worst mapping which 598*716fd348SMartin Matuska * is defined to be the largest number of child IOs over the fewest number 599*716fd348SMartin Matuska * child IOs. A value of 1.0 indicates the mapping is perfectly balance and 600*716fd348SMartin Matuska * all children perform an equal amount of work during reconstruction. 
601*716fd348SMartin Matuska */ 602*716fd348SMartin Matuska static void 603*716fd348SMartin Matuska eval_decluster(draid_map_t *map, double *worst_ratiop, double *avg_ratiop) 604*716fd348SMartin Matuska { 605*716fd348SMartin Matuska uint64_t children = map->dm_children; 606*716fd348SMartin Matuska double worst_ratio = 1.0; 607*716fd348SMartin Matuska double sum = 0; 608*716fd348SMartin Matuska int worst_min_ios = 0, worst_max_ios = 0; 609*716fd348SMartin Matuska int n = 0; 610*716fd348SMartin Matuska 611*716fd348SMartin Matuska /* 612*716fd348SMartin Matuska * When there are only 2 children there can be no distributed 613*716fd348SMartin Matuska * spare and no resilver to evaluate. Default to a ratio of 1.0 614*716fd348SMartin Matuska * for this degenerate case. 615*716fd348SMartin Matuska */ 616*716fd348SMartin Matuska if (children == VDEV_DRAID_MIN_CHILDREN) { 617*716fd348SMartin Matuska *worst_ratiop = 1.0; 618*716fd348SMartin Matuska *avg_ratiop = 1.0; 619*716fd348SMartin Matuska return; 620*716fd348SMartin Matuska } 621*716fd348SMartin Matuska 622*716fd348SMartin Matuska /* 623*716fd348SMartin Matuska * Score the mapping as if it had either 1 or 2 distributed spares. 624*716fd348SMartin Matuska */ 625*716fd348SMartin Matuska for (int nspares = 1; nspares <= 2; nspares++) { 626*716fd348SMartin Matuska uint64_t faults = nspares; 627*716fd348SMartin Matuska 628*716fd348SMartin Matuska /* 629*716fd348SMartin Matuska * Score groupwidths up to 19. This value was chosen as the 630*716fd348SMartin Matuska * largest reasonable width (16d+3p). dRAID pools may be still 631*716fd348SMartin Matuska * be created with wider stripes but they are not considered in 632*716fd348SMartin Matuska * this analysis in order to optimize for the most common cases. 
633*716fd348SMartin Matuska */ 634*716fd348SMartin Matuska for (uint64_t groupwidth = 2; 635*716fd348SMartin Matuska groupwidth <= MIN(children - nspares, 19); 636*716fd348SMartin Matuska groupwidth++) { 637*716fd348SMartin Matuska int faulted_devs[2]; 638*716fd348SMartin Matuska int min_ios, max_ios; 639*716fd348SMartin Matuska 640*716fd348SMartin Matuska /* 641*716fd348SMartin Matuska * Score possible devices faults. This is limited 642*716fd348SMartin Matuska * to exactly one fault per distributed spare for 643*716fd348SMartin Matuska * the purposes of this similation. 644*716fd348SMartin Matuska */ 645*716fd348SMartin Matuska for (int f1 = 0; f1 < children; f1++) { 646*716fd348SMartin Matuska faulted_devs[0] = f1; 647*716fd348SMartin Matuska double ratio; 648*716fd348SMartin Matuska 649*716fd348SMartin Matuska if (faults == 1) { 650*716fd348SMartin Matuska ratio = eval_resilver(map, groupwidth, 651*716fd348SMartin Matuska nspares, faulted_devs, faults, 652*716fd348SMartin Matuska &min_ios, &max_ios); 653*716fd348SMartin Matuska 654*716fd348SMartin Matuska if (ratio > worst_ratio) { 655*716fd348SMartin Matuska worst_ratio = ratio; 656*716fd348SMartin Matuska worst_min_ios = min_ios; 657*716fd348SMartin Matuska worst_max_ios = max_ios; 658*716fd348SMartin Matuska } 659*716fd348SMartin Matuska 660*716fd348SMartin Matuska sum += ratio; 661*716fd348SMartin Matuska n++; 662*716fd348SMartin Matuska } else if (faults == 2) { 663*716fd348SMartin Matuska for (int f2 = f1 + 1; f2 < children; 664*716fd348SMartin Matuska f2++) { 665*716fd348SMartin Matuska faulted_devs[1] = f2; 666*716fd348SMartin Matuska 667*716fd348SMartin Matuska ratio = eval_resilver(map, 668*716fd348SMartin Matuska groupwidth, nspares, 669*716fd348SMartin Matuska faulted_devs, faults, 670*716fd348SMartin Matuska &min_ios, &max_ios); 671*716fd348SMartin Matuska 672*716fd348SMartin Matuska if (ratio > worst_ratio) { 673*716fd348SMartin Matuska worst_ratio = ratio; 674*716fd348SMartin Matuska 
worst_min_ios = min_ios; 675*716fd348SMartin Matuska worst_max_ios = max_ios; 676*716fd348SMartin Matuska } 677*716fd348SMartin Matuska 678*716fd348SMartin Matuska sum += ratio; 679*716fd348SMartin Matuska n++; 680*716fd348SMartin Matuska } 681*716fd348SMartin Matuska } 682*716fd348SMartin Matuska } 683*716fd348SMartin Matuska } 684*716fd348SMartin Matuska } 685*716fd348SMartin Matuska 686*716fd348SMartin Matuska *worst_ratiop = worst_ratio; 687*716fd348SMartin Matuska *avg_ratiop = sum / n; 688*716fd348SMartin Matuska 689*716fd348SMartin Matuska /* 690*716fd348SMartin Matuska * Log the min/max io values for particularly unbalanced maps. 691*716fd348SMartin Matuska * Since the maps are generated entirely randomly these are possible 692*716fd348SMartin Matuska * be exceedingly unlikely. We log it for possible investigation. 693*716fd348SMartin Matuska */ 694*716fd348SMartin Matuska if (worst_ratio > 100.0) { 695*716fd348SMartin Matuska dump_map(map, "DEBUG", worst_ratio, *avg_ratiop, 2); 696*716fd348SMartin Matuska printf("worst_min_ios=%d worst_max_ios=%d\n", 697*716fd348SMartin Matuska worst_min_ios, worst_max_ios); 698*716fd348SMartin Matuska } 699*716fd348SMartin Matuska } 700*716fd348SMartin Matuska 701*716fd348SMartin Matuska static int 702*716fd348SMartin Matuska eval_maps(uint64_t children, int passes, uint64_t *map_seed, 703*716fd348SMartin Matuska draid_map_t **best_mapp, double *best_ratiop, double *avg_ratiop) 704*716fd348SMartin Matuska { 705*716fd348SMartin Matuska draid_map_t *best_map = NULL; 706*716fd348SMartin Matuska double best_worst_ratio = 1000.0; 707*716fd348SMartin Matuska double best_avg_ratio = 1000.0; 708*716fd348SMartin Matuska 709*716fd348SMartin Matuska /* 710*716fd348SMartin Matuska * Perform the requested number of passes evaluating randomly 711*716fd348SMartin Matuska * generated permutation maps. Only the best version is kept. 
712*716fd348SMartin Matuska */ 713*716fd348SMartin Matuska for (int i = 0; i < passes; i++) { 714*716fd348SMartin Matuska double worst_ratio, avg_ratio; 715*716fd348SMartin Matuska draid_map_t *map; 716*716fd348SMartin Matuska int error; 717*716fd348SMartin Matuska 718*716fd348SMartin Matuska /* 719*716fd348SMartin Matuska * Calculate the next seed and generate a new candidate map. 720*716fd348SMartin Matuska */ 721*716fd348SMartin Matuska error = alloc_new_map(children, MAP_ROWS_DEFAULT, 722*716fd348SMartin Matuska vdev_draid_rand(map_seed), &map); 723*716fd348SMartin Matuska if (error) 724*716fd348SMartin Matuska return (error); 725*716fd348SMartin Matuska 726*716fd348SMartin Matuska /* 727*716fd348SMartin Matuska * Consider maps with a lower worst_ratio to be of higher 728*716fd348SMartin Matuska * quality. Some maps may have a lower avg_ratio but they 729*716fd348SMartin Matuska * are discarded since they might include some particularly 730*716fd348SMartin Matuska * imbalanced permutations. The average is tracked to in 731*716fd348SMartin Matuska * order to get a sense of the average permutation quality. 
732*716fd348SMartin Matuska */ 733*716fd348SMartin Matuska eval_decluster(map, &worst_ratio, &avg_ratio); 734*716fd348SMartin Matuska 735*716fd348SMartin Matuska if (best_map == NULL || worst_ratio < best_worst_ratio) { 736*716fd348SMartin Matuska 737*716fd348SMartin Matuska if (best_map != NULL) 738*716fd348SMartin Matuska free_map(best_map); 739*716fd348SMartin Matuska 740*716fd348SMartin Matuska best_map = map; 741*716fd348SMartin Matuska best_worst_ratio = worst_ratio; 742*716fd348SMartin Matuska best_avg_ratio = avg_ratio; 743*716fd348SMartin Matuska } else { 744*716fd348SMartin Matuska free_map(map); 745*716fd348SMartin Matuska } 746*716fd348SMartin Matuska } 747*716fd348SMartin Matuska 748*716fd348SMartin Matuska /* 749*716fd348SMartin Matuska * After determining the best map generate a checksum over the full 750*716fd348SMartin Matuska * permutation array. This checksum is verified when opening a dRAID 751*716fd348SMartin Matuska * pool to ensure the generated in memory permutations are correct. 
752*716fd348SMartin Matuska */ 753*716fd348SMartin Matuska zio_cksum_t cksum; 754*716fd348SMartin Matuska fletcher_4_native_varsize(best_map->dm_perms, 755*716fd348SMartin Matuska sizeof (uint8_t) * best_map->dm_children * best_map->dm_nperms, 756*716fd348SMartin Matuska &cksum); 757*716fd348SMartin Matuska best_map->dm_checksum = cksum.zc_word[0]; 758*716fd348SMartin Matuska 759*716fd348SMartin Matuska *best_mapp = best_map; 760*716fd348SMartin Matuska *best_ratiop = best_worst_ratio; 761*716fd348SMartin Matuska *avg_ratiop = best_avg_ratio; 762*716fd348SMartin Matuska 763*716fd348SMartin Matuska return (0); 764*716fd348SMartin Matuska } 765*716fd348SMartin Matuska 766*716fd348SMartin Matuska static int 767*716fd348SMartin Matuska draid_generate(int argc, char *argv[]) 768*716fd348SMartin Matuska { 769*716fd348SMartin Matuska char filename[MAXPATHLEN] = {0}; 770*716fd348SMartin Matuska uint64_t map_seed; 771*716fd348SMartin Matuska int c, fd, error, verbose = 0, passes = 1, continuous = 0; 772*716fd348SMartin Matuska int min_children = VDEV_DRAID_MIN_CHILDREN; 773*716fd348SMartin Matuska int max_children = VDEV_DRAID_MAX_CHILDREN; 774*716fd348SMartin Matuska int restarts = 0; 775*716fd348SMartin Matuska 776*716fd348SMartin Matuska while ((c = getopt(argc, argv, ":cm:n:p:v")) != -1) { 777*716fd348SMartin Matuska switch (c) { 778*716fd348SMartin Matuska case 'c': 779*716fd348SMartin Matuska continuous++; 780*716fd348SMartin Matuska break; 781*716fd348SMartin Matuska case 'm': 782*716fd348SMartin Matuska min_children = (int)strtol(optarg, NULL, 0); 783*716fd348SMartin Matuska if (min_children < VDEV_DRAID_MIN_CHILDREN) { 784*716fd348SMartin Matuska (void) fprintf(stderr, "A minimum of 2 " 785*716fd348SMartin Matuska "children are required.\n"); 786*716fd348SMartin Matuska return (1); 787*716fd348SMartin Matuska } 788*716fd348SMartin Matuska 789*716fd348SMartin Matuska break; 790*716fd348SMartin Matuska case 'n': 791*716fd348SMartin Matuska max_children = 
(int)strtol(optarg, NULL, 0); 792*716fd348SMartin Matuska if (max_children > VDEV_DRAID_MAX_CHILDREN) { 793*716fd348SMartin Matuska (void) fprintf(stderr, "A maximum of %d " 794*716fd348SMartin Matuska "children are allowed.\n", 795*716fd348SMartin Matuska VDEV_DRAID_MAX_CHILDREN); 796*716fd348SMartin Matuska return (1); 797*716fd348SMartin Matuska } 798*716fd348SMartin Matuska break; 799*716fd348SMartin Matuska case 'p': 800*716fd348SMartin Matuska passes = (int)strtol(optarg, NULL, 0); 801*716fd348SMartin Matuska break; 802*716fd348SMartin Matuska case 'v': 803*716fd348SMartin Matuska /* 804*716fd348SMartin Matuska * 0 - Only log when a better map is added to the file. 805*716fd348SMartin Matuska * 1 - Log the current best map for each child count. 806*716fd348SMartin Matuska * Minimal output on a single summary line. 807*716fd348SMartin Matuska * 2 - Log the current best map for each child count. 808*716fd348SMartin Matuska * More verbose includes most map fields. 809*716fd348SMartin Matuska * 3 - Log the current best map for each child count. 810*716fd348SMartin Matuska * Very verbose all fields including the full map. 
811*716fd348SMartin Matuska */ 812*716fd348SMartin Matuska verbose++; 813*716fd348SMartin Matuska break; 814*716fd348SMartin Matuska case ':': 815*716fd348SMartin Matuska (void) fprintf(stderr, 816*716fd348SMartin Matuska "missing argument for '%c' option\n", optopt); 817*716fd348SMartin Matuska draid_usage(); 818*716fd348SMartin Matuska break; 819*716fd348SMartin Matuska case '?': 820*716fd348SMartin Matuska (void) fprintf(stderr, "invalid option '%c'\n", 821*716fd348SMartin Matuska optopt); 822*716fd348SMartin Matuska draid_usage(); 823*716fd348SMartin Matuska break; 824*716fd348SMartin Matuska } 825*716fd348SMartin Matuska } 826*716fd348SMartin Matuska 827*716fd348SMartin Matuska if (argc > optind) 828*716fd348SMartin Matuska strncpy(filename, argv[optind], MAXPATHLEN - 1); 829*716fd348SMartin Matuska else { 830*716fd348SMartin Matuska (void) fprintf(stderr, "A FILE must be specified.\n"); 831*716fd348SMartin Matuska return (1); 832*716fd348SMartin Matuska } 833*716fd348SMartin Matuska 834*716fd348SMartin Matuska restart: 835*716fd348SMartin Matuska /* 836*716fd348SMartin Matuska * Start with a fresh seed from /dev/urandom. 
837*716fd348SMartin Matuska */ 838*716fd348SMartin Matuska fd = open("/dev/urandom", O_RDONLY); 839*716fd348SMartin Matuska if (fd < 0) { 840*716fd348SMartin Matuska printf("Unable to open /dev/urandom: %s\n:", strerror(errno)); 841*716fd348SMartin Matuska return (1); 842*716fd348SMartin Matuska } else { 843*716fd348SMartin Matuska ssize_t bytes = sizeof (map_seed); 844*716fd348SMartin Matuska ssize_t bytes_read = 0; 845*716fd348SMartin Matuska 846*716fd348SMartin Matuska while (bytes_read < bytes) { 847*716fd348SMartin Matuska ssize_t rc = read(fd, ((char *)&map_seed) + bytes_read, 848*716fd348SMartin Matuska bytes - bytes_read); 849*716fd348SMartin Matuska if (rc < 0) { 850*716fd348SMartin Matuska printf("Unable to read /dev/urandom: %s\n:", 851*716fd348SMartin Matuska strerror(errno)); 852*716fd348SMartin Matuska return (1); 853*716fd348SMartin Matuska } 854*716fd348SMartin Matuska bytes_read += rc; 855*716fd348SMartin Matuska } 856*716fd348SMartin Matuska 857*716fd348SMartin Matuska (void) close(fd); 858*716fd348SMartin Matuska } 859*716fd348SMartin Matuska 860*716fd348SMartin Matuska if (restarts == 0) 861*716fd348SMartin Matuska printf("Writing generated mappings to '%s':\n", filename); 862*716fd348SMartin Matuska 863*716fd348SMartin Matuska /* 864*716fd348SMartin Matuska * Generate maps for all requested child counts. The best map for 865*716fd348SMartin Matuska * each child count is written out to the specified file. If the file 866*716fd348SMartin Matuska * already contains a better mapping this map will not be added. 
867*716fd348SMartin Matuska */ 868*716fd348SMartin Matuska for (uint64_t children = min_children; 869*716fd348SMartin Matuska children <= max_children; children++) { 870*716fd348SMartin Matuska char key[8] = { 0 }; 871*716fd348SMartin Matuska draid_map_t *map; 872*716fd348SMartin Matuska double worst_ratio = 1000.0; 873*716fd348SMartin Matuska double avg_ratio = 1000.0; 874*716fd348SMartin Matuska 875*716fd348SMartin Matuska error = eval_maps(children, passes, &map_seed, &map, 876*716fd348SMartin Matuska &worst_ratio, &avg_ratio); 877*716fd348SMartin Matuska if (error) { 878*716fd348SMartin Matuska printf("Error eval_maps(): %s\n", strerror(error)); 879*716fd348SMartin Matuska return (1); 880*716fd348SMartin Matuska } 881*716fd348SMartin Matuska 882*716fd348SMartin Matuska if (worst_ratio < 1.0 || avg_ratio < 1.0) { 883*716fd348SMartin Matuska printf("Error ratio < 1.0: worst_ratio = %2.03f " 884*716fd348SMartin Matuska "avg_ratio = %2.03f\n", worst_ratio, avg_ratio); 885*716fd348SMartin Matuska return (1); 886*716fd348SMartin Matuska } 887*716fd348SMartin Matuska 888*716fd348SMartin Matuska snprintf(key, 7, "%llu", (u_longlong_t)children); 889*716fd348SMartin Matuska error = write_map_key(filename, key, map, worst_ratio, 890*716fd348SMartin Matuska avg_ratio); 891*716fd348SMartin Matuska if (error == 0) { 892*716fd348SMartin Matuska /* The new map was added to the file. */ 893*716fd348SMartin Matuska dump_map(map, key, worst_ratio, avg_ratio, 894*716fd348SMartin Matuska MAX(verbose, 1)); 895*716fd348SMartin Matuska } else if (error == EEXIST) { 896*716fd348SMartin Matuska /* The existing map was preferable and kept. 
*/ 897*716fd348SMartin Matuska if (verbose > 0) 898*716fd348SMartin Matuska dump_map_key(filename, key, verbose); 899*716fd348SMartin Matuska } else { 900*716fd348SMartin Matuska printf("Error write_map_key(): %s\n", strerror(error)); 901*716fd348SMartin Matuska return (1); 902*716fd348SMartin Matuska } 903*716fd348SMartin Matuska 904*716fd348SMartin Matuska free_map(map); 905*716fd348SMartin Matuska } 906*716fd348SMartin Matuska 907*716fd348SMartin Matuska /* 908*716fd348SMartin Matuska * When the continuous option is set restart at the minimum number of 909*716fd348SMartin Matuska * children instead of exiting. This option is useful as a mechanism 910*716fd348SMartin Matuska * to continuous try and refine the discovered permutations. 911*716fd348SMartin Matuska */ 912*716fd348SMartin Matuska if (continuous) { 913*716fd348SMartin Matuska restarts++; 914*716fd348SMartin Matuska printf("Restarting by request (-c): %d\n", restarts); 915*716fd348SMartin Matuska goto restart; 916*716fd348SMartin Matuska } 917*716fd348SMartin Matuska 918*716fd348SMartin Matuska return (0); 919*716fd348SMartin Matuska } 920*716fd348SMartin Matuska 921*716fd348SMartin Matuska /* 922*716fd348SMartin Matuska * Verify each map in the file by generating its in-memory permutation array 923*716fd348SMartin Matuska * and comfirming its checksum is correct. 
924*716fd348SMartin Matuska */ 925*716fd348SMartin Matuska static int 926*716fd348SMartin Matuska draid_verify(int argc, char *argv[]) 927*716fd348SMartin Matuska { 928*716fd348SMartin Matuska char filename[MAXPATHLEN] = {0}; 929*716fd348SMartin Matuska int n = 0, c, error, verbose = 1; 930*716fd348SMartin Matuska int check_ratios = 0; 931*716fd348SMartin Matuska 932*716fd348SMartin Matuska while ((c = getopt(argc, argv, ":rv")) != -1) { 933*716fd348SMartin Matuska switch (c) { 934*716fd348SMartin Matuska case 'r': 935*716fd348SMartin Matuska check_ratios++; 936*716fd348SMartin Matuska break; 937*716fd348SMartin Matuska case 'v': 938*716fd348SMartin Matuska verbose++; 939*716fd348SMartin Matuska break; 940*716fd348SMartin Matuska case ':': 941*716fd348SMartin Matuska (void) fprintf(stderr, 942*716fd348SMartin Matuska "missing argument for '%c' option\n", optopt); 943*716fd348SMartin Matuska draid_usage(); 944*716fd348SMartin Matuska break; 945*716fd348SMartin Matuska case '?': 946*716fd348SMartin Matuska (void) fprintf(stderr, "invalid option '%c'\n", 947*716fd348SMartin Matuska optopt); 948*716fd348SMartin Matuska draid_usage(); 949*716fd348SMartin Matuska break; 950*716fd348SMartin Matuska } 951*716fd348SMartin Matuska } 952*716fd348SMartin Matuska 953*716fd348SMartin Matuska if (argc > optind) { 954*716fd348SMartin Matuska char *abspath = malloc(MAXPATHLEN); 955*716fd348SMartin Matuska if (abspath == NULL) 956*716fd348SMartin Matuska return (ENOMEM); 957*716fd348SMartin Matuska 958*716fd348SMartin Matuska if (realpath(argv[optind], abspath) != NULL) 959*716fd348SMartin Matuska strncpy(filename, abspath, MAXPATHLEN - 1); 960*716fd348SMartin Matuska else 961*716fd348SMartin Matuska strncpy(filename, argv[optind], MAXPATHLEN - 1); 962*716fd348SMartin Matuska 963*716fd348SMartin Matuska free(abspath); 964*716fd348SMartin Matuska } else { 965*716fd348SMartin Matuska (void) fprintf(stderr, "A FILE must be specified.\n"); 966*716fd348SMartin Matuska return (1); 
967*716fd348SMartin Matuska } 968*716fd348SMartin Matuska 969*716fd348SMartin Matuska printf("Verifying permutation maps: '%s'\n", filename); 970*716fd348SMartin Matuska 971*716fd348SMartin Matuska /* 972*716fd348SMartin Matuska * Lookup hardcoded permutation map for each valid number of children 973*716fd348SMartin Matuska * and verify a generated map has the correct checksum. Then compare 974*716fd348SMartin Matuska * the generated map values with the nvlist map values read from the 975*716fd348SMartin Matuska * reference file to cross-check the permutation. 976*716fd348SMartin Matuska */ 977*716fd348SMartin Matuska for (uint64_t children = VDEV_DRAID_MIN_CHILDREN; 978*716fd348SMartin Matuska children <= VDEV_DRAID_MAX_CHILDREN; 979*716fd348SMartin Matuska children++) { 980*716fd348SMartin Matuska draid_map_t *map; 981*716fd348SMartin Matuska char key[8] = {0}; 982*716fd348SMartin Matuska 983*716fd348SMartin Matuska snprintf(key, 8, "%llu", (u_longlong_t)children); 984*716fd348SMartin Matuska 985*716fd348SMartin Matuska error = alloc_fixed_map(children, &map); 986*716fd348SMartin Matuska if (error) { 987*716fd348SMartin Matuska printf("Error alloc_fixed_map() failed: %s\n", 988*716fd348SMartin Matuska error == ECKSUM ? 
"Invalid checksum" : 989*716fd348SMartin Matuska strerror(error)); 990*716fd348SMartin Matuska return (1); 991*716fd348SMartin Matuska } 992*716fd348SMartin Matuska 993*716fd348SMartin Matuska uint64_t nv_seed, nv_checksum, nv_children, nv_nperms; 994*716fd348SMartin Matuska uint8_t *nv_perms; 995*716fd348SMartin Matuska nvlist_t *cfg; 996*716fd348SMartin Matuska uint_t c; 997*716fd348SMartin Matuska 998*716fd348SMartin Matuska error = read_map_key(filename, key, &cfg); 999*716fd348SMartin Matuska if (error != 0) { 1000*716fd348SMartin Matuska printf("Error read_map_key() failed: %s\n", 1001*716fd348SMartin Matuska strerror(error)); 1002*716fd348SMartin Matuska free_map(map); 1003*716fd348SMartin Matuska return (1); 1004*716fd348SMartin Matuska } 1005*716fd348SMartin Matuska 1006*716fd348SMartin Matuska nv_seed = fnvlist_lookup_uint64(cfg, MAP_SEED); 1007*716fd348SMartin Matuska nv_checksum = fnvlist_lookup_uint64(cfg, MAP_CHECKSUM); 1008*716fd348SMartin Matuska nv_children = fnvlist_lookup_uint64(cfg, MAP_CHILDREN); 1009*716fd348SMartin Matuska nv_nperms = fnvlist_lookup_uint64(cfg, MAP_NPERMS); 1010*716fd348SMartin Matuska nvlist_lookup_uint8_array(cfg, MAP_PERMS, &nv_perms, &c); 1011*716fd348SMartin Matuska 1012*716fd348SMartin Matuska /* 1013*716fd348SMartin Matuska * Compare draid_map_t and nvlist reference values. 
1014*716fd348SMartin Matuska */ 1015*716fd348SMartin Matuska if (map->dm_seed != nv_seed) { 1016*716fd348SMartin Matuska printf("Error different seeds: 0x%016llx != " 1017*716fd348SMartin Matuska "0x%016llx\n", (u_longlong_t)map->dm_seed, 1018*716fd348SMartin Matuska (u_longlong_t)nv_seed); 1019*716fd348SMartin Matuska error = EINVAL; 1020*716fd348SMartin Matuska } 1021*716fd348SMartin Matuska 1022*716fd348SMartin Matuska if (map->dm_checksum != nv_checksum) { 1023*716fd348SMartin Matuska printf("Error different checksums: 0x%016llx " 1024*716fd348SMartin Matuska "!= 0x%016llx\n", 1025*716fd348SMartin Matuska (u_longlong_t)map->dm_checksum, 1026*716fd348SMartin Matuska (u_longlong_t)nv_checksum); 1027*716fd348SMartin Matuska error = EINVAL; 1028*716fd348SMartin Matuska } 1029*716fd348SMartin Matuska 1030*716fd348SMartin Matuska if (map->dm_children != nv_children) { 1031*716fd348SMartin Matuska printf("Error different children: %llu " 1032*716fd348SMartin Matuska "!= %llu\n", (u_longlong_t)map->dm_children, 1033*716fd348SMartin Matuska (u_longlong_t)nv_children); 1034*716fd348SMartin Matuska error = EINVAL; 1035*716fd348SMartin Matuska } 1036*716fd348SMartin Matuska 1037*716fd348SMartin Matuska if (map->dm_nperms != nv_nperms) { 1038*716fd348SMartin Matuska printf("Error different nperms: %llu " 1039*716fd348SMartin Matuska "!= %llu\n", (u_longlong_t)map->dm_nperms, 1040*716fd348SMartin Matuska (u_longlong_t)nv_nperms); 1041*716fd348SMartin Matuska error = EINVAL; 1042*716fd348SMartin Matuska } 1043*716fd348SMartin Matuska 1044*716fd348SMartin Matuska for (uint64_t i = 0; i < nv_children * nv_nperms; i++) { 1045*716fd348SMartin Matuska if (map->dm_perms[i] != nv_perms[i]) { 1046*716fd348SMartin Matuska printf("Error different perms[%llu]: " 1047*716fd348SMartin Matuska "%d != %d\n", (u_longlong_t)i, 1048*716fd348SMartin Matuska (int)map->dm_perms[i], 1049*716fd348SMartin Matuska (int)nv_perms[i]); 1050*716fd348SMartin Matuska error = EINVAL; 1051*716fd348SMartin 
Matuska break; 1052*716fd348SMartin Matuska } 1053*716fd348SMartin Matuska } 1054*716fd348SMartin Matuska 1055*716fd348SMartin Matuska /* 1056*716fd348SMartin Matuska * For good measure recalculate the worst and average 1057*716fd348SMartin Matuska * ratios and confirm they match the nvlist values. 1058*716fd348SMartin Matuska */ 1059*716fd348SMartin Matuska if (check_ratios) { 1060*716fd348SMartin Matuska uint64_t nv_worst_ratio, nv_avg_ratio; 1061*716fd348SMartin Matuska double worst_ratio, avg_ratio; 1062*716fd348SMartin Matuska 1063*716fd348SMartin Matuska eval_decluster(map, &worst_ratio, &avg_ratio); 1064*716fd348SMartin Matuska 1065*716fd348SMartin Matuska nv_worst_ratio = fnvlist_lookup_uint64(cfg, 1066*716fd348SMartin Matuska MAP_WORST_RATIO); 1067*716fd348SMartin Matuska nv_avg_ratio = fnvlist_lookup_uint64(cfg, 1068*716fd348SMartin Matuska MAP_AVG_RATIO); 1069*716fd348SMartin Matuska 1070*716fd348SMartin Matuska if (worst_ratio < 1.0 || avg_ratio < 1.0) { 1071*716fd348SMartin Matuska printf("Error ratio out of range %2.03f, " 1072*716fd348SMartin Matuska "%2.03f\n", worst_ratio, avg_ratio); 1073*716fd348SMartin Matuska error = EINVAL; 1074*716fd348SMartin Matuska } 1075*716fd348SMartin Matuska 1076*716fd348SMartin Matuska if ((uint64_t)(worst_ratio * 1000.0) != 1077*716fd348SMartin Matuska nv_worst_ratio) { 1078*716fd348SMartin Matuska printf("Error different worst_ratio %2.03f " 1079*716fd348SMartin Matuska "!= %2.03f\n", (double)nv_worst_ratio / 1080*716fd348SMartin Matuska 1000.0, worst_ratio); 1081*716fd348SMartin Matuska error = EINVAL; 1082*716fd348SMartin Matuska } 1083*716fd348SMartin Matuska 1084*716fd348SMartin Matuska if ((uint64_t)(avg_ratio * 1000.0) != nv_avg_ratio) { 1085*716fd348SMartin Matuska printf("Error different average_ratio %2.03f " 1086*716fd348SMartin Matuska "!= %2.03f\n", (double)nv_avg_ratio / 1087*716fd348SMartin Matuska 1000.0, avg_ratio); 1088*716fd348SMartin Matuska error = EINVAL; 1089*716fd348SMartin Matuska } 
1090*716fd348SMartin Matuska } 1091*716fd348SMartin Matuska 1092*716fd348SMartin Matuska if (error) { 1093*716fd348SMartin Matuska free_map(map); 1094*716fd348SMartin Matuska nvlist_free(cfg); 1095*716fd348SMartin Matuska return (1); 1096*716fd348SMartin Matuska } 1097*716fd348SMartin Matuska 1098*716fd348SMartin Matuska if (verbose > 0) { 1099*716fd348SMartin Matuska printf("- %llu children: good\n", 1100*716fd348SMartin Matuska (u_longlong_t)children); 1101*716fd348SMartin Matuska } 1102*716fd348SMartin Matuska n++; 1103*716fd348SMartin Matuska 1104*716fd348SMartin Matuska free_map(map); 1105*716fd348SMartin Matuska nvlist_free(cfg); 1106*716fd348SMartin Matuska } 1107*716fd348SMartin Matuska 1108*716fd348SMartin Matuska if (n != (VDEV_DRAID_MAX_CHILDREN - 1)) { 1109*716fd348SMartin Matuska printf("Error permutation maps missing: %d / %d checked\n", 1110*716fd348SMartin Matuska n, VDEV_DRAID_MAX_CHILDREN - 1); 1111*716fd348SMartin Matuska return (1); 1112*716fd348SMartin Matuska } 1113*716fd348SMartin Matuska 1114*716fd348SMartin Matuska printf("Successfully verified %d / %d permutation maps\n", 1115*716fd348SMartin Matuska n, VDEV_DRAID_MAX_CHILDREN - 1); 1116*716fd348SMartin Matuska 1117*716fd348SMartin Matuska return (0); 1118*716fd348SMartin Matuska } 1119*716fd348SMartin Matuska 1120*716fd348SMartin Matuska /* 1121*716fd348SMartin Matuska * Dump the contents of the specified mapping(s) for inspection. 
1122*716fd348SMartin Matuska */ 1123*716fd348SMartin Matuska static int 1124*716fd348SMartin Matuska draid_dump(int argc, char *argv[]) 1125*716fd348SMartin Matuska { 1126*716fd348SMartin Matuska char filename[MAXPATHLEN] = {0}; 1127*716fd348SMartin Matuska int c, error, verbose = 1; 1128*716fd348SMartin Matuska int min_children = VDEV_DRAID_MIN_CHILDREN; 1129*716fd348SMartin Matuska int max_children = VDEV_DRAID_MAX_CHILDREN; 1130*716fd348SMartin Matuska 1131*716fd348SMartin Matuska while ((c = getopt(argc, argv, ":vm:n:")) != -1) { 1132*716fd348SMartin Matuska switch (c) { 1133*716fd348SMartin Matuska case 'm': 1134*716fd348SMartin Matuska min_children = (int)strtol(optarg, NULL, 0); 1135*716fd348SMartin Matuska if (min_children < 2) { 1136*716fd348SMartin Matuska (void) fprintf(stderr, "A minimum of 2 " 1137*716fd348SMartin Matuska "children are required.\n"); 1138*716fd348SMartin Matuska return (1); 1139*716fd348SMartin Matuska } 1140*716fd348SMartin Matuska 1141*716fd348SMartin Matuska break; 1142*716fd348SMartin Matuska case 'n': 1143*716fd348SMartin Matuska max_children = (int)strtol(optarg, NULL, 0); 1144*716fd348SMartin Matuska if (max_children > VDEV_DRAID_MAX_CHILDREN) { 1145*716fd348SMartin Matuska (void) fprintf(stderr, "A maximum of %d " 1146*716fd348SMartin Matuska "children are allowed.\n", 1147*716fd348SMartin Matuska VDEV_DRAID_MAX_CHILDREN); 1148*716fd348SMartin Matuska return (1); 1149*716fd348SMartin Matuska } 1150*716fd348SMartin Matuska break; 1151*716fd348SMartin Matuska case 'v': 1152*716fd348SMartin Matuska verbose++; 1153*716fd348SMartin Matuska break; 1154*716fd348SMartin Matuska case ':': 1155*716fd348SMartin Matuska (void) fprintf(stderr, 1156*716fd348SMartin Matuska "missing argument for '%c' option\n", optopt); 1157*716fd348SMartin Matuska draid_usage(); 1158*716fd348SMartin Matuska break; 1159*716fd348SMartin Matuska case '?': 1160*716fd348SMartin Matuska (void) fprintf(stderr, "invalid option '%c'\n", 1161*716fd348SMartin Matuska 
optopt); 1162*716fd348SMartin Matuska draid_usage(); 1163*716fd348SMartin Matuska break; 1164*716fd348SMartin Matuska } 1165*716fd348SMartin Matuska } 1166*716fd348SMartin Matuska 1167*716fd348SMartin Matuska if (argc > optind) 1168*716fd348SMartin Matuska strncpy(filename, argv[optind], MAXPATHLEN - 1); 1169*716fd348SMartin Matuska else { 1170*716fd348SMartin Matuska (void) fprintf(stderr, "A FILE must be specified.\n"); 1171*716fd348SMartin Matuska return (1); 1172*716fd348SMartin Matuska } 1173*716fd348SMartin Matuska 1174*716fd348SMartin Matuska /* 1175*716fd348SMartin Matuska * Dump maps for the requested child counts. 1176*716fd348SMartin Matuska */ 1177*716fd348SMartin Matuska for (uint64_t children = min_children; 1178*716fd348SMartin Matuska children <= max_children; children++) { 1179*716fd348SMartin Matuska char key[8] = { 0 }; 1180*716fd348SMartin Matuska 1181*716fd348SMartin Matuska snprintf(key, 7, "%llu", (u_longlong_t)children); 1182*716fd348SMartin Matuska error = dump_map_key(filename, key, verbose); 1183*716fd348SMartin Matuska if (error) { 1184*716fd348SMartin Matuska printf("Error dump_map_key(): %s\n", strerror(error)); 1185*716fd348SMartin Matuska return (1); 1186*716fd348SMartin Matuska } 1187*716fd348SMartin Matuska } 1188*716fd348SMartin Matuska 1189*716fd348SMartin Matuska return (0); 1190*716fd348SMartin Matuska } 1191*716fd348SMartin Matuska 1192*716fd348SMartin Matuska /* 1193*716fd348SMartin Matuska * Print all of the mappings as a C formatted draid_map_t array. This table 1194*716fd348SMartin Matuska * is found in the module/zcommon/zfs_draid.c file and is the definitive 1195*716fd348SMartin Matuska * source for all mapping used by dRAID. It cannot be updated without 1196*716fd348SMartin Matuska * changing the dRAID on disk format. 
1197*716fd348SMartin Matuska */ 1198*716fd348SMartin Matuska static int 1199*716fd348SMartin Matuska draid_table(int argc, char *argv[]) 1200*716fd348SMartin Matuska { 1201*716fd348SMartin Matuska char filename[MAXPATHLEN] = {0}; 1202*716fd348SMartin Matuska int error; 1203*716fd348SMartin Matuska 1204*716fd348SMartin Matuska if (argc > optind) 1205*716fd348SMartin Matuska strncpy(filename, argv[optind], MAXPATHLEN - 1); 1206*716fd348SMartin Matuska else { 1207*716fd348SMartin Matuska (void) fprintf(stderr, "A FILE must be specified.\n"); 1208*716fd348SMartin Matuska return (1); 1209*716fd348SMartin Matuska } 1210*716fd348SMartin Matuska 1211*716fd348SMartin Matuska printf("static const draid_map_t " 1212*716fd348SMartin Matuska "draid_maps[VDEV_DRAID_MAX_MAPS] = {\n"); 1213*716fd348SMartin Matuska 1214*716fd348SMartin Matuska for (uint64_t children = VDEV_DRAID_MIN_CHILDREN; 1215*716fd348SMartin Matuska children <= VDEV_DRAID_MAX_CHILDREN; 1216*716fd348SMartin Matuska children++) { 1217*716fd348SMartin Matuska uint64_t seed, checksum, nperms, avg_ratio; 1218*716fd348SMartin Matuska nvlist_t *cfg; 1219*716fd348SMartin Matuska char key[8] = {0}; 1220*716fd348SMartin Matuska 1221*716fd348SMartin Matuska snprintf(key, 8, "%llu", (u_longlong_t)children); 1222*716fd348SMartin Matuska 1223*716fd348SMartin Matuska error = read_map_key(filename, key, &cfg); 1224*716fd348SMartin Matuska if (error != 0) { 1225*716fd348SMartin Matuska printf("Error read_map_key() failed: %s\n", 1226*716fd348SMartin Matuska strerror(error)); 1227*716fd348SMartin Matuska return (1); 1228*716fd348SMartin Matuska } 1229*716fd348SMartin Matuska 1230*716fd348SMartin Matuska seed = fnvlist_lookup_uint64(cfg, MAP_SEED); 1231*716fd348SMartin Matuska checksum = fnvlist_lookup_uint64(cfg, MAP_CHECKSUM); 1232*716fd348SMartin Matuska children = fnvlist_lookup_uint64(cfg, MAP_CHILDREN); 1233*716fd348SMartin Matuska nperms = fnvlist_lookup_uint64(cfg, MAP_NPERMS); 1234*716fd348SMartin Matuska avg_ratio = 
fnvlist_lookup_uint64(cfg, MAP_AVG_RATIO); 1235*716fd348SMartin Matuska 1236*716fd348SMartin Matuska printf("\t{ %3llu, %3llu, 0x%016llx, 0x%016llx },\t" 1237*716fd348SMartin Matuska "/* %2.03f */\n", (u_longlong_t)children, 1238*716fd348SMartin Matuska (u_longlong_t)nperms, (u_longlong_t)seed, 1239*716fd348SMartin Matuska (u_longlong_t)checksum, (double)avg_ratio / 1000.0); 1240*716fd348SMartin Matuska 1241*716fd348SMartin Matuska nvlist_free(cfg); 1242*716fd348SMartin Matuska } 1243*716fd348SMartin Matuska 1244*716fd348SMartin Matuska printf("};\n"); 1245*716fd348SMartin Matuska 1246*716fd348SMartin Matuska return (0); 1247*716fd348SMartin Matuska } 1248*716fd348SMartin Matuska 1249*716fd348SMartin Matuska static int 1250*716fd348SMartin Matuska draid_merge_impl(nvlist_t *allcfgs, const char *srcfilename, int *mergedp) 1251*716fd348SMartin Matuska { 1252*716fd348SMartin Matuska nvlist_t *srccfgs; 1253*716fd348SMartin Matuska nvpair_t *elem = NULL; 1254*716fd348SMartin Matuska int error, merged = 0; 1255*716fd348SMartin Matuska 1256*716fd348SMartin Matuska error = read_map(srcfilename, &srccfgs); 1257*716fd348SMartin Matuska if (error != 0) 1258*716fd348SMartin Matuska return (error); 1259*716fd348SMartin Matuska 1260*716fd348SMartin Matuska while ((elem = nvlist_next_nvpair(srccfgs, elem)) != NULL) { 1261*716fd348SMartin Matuska uint64_t nv_worst_ratio; 1262*716fd348SMartin Matuska uint64_t allcfg_worst_ratio; 1263*716fd348SMartin Matuska nvlist_t *cfg, *allcfg; 1264*716fd348SMartin Matuska char *key; 1265*716fd348SMartin Matuska 1266*716fd348SMartin Matuska switch (nvpair_type(elem)) { 1267*716fd348SMartin Matuska case DATA_TYPE_NVLIST: 1268*716fd348SMartin Matuska 1269*716fd348SMartin Matuska (void) nvpair_value_nvlist(elem, &cfg); 1270*716fd348SMartin Matuska key = nvpair_name(elem); 1271*716fd348SMartin Matuska 1272*716fd348SMartin Matuska nv_worst_ratio = fnvlist_lookup_uint64(cfg, 1273*716fd348SMartin Matuska MAP_WORST_RATIO); 1274*716fd348SMartin Matuska 
1275*716fd348SMartin Matuska error = nvlist_lookup_nvlist(allcfgs, key, &allcfg); 1276*716fd348SMartin Matuska if (error == 0) { 1277*716fd348SMartin Matuska allcfg_worst_ratio = fnvlist_lookup_uint64( 1278*716fd348SMartin Matuska allcfg, MAP_WORST_RATIO); 1279*716fd348SMartin Matuska 1280*716fd348SMartin Matuska if (nv_worst_ratio < allcfg_worst_ratio) { 1281*716fd348SMartin Matuska fnvlist_remove(allcfgs, key); 1282*716fd348SMartin Matuska error = nvlist_add_nvlist(allcfgs, 1283*716fd348SMartin Matuska key, cfg); 1284*716fd348SMartin Matuska merged++; 1285*716fd348SMartin Matuska } 1286*716fd348SMartin Matuska } else if (error == ENOENT) { 1287*716fd348SMartin Matuska error = nvlist_add_nvlist(allcfgs, key, cfg); 1288*716fd348SMartin Matuska merged++; 1289*716fd348SMartin Matuska } else { 1290*716fd348SMartin Matuska return (error); 1291*716fd348SMartin Matuska } 1292*716fd348SMartin Matuska 1293*716fd348SMartin Matuska break; 1294*716fd348SMartin Matuska default: 1295*716fd348SMartin Matuska continue; 1296*716fd348SMartin Matuska } 1297*716fd348SMartin Matuska } 1298*716fd348SMartin Matuska 1299*716fd348SMartin Matuska nvlist_free(srccfgs); 1300*716fd348SMartin Matuska 1301*716fd348SMartin Matuska *mergedp = merged; 1302*716fd348SMartin Matuska 1303*716fd348SMartin Matuska return (0); 1304*716fd348SMartin Matuska } 1305*716fd348SMartin Matuska 1306*716fd348SMartin Matuska /* 1307*716fd348SMartin Matuska * Merge the best map for each child count found in the listed files into 1308*716fd348SMartin Matuska * a new file. This allows 'draid generate' to be run in parallel and for 1309*716fd348SMartin Matuska * the results maps to be combined. 
1310*716fd348SMartin Matuska */ 1311*716fd348SMartin Matuska static int 1312*716fd348SMartin Matuska draid_merge(int argc, char *argv[]) 1313*716fd348SMartin Matuska { 1314*716fd348SMartin Matuska char filename[MAXPATHLEN] = {0}; 1315*716fd348SMartin Matuska int c, error, total_merged = 0; 1316*716fd348SMartin Matuska nvlist_t *allcfgs; 1317*716fd348SMartin Matuska 1318*716fd348SMartin Matuska while ((c = getopt(argc, argv, ":")) != -1) { 1319*716fd348SMartin Matuska switch (c) { 1320*716fd348SMartin Matuska case ':': 1321*716fd348SMartin Matuska (void) fprintf(stderr, 1322*716fd348SMartin Matuska "missing argument for '%c' option\n", optopt); 1323*716fd348SMartin Matuska draid_usage(); 1324*716fd348SMartin Matuska break; 1325*716fd348SMartin Matuska case '?': 1326*716fd348SMartin Matuska (void) fprintf(stderr, "invalid option '%c'\n", 1327*716fd348SMartin Matuska optopt); 1328*716fd348SMartin Matuska draid_usage(); 1329*716fd348SMartin Matuska break; 1330*716fd348SMartin Matuska } 1331*716fd348SMartin Matuska } 1332*716fd348SMartin Matuska 1333*716fd348SMartin Matuska if (argc < 4) { 1334*716fd348SMartin Matuska (void) fprintf(stderr, 1335*716fd348SMartin Matuska "A FILE and multiple SRCs must be specified.\n"); 1336*716fd348SMartin Matuska return (1); 1337*716fd348SMartin Matuska } 1338*716fd348SMartin Matuska 1339*716fd348SMartin Matuska strncpy(filename, argv[optind], MAXPATHLEN - 1); 1340*716fd348SMartin Matuska optind++; 1341*716fd348SMartin Matuska 1342*716fd348SMartin Matuska error = read_map(filename, &allcfgs); 1343*716fd348SMartin Matuska if (error == ENOENT) { 1344*716fd348SMartin Matuska allcfgs = fnvlist_alloc(); 1345*716fd348SMartin Matuska } else if (error != 0) { 1346*716fd348SMartin Matuska printf("Error read_map(): %s\n", strerror(error)); 1347*716fd348SMartin Matuska return (error); 1348*716fd348SMartin Matuska } 1349*716fd348SMartin Matuska 1350*716fd348SMartin Matuska while (optind < argc) { 1351*716fd348SMartin Matuska char 
srcfilename[MAXPATHLEN] = {0}; 1352*716fd348SMartin Matuska int merged = 0; 1353*716fd348SMartin Matuska 1354*716fd348SMartin Matuska strncpy(srcfilename, argv[optind], MAXPATHLEN - 1); 1355*716fd348SMartin Matuska 1356*716fd348SMartin Matuska error = draid_merge_impl(allcfgs, srcfilename, &merged); 1357*716fd348SMartin Matuska if (error) { 1358*716fd348SMartin Matuska printf("Error draid_merge_impl(): %s\n", 1359*716fd348SMartin Matuska strerror(error)); 1360*716fd348SMartin Matuska nvlist_free(allcfgs); 1361*716fd348SMartin Matuska return (1); 1362*716fd348SMartin Matuska } 1363*716fd348SMartin Matuska 1364*716fd348SMartin Matuska total_merged += merged; 1365*716fd348SMartin Matuska printf("Merged %d key(s) from '%s' into '%s'\n", merged, 1366*716fd348SMartin Matuska srcfilename, filename); 1367*716fd348SMartin Matuska 1368*716fd348SMartin Matuska optind++; 1369*716fd348SMartin Matuska } 1370*716fd348SMartin Matuska 1371*716fd348SMartin Matuska if (total_merged > 0) 1372*716fd348SMartin Matuska write_map(filename, allcfgs); 1373*716fd348SMartin Matuska 1374*716fd348SMartin Matuska printf("Merged a total of %d key(s) into '%s'\n", total_merged, 1375*716fd348SMartin Matuska filename); 1376*716fd348SMartin Matuska 1377*716fd348SMartin Matuska nvlist_free(allcfgs); 1378*716fd348SMartin Matuska 1379*716fd348SMartin Matuska return (0); 1380*716fd348SMartin Matuska } 1381*716fd348SMartin Matuska 1382*716fd348SMartin Matuska int 1383*716fd348SMartin Matuska main(int argc, char *argv[]) 1384*716fd348SMartin Matuska { 1385*716fd348SMartin Matuska if (argc < 2) 1386*716fd348SMartin Matuska draid_usage(); 1387*716fd348SMartin Matuska 1388*716fd348SMartin Matuska char *subcommand = argv[1]; 1389*716fd348SMartin Matuska 1390*716fd348SMartin Matuska if (strcmp(subcommand, "generate") == 0) { 1391*716fd348SMartin Matuska return (draid_generate(argc - 1, argv + 1)); 1392*716fd348SMartin Matuska } else if (strcmp(subcommand, "verify") == 0) { 1393*716fd348SMartin Matuska return 
(draid_verify(argc - 1, argv + 1)); 1394*716fd348SMartin Matuska } else if (strcmp(subcommand, "dump") == 0) { 1395*716fd348SMartin Matuska return (draid_dump(argc - 1, argv + 1)); 1396*716fd348SMartin Matuska } else if (strcmp(subcommand, "table") == 0) { 1397*716fd348SMartin Matuska return (draid_table(argc - 1, argv + 1)); 1398*716fd348SMartin Matuska } else if (strcmp(subcommand, "merge") == 0) { 1399*716fd348SMartin Matuska return (draid_merge(argc - 1, argv + 1)); 1400*716fd348SMartin Matuska } else { 1401*716fd348SMartin Matuska draid_usage(); 1402*716fd348SMartin Matuska } 1403*716fd348SMartin Matuska } 1404