xref: /freebsd-src/sys/contrib/openzfs/tests/zfs-tests/cmd/draid.c (revision 716fd348e01c5f2ba125f878a634a753436c2994)
1*716fd348SMartin Matuska /*
2*716fd348SMartin Matuska  * CDDL HEADER START
3*716fd348SMartin Matuska  *
4*716fd348SMartin Matuska  * The contents of this file are subject to the terms of the
5*716fd348SMartin Matuska  * Common Development and Distribution License (the "License").
6*716fd348SMartin Matuska  * You may not use this file except in compliance with the License.
7*716fd348SMartin Matuska  *
8*716fd348SMartin Matuska  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*716fd348SMartin Matuska  * or http://www.opensolaris.org/os/licensing.
10*716fd348SMartin Matuska  * See the License for the specific language governing permissions
11*716fd348SMartin Matuska  * and limitations under the License.
12*716fd348SMartin Matuska  *
13*716fd348SMartin Matuska  * When distributing Covered Code, include this CDDL HEADER in each
14*716fd348SMartin Matuska  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*716fd348SMartin Matuska  * If applicable, add the following below this CDDL HEADER, with the
16*716fd348SMartin Matuska  * fields enclosed by brackets "[]" replaced with your own identifying
17*716fd348SMartin Matuska  * information: Portions Copyright [yyyy] [name of copyright owner]
18*716fd348SMartin Matuska  *
19*716fd348SMartin Matuska  * CDDL HEADER END
20*716fd348SMartin Matuska  */
21*716fd348SMartin Matuska /*
22*716fd348SMartin Matuska  * Copyright (c) 2018 Intel Corporation.
23*716fd348SMartin Matuska  * Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
24*716fd348SMartin Matuska  */
25*716fd348SMartin Matuska 
26*716fd348SMartin Matuska #include <stdio.h>
27*716fd348SMartin Matuska #include <zlib.h>
28*716fd348SMartin Matuska #include <zfs_fletcher.h>
29*716fd348SMartin Matuska #include <sys/vdev_draid.h>
30*716fd348SMartin Matuska #include <sys/nvpair.h>
31*716fd348SMartin Matuska #include <sys/stat.h>
32*716fd348SMartin Matuska 
33*716fd348SMartin Matuska /*
34*716fd348SMartin Matuska  * The number of rows to generate for new permutation maps.
35*716fd348SMartin Matuska  */
36*716fd348SMartin Matuska #define	MAP_ROWS_DEFAULT	256
37*716fd348SMartin Matuska 
38*716fd348SMartin Matuska /*
39*716fd348SMartin Matuska  * Key values for dRAID maps when stored as nvlists.
40*716fd348SMartin Matuska  */
41*716fd348SMartin Matuska #define	MAP_SEED		"seed"
42*716fd348SMartin Matuska #define	MAP_CHECKSUM		"checksum"
43*716fd348SMartin Matuska #define	MAP_WORST_RATIO		"worst_ratio"
44*716fd348SMartin Matuska #define	MAP_AVG_RATIO		"avg_ratio"
45*716fd348SMartin Matuska #define	MAP_CHILDREN		"children"
46*716fd348SMartin Matuska #define	MAP_NPERMS		"nperms"
47*716fd348SMartin Matuska #define	MAP_PERMS		"perms"
48*716fd348SMartin Matuska 
/*
 * Print the synopsis of every draid sub-command to stderr and terminate
 * the process with exit status 1.  This function does not return.
 */
static void
draid_usage(void)
{
	static const char usage_text[] =
	    "usage: draid command args ...\n"
	    "Available commands are:\n"
	    "\n"
	    "\tdraid generate [-cv] [-m min] [-n max] [-p passes] FILE\n"
	    "\tdraid verify [-rv] FILE\n"
	    "\tdraid dump [-v] [-m min] [-n max] FILE\n"
	    "\tdraid table FILE\n"
	    "\tdraid merge FILE SRC SRC...\n";

	(void) fputs(usage_text, stderr);
	exit(1);
}
63*716fd348SMartin Matuska 
64*716fd348SMartin Matuska static int
65*716fd348SMartin Matuska read_map(const char *filename, nvlist_t **allcfgs)
66*716fd348SMartin Matuska {
67*716fd348SMartin Matuska 	int block_size = 131072;
68*716fd348SMartin Matuska 	int buf_size = 131072;
69*716fd348SMartin Matuska 	int tmp_size, error;
70*716fd348SMartin Matuska 	char *tmp_buf;
71*716fd348SMartin Matuska 
72*716fd348SMartin Matuska 	struct stat64 stat;
73*716fd348SMartin Matuska 	if (lstat64(filename, &stat) != 0)
74*716fd348SMartin Matuska 		return (errno);
75*716fd348SMartin Matuska 
76*716fd348SMartin Matuska 	if (stat.st_size == 0 ||
77*716fd348SMartin Matuska 	    !(S_ISREG(stat.st_mode) || S_ISLNK(stat.st_mode))) {
78*716fd348SMartin Matuska 		return (EINVAL);
79*716fd348SMartin Matuska 	}
80*716fd348SMartin Matuska 
81*716fd348SMartin Matuska 	gzFile fp = gzopen(filename, "rb");
82*716fd348SMartin Matuska 	if (fp == Z_NULL)
83*716fd348SMartin Matuska 		return (errno);
84*716fd348SMartin Matuska 
85*716fd348SMartin Matuska 	char *buf = malloc(buf_size);
86*716fd348SMartin Matuska 	if (buf == NULL) {
87*716fd348SMartin Matuska 		(void) gzclose(fp);
88*716fd348SMartin Matuska 		return (ENOMEM);
89*716fd348SMartin Matuska 	}
90*716fd348SMartin Matuska 
91*716fd348SMartin Matuska 	ssize_t rc, bytes = 0;
92*716fd348SMartin Matuska 	while (!gzeof(fp)) {
93*716fd348SMartin Matuska 		rc = gzread(fp, buf + bytes, block_size);
94*716fd348SMartin Matuska 		if ((rc < 0) || (rc == 0 && !gzeof(fp))) {
95*716fd348SMartin Matuska 			free(buf);
96*716fd348SMartin Matuska 			(void) gzclose(fp);
97*716fd348SMartin Matuska 			(void) gzerror(fp, &error);
98*716fd348SMartin Matuska 			return (error);
99*716fd348SMartin Matuska 		} else {
100*716fd348SMartin Matuska 			bytes += rc;
101*716fd348SMartin Matuska 
102*716fd348SMartin Matuska 			if (bytes + block_size >= buf_size) {
103*716fd348SMartin Matuska 				tmp_size = 2 * buf_size;
104*716fd348SMartin Matuska 				tmp_buf = malloc(tmp_size);
105*716fd348SMartin Matuska 				if (tmp_buf == NULL) {
106*716fd348SMartin Matuska 					free(buf);
107*716fd348SMartin Matuska 					(void) gzclose(fp);
108*716fd348SMartin Matuska 					return (ENOMEM);
109*716fd348SMartin Matuska 				}
110*716fd348SMartin Matuska 
111*716fd348SMartin Matuska 				memcpy(tmp_buf, buf, bytes);
112*716fd348SMartin Matuska 				free(buf);
113*716fd348SMartin Matuska 				buf = tmp_buf;
114*716fd348SMartin Matuska 				buf_size = tmp_size;
115*716fd348SMartin Matuska 			}
116*716fd348SMartin Matuska 		}
117*716fd348SMartin Matuska 	}
118*716fd348SMartin Matuska 
119*716fd348SMartin Matuska 	(void) gzclose(fp);
120*716fd348SMartin Matuska 
121*716fd348SMartin Matuska 	error = nvlist_unpack(buf, bytes, allcfgs, 0);
122*716fd348SMartin Matuska 	free(buf);
123*716fd348SMartin Matuska 
124*716fd348SMartin Matuska 	return (error);
125*716fd348SMartin Matuska }
126*716fd348SMartin Matuska 
127*716fd348SMartin Matuska /*
128*716fd348SMartin Matuska  * Read a map from the specified filename.  A file contains multiple maps
129*716fd348SMartin Matuska  * which are indexed by the number of children. The caller is responsible
130*716fd348SMartin Matuska  * for freeing the configuration returned.
131*716fd348SMartin Matuska  */
132*716fd348SMartin Matuska static int
133*716fd348SMartin Matuska read_map_key(const char *filename, char *key, nvlist_t **cfg)
134*716fd348SMartin Matuska {
135*716fd348SMartin Matuska 	nvlist_t *allcfgs, *foundcfg = NULL;
136*716fd348SMartin Matuska 	int error;
137*716fd348SMartin Matuska 
138*716fd348SMartin Matuska 	error = read_map(filename, &allcfgs);
139*716fd348SMartin Matuska 	if (error != 0)
140*716fd348SMartin Matuska 		return (error);
141*716fd348SMartin Matuska 
142*716fd348SMartin Matuska 	nvlist_lookup_nvlist(allcfgs, key, &foundcfg);
143*716fd348SMartin Matuska 	if (foundcfg != NULL) {
144*716fd348SMartin Matuska 		nvlist_dup(foundcfg, cfg, KM_SLEEP);
145*716fd348SMartin Matuska 		error = 0;
146*716fd348SMartin Matuska 	} else {
147*716fd348SMartin Matuska 		error = ENOENT;
148*716fd348SMartin Matuska 	}
149*716fd348SMartin Matuska 
150*716fd348SMartin Matuska 	nvlist_free(allcfgs);
151*716fd348SMartin Matuska 
152*716fd348SMartin Matuska 	return (error);
153*716fd348SMartin Matuska }
154*716fd348SMartin Matuska 
155*716fd348SMartin Matuska /*
156*716fd348SMartin Matuska  * Write all mappings to the map file.
157*716fd348SMartin Matuska  */
158*716fd348SMartin Matuska static int
159*716fd348SMartin Matuska write_map(const char *filename, nvlist_t *allcfgs)
160*716fd348SMartin Matuska {
161*716fd348SMartin Matuska 	size_t buflen = 0;
162*716fd348SMartin Matuska 	int error;
163*716fd348SMartin Matuska 
164*716fd348SMartin Matuska 	error = nvlist_size(allcfgs, &buflen, NV_ENCODE_XDR);
165*716fd348SMartin Matuska 	if (error)
166*716fd348SMartin Matuska 		return (error);
167*716fd348SMartin Matuska 
168*716fd348SMartin Matuska 	char *buf = malloc(buflen);
169*716fd348SMartin Matuska 	if (buf == NULL)
170*716fd348SMartin Matuska 		return (ENOMEM);
171*716fd348SMartin Matuska 
172*716fd348SMartin Matuska 	error = nvlist_pack(allcfgs, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP);
173*716fd348SMartin Matuska 	if (error) {
174*716fd348SMartin Matuska 		free(buf);
175*716fd348SMartin Matuska 		return (error);
176*716fd348SMartin Matuska 	}
177*716fd348SMartin Matuska 
178*716fd348SMartin Matuska 	/*
179*716fd348SMartin Matuska 	 * Atomically update the file using a temporary file and the
180*716fd348SMartin Matuska 	 * traditional unlink then rename steps.  This code provides
181*716fd348SMartin Matuska 	 * no locking, it only guarantees the packed nvlist on disk
182*716fd348SMartin Matuska 	 * is updated atomically and is internally consistent.
183*716fd348SMartin Matuska 	 */
184*716fd348SMartin Matuska 	char *tmpname = calloc(1, MAXPATHLEN);
185*716fd348SMartin Matuska 	if (tmpname == NULL) {
186*716fd348SMartin Matuska 		free(buf);
187*716fd348SMartin Matuska 		return (ENOMEM);
188*716fd348SMartin Matuska 	}
189*716fd348SMartin Matuska 
190*716fd348SMartin Matuska 	snprintf(tmpname, MAXPATHLEN - 1, "%s.XXXXXX", filename);
191*716fd348SMartin Matuska 
192*716fd348SMartin Matuska 	int fd = mkstemp(tmpname);
193*716fd348SMartin Matuska 	if (fd < 0) {
194*716fd348SMartin Matuska 		error = errno;
195*716fd348SMartin Matuska 		free(buf);
196*716fd348SMartin Matuska 		free(tmpname);
197*716fd348SMartin Matuska 		return (error);
198*716fd348SMartin Matuska 	}
199*716fd348SMartin Matuska 	(void) close(fd);
200*716fd348SMartin Matuska 
201*716fd348SMartin Matuska 	gzFile fp = gzopen(tmpname, "w9b");
202*716fd348SMartin Matuska 	if (fp == Z_NULL) {
203*716fd348SMartin Matuska 		error = errno;
204*716fd348SMartin Matuska 		free(buf);
205*716fd348SMartin Matuska 		free(tmpname);
206*716fd348SMartin Matuska 		return (errno);
207*716fd348SMartin Matuska 	}
208*716fd348SMartin Matuska 
209*716fd348SMartin Matuska 	ssize_t rc, bytes = 0;
210*716fd348SMartin Matuska 	while (bytes < buflen) {
211*716fd348SMartin Matuska 		size_t size = MIN(buflen - bytes, 131072);
212*716fd348SMartin Matuska 		rc = gzwrite(fp, buf + bytes, size);
213*716fd348SMartin Matuska 		if (rc < 0) {
214*716fd348SMartin Matuska 			free(buf);
215*716fd348SMartin Matuska 			(void) gzerror(fp, &error);
216*716fd348SMartin Matuska 			(void) gzclose(fp);
217*716fd348SMartin Matuska 			(void) unlink(tmpname);
218*716fd348SMartin Matuska 			free(tmpname);
219*716fd348SMartin Matuska 			return (error);
220*716fd348SMartin Matuska 		} else if (rc == 0) {
221*716fd348SMartin Matuska 			break;
222*716fd348SMartin Matuska 		} else {
223*716fd348SMartin Matuska 			bytes += rc;
224*716fd348SMartin Matuska 		}
225*716fd348SMartin Matuska 	}
226*716fd348SMartin Matuska 
227*716fd348SMartin Matuska 	free(buf);
228*716fd348SMartin Matuska 	(void) gzclose(fp);
229*716fd348SMartin Matuska 
230*716fd348SMartin Matuska 	if (bytes != buflen) {
231*716fd348SMartin Matuska 		(void) unlink(tmpname);
232*716fd348SMartin Matuska 		free(tmpname);
233*716fd348SMartin Matuska 		return (EIO);
234*716fd348SMartin Matuska 	}
235*716fd348SMartin Matuska 
236*716fd348SMartin Matuska 	/*
237*716fd348SMartin Matuska 	 * Unlink the previous config file and replace it with the updated
238*716fd348SMartin Matuska 	 * version.  If we're able to unlink the file then directory is
239*716fd348SMartin Matuska 	 * writable by us and the subsequent rename should never fail.
240*716fd348SMartin Matuska 	 */
241*716fd348SMartin Matuska 	error = unlink(filename);
242*716fd348SMartin Matuska 	if (error != 0 && errno != ENOENT) {
243*716fd348SMartin Matuska 		error = errno;
244*716fd348SMartin Matuska 		(void) unlink(tmpname);
245*716fd348SMartin Matuska 		free(tmpname);
246*716fd348SMartin Matuska 		return (error);
247*716fd348SMartin Matuska 	}
248*716fd348SMartin Matuska 
249*716fd348SMartin Matuska 	error = rename(tmpname, filename);
250*716fd348SMartin Matuska 	if (error != 0) {
251*716fd348SMartin Matuska 		error = errno;
252*716fd348SMartin Matuska 		(void) unlink(tmpname);
253*716fd348SMartin Matuska 		free(tmpname);
254*716fd348SMartin Matuska 		return (error);
255*716fd348SMartin Matuska 	}
256*716fd348SMartin Matuska 
257*716fd348SMartin Matuska 	free(tmpname);
258*716fd348SMartin Matuska 
259*716fd348SMartin Matuska 	return (0);
260*716fd348SMartin Matuska }
261*716fd348SMartin Matuska 
262*716fd348SMartin Matuska /*
263*716fd348SMartin Matuska  * Add the dRAID map to the file and write it out.
264*716fd348SMartin Matuska  */
265*716fd348SMartin Matuska static int
266*716fd348SMartin Matuska write_map_key(const char *filename, char *key, draid_map_t *map,
267*716fd348SMartin Matuska     double worst_ratio, double avg_ratio)
268*716fd348SMartin Matuska {
269*716fd348SMartin Matuska 	nvlist_t *nv_cfg, *allcfgs;
270*716fd348SMartin Matuska 	int error;
271*716fd348SMartin Matuska 
272*716fd348SMartin Matuska 	/*
273*716fd348SMartin Matuska 	 * Add the configuration to an existing or new file.  The new
274*716fd348SMartin Matuska 	 * configuration will replace an existing configuration with the
275*716fd348SMartin Matuska 	 * same key if it has a lower ratio and is therefore better.
276*716fd348SMartin Matuska 	 */
277*716fd348SMartin Matuska 	error = read_map(filename, &allcfgs);
278*716fd348SMartin Matuska 	if (error == ENOENT) {
279*716fd348SMartin Matuska 		allcfgs = fnvlist_alloc();
280*716fd348SMartin Matuska 	} else if (error != 0) {
281*716fd348SMartin Matuska 		return (error);
282*716fd348SMartin Matuska 	}
283*716fd348SMartin Matuska 
284*716fd348SMartin Matuska 	error = nvlist_lookup_nvlist(allcfgs, key, &nv_cfg);
285*716fd348SMartin Matuska 	if (error == 0) {
286*716fd348SMartin Matuska 		uint64_t nv_cfg_worst_ratio = fnvlist_lookup_uint64(nv_cfg,
287*716fd348SMartin Matuska 		    MAP_WORST_RATIO);
288*716fd348SMartin Matuska 		double nv_worst_ratio = (double)nv_cfg_worst_ratio / 1000.0;
289*716fd348SMartin Matuska 
290*716fd348SMartin Matuska 		if (worst_ratio < nv_worst_ratio) {
291*716fd348SMartin Matuska 			/* Replace old map with the more balanced new map. */
292*716fd348SMartin Matuska 			fnvlist_remove(allcfgs, key);
293*716fd348SMartin Matuska 		} else {
294*716fd348SMartin Matuska 			/* The old map is preferable, keep it. */
295*716fd348SMartin Matuska 			nvlist_free(allcfgs);
296*716fd348SMartin Matuska 			return (EEXIST);
297*716fd348SMartin Matuska 		}
298*716fd348SMartin Matuska 	}
299*716fd348SMartin Matuska 
300*716fd348SMartin Matuska 	nvlist_t *cfg = fnvlist_alloc();
301*716fd348SMartin Matuska 	fnvlist_add_uint64(cfg, MAP_SEED, map->dm_seed);
302*716fd348SMartin Matuska 	fnvlist_add_uint64(cfg, MAP_CHECKSUM, map->dm_checksum);
303*716fd348SMartin Matuska 	fnvlist_add_uint64(cfg, MAP_CHILDREN, map->dm_children);
304*716fd348SMartin Matuska 	fnvlist_add_uint64(cfg, MAP_NPERMS, map->dm_nperms);
305*716fd348SMartin Matuska 	fnvlist_add_uint8_array(cfg, MAP_PERMS,  map->dm_perms,
306*716fd348SMartin Matuska 	    map->dm_children * map->dm_nperms * sizeof (uint8_t));
307*716fd348SMartin Matuska 
308*716fd348SMartin Matuska 	fnvlist_add_uint64(cfg, MAP_WORST_RATIO,
309*716fd348SMartin Matuska 	    (uint64_t)(worst_ratio * 1000.0));
310*716fd348SMartin Matuska 	fnvlist_add_uint64(cfg, MAP_AVG_RATIO,
311*716fd348SMartin Matuska 	    (uint64_t)(avg_ratio * 1000.0));
312*716fd348SMartin Matuska 
313*716fd348SMartin Matuska 	error = nvlist_add_nvlist(allcfgs, key, cfg);
314*716fd348SMartin Matuska 	if (error == 0)
315*716fd348SMartin Matuska 		error = write_map(filename, allcfgs);
316*716fd348SMartin Matuska 
317*716fd348SMartin Matuska 	nvlist_free(cfg);
318*716fd348SMartin Matuska 	nvlist_free(allcfgs);
319*716fd348SMartin Matuska 	return (error);
320*716fd348SMartin Matuska }
321*716fd348SMartin Matuska 
322*716fd348SMartin Matuska static void
323*716fd348SMartin Matuska dump_map(draid_map_t *map, char *key, double worst_ratio, double avg_ratio,
324*716fd348SMartin Matuska     int verbose)
325*716fd348SMartin Matuska {
326*716fd348SMartin Matuska 	if (verbose == 0) {
327*716fd348SMartin Matuska 		return;
328*716fd348SMartin Matuska 	} else if (verbose == 1) {
329*716fd348SMartin Matuska 		printf("    \"%s\": seed: 0x%016llx worst_ratio: %2.03f "
330*716fd348SMartin Matuska 		    "avg_ratio: %2.03f\n", key, (u_longlong_t)map->dm_seed,
331*716fd348SMartin Matuska 		    worst_ratio, avg_ratio);
332*716fd348SMartin Matuska 		return;
333*716fd348SMartin Matuska 	} else {
334*716fd348SMartin Matuska 		printf("    \"%s\":\n"
335*716fd348SMartin Matuska 		    "        seed: 0x%016llx\n"
336*716fd348SMartin Matuska 		    "        checksum: 0x%016llx\n"
337*716fd348SMartin Matuska 		    "        worst_ratio: %2.03f\n"
338*716fd348SMartin Matuska 		    "        avg_ratio: %2.03f\n"
339*716fd348SMartin Matuska 		    "        children: %llu\n"
340*716fd348SMartin Matuska 		    "        nperms: %llu\n",
341*716fd348SMartin Matuska 		    key, (u_longlong_t)map->dm_seed,
342*716fd348SMartin Matuska 		    (u_longlong_t)map->dm_checksum, worst_ratio, avg_ratio,
343*716fd348SMartin Matuska 		    (u_longlong_t)map->dm_children,
344*716fd348SMartin Matuska 		    (u_longlong_t)map->dm_nperms);
345*716fd348SMartin Matuska 
346*716fd348SMartin Matuska 		if (verbose > 2) {
347*716fd348SMartin Matuska 			printf("        perms = {\n");
348*716fd348SMartin Matuska 			for (int i = 0; i < map->dm_nperms; i++) {
349*716fd348SMartin Matuska 				printf("            { ");
350*716fd348SMartin Matuska 				for (int j = 0; j < map->dm_children; j++) {
351*716fd348SMartin Matuska 					printf("%3d%s ", map->dm_perms[
352*716fd348SMartin Matuska 					    i * map->dm_children + j],
353*716fd348SMartin Matuska 					    j < map->dm_children - 1 ?
354*716fd348SMartin Matuska 					    "," : "");
355*716fd348SMartin Matuska 				}
356*716fd348SMartin Matuska 				printf(" },\n");
357*716fd348SMartin Matuska 			}
358*716fd348SMartin Matuska 			printf("        }\n");
359*716fd348SMartin Matuska 		} else if (verbose == 2) {
360*716fd348SMartin Matuska 			printf("        draid_perms = <omitted>\n");
361*716fd348SMartin Matuska 		}
362*716fd348SMartin Matuska 	}
363*716fd348SMartin Matuska }
364*716fd348SMartin Matuska 
365*716fd348SMartin Matuska static void
366*716fd348SMartin Matuska dump_map_nv(char *key, nvlist_t *cfg, int verbose)
367*716fd348SMartin Matuska {
368*716fd348SMartin Matuska 	draid_map_t map;
369*716fd348SMartin Matuska 	uint_t c;
370*716fd348SMartin Matuska 
371*716fd348SMartin Matuska 	uint64_t worst_ratio = fnvlist_lookup_uint64(cfg, MAP_WORST_RATIO);
372*716fd348SMartin Matuska 	uint64_t avg_ratio = fnvlist_lookup_uint64(cfg, MAP_AVG_RATIO);
373*716fd348SMartin Matuska 
374*716fd348SMartin Matuska 	map.dm_seed = fnvlist_lookup_uint64(cfg, MAP_SEED);
375*716fd348SMartin Matuska 	map.dm_checksum = fnvlist_lookup_uint64(cfg, MAP_CHECKSUM);
376*716fd348SMartin Matuska 	map.dm_children = fnvlist_lookup_uint64(cfg, MAP_CHILDREN);
377*716fd348SMartin Matuska 	map.dm_nperms = fnvlist_lookup_uint64(cfg, MAP_NPERMS);
378*716fd348SMartin Matuska 	nvlist_lookup_uint8_array(cfg, MAP_PERMS, &map.dm_perms, &c);
379*716fd348SMartin Matuska 
380*716fd348SMartin Matuska 	dump_map(&map, key, (double)worst_ratio / 1000.0,
381*716fd348SMartin Matuska 	    avg_ratio / 1000.0, verbose);
382*716fd348SMartin Matuska }
383*716fd348SMartin Matuska 
384*716fd348SMartin Matuska /*
385*716fd348SMartin Matuska  * Print a summary of the mapping.
386*716fd348SMartin Matuska  */
387*716fd348SMartin Matuska static int
388*716fd348SMartin Matuska dump_map_key(const char *filename, char *key, int verbose)
389*716fd348SMartin Matuska {
390*716fd348SMartin Matuska 	nvlist_t *cfg;
391*716fd348SMartin Matuska 	int error;
392*716fd348SMartin Matuska 
393*716fd348SMartin Matuska 	error = read_map_key(filename, key, &cfg);
394*716fd348SMartin Matuska 	if (error != 0)
395*716fd348SMartin Matuska 		return (error);
396*716fd348SMartin Matuska 
397*716fd348SMartin Matuska 	dump_map_nv(key, cfg, verbose);
398*716fd348SMartin Matuska 
399*716fd348SMartin Matuska 	return (0);
400*716fd348SMartin Matuska }
401*716fd348SMartin Matuska 
402*716fd348SMartin Matuska /*
403*716fd348SMartin Matuska  * Allocate a new permutation map for evaluation.
404*716fd348SMartin Matuska  */
405*716fd348SMartin Matuska static int
406*716fd348SMartin Matuska alloc_new_map(uint64_t children, uint64_t nperms, uint64_t seed,
407*716fd348SMartin Matuska     draid_map_t **mapp)
408*716fd348SMartin Matuska {
409*716fd348SMartin Matuska 	draid_map_t *map;
410*716fd348SMartin Matuska 	int error;
411*716fd348SMartin Matuska 
412*716fd348SMartin Matuska 	map = malloc(sizeof (draid_map_t));
413*716fd348SMartin Matuska 	if (map == NULL)
414*716fd348SMartin Matuska 		return (ENOMEM);
415*716fd348SMartin Matuska 
416*716fd348SMartin Matuska 	map->dm_children = children;
417*716fd348SMartin Matuska 	map->dm_nperms = nperms;
418*716fd348SMartin Matuska 	map->dm_seed = seed;
419*716fd348SMartin Matuska 	map->dm_checksum = 0;
420*716fd348SMartin Matuska 
421*716fd348SMartin Matuska 	error = vdev_draid_generate_perms(map, &map->dm_perms);
422*716fd348SMartin Matuska 	if (error) {
423*716fd348SMartin Matuska 		free(map);
424*716fd348SMartin Matuska 		return (error);
425*716fd348SMartin Matuska 	}
426*716fd348SMartin Matuska 
427*716fd348SMartin Matuska 	*mapp = map;
428*716fd348SMartin Matuska 
429*716fd348SMartin Matuska 	return (0);
430*716fd348SMartin Matuska }
431*716fd348SMartin Matuska 
432*716fd348SMartin Matuska /*
433*716fd348SMartin Matuska  * Allocate the fixed permutation map for N children.
434*716fd348SMartin Matuska  */
435*716fd348SMartin Matuska static int
436*716fd348SMartin Matuska alloc_fixed_map(uint64_t children, draid_map_t **mapp)
437*716fd348SMartin Matuska {
438*716fd348SMartin Matuska 	const draid_map_t *fixed_map;
439*716fd348SMartin Matuska 	draid_map_t *map;
440*716fd348SMartin Matuska 	int error;
441*716fd348SMartin Matuska 
442*716fd348SMartin Matuska 	error = vdev_draid_lookup_map(children, &fixed_map);
443*716fd348SMartin Matuska 	if (error)
444*716fd348SMartin Matuska 		return (error);
445*716fd348SMartin Matuska 
446*716fd348SMartin Matuska 	map = malloc(sizeof (draid_map_t));
447*716fd348SMartin Matuska 	if (map == NULL)
448*716fd348SMartin Matuska 		return (ENOMEM);
449*716fd348SMartin Matuska 
450*716fd348SMartin Matuska 	memcpy(map, fixed_map, sizeof (draid_map_t));
451*716fd348SMartin Matuska 	VERIFY3U(map->dm_checksum, !=, 0);
452*716fd348SMartin Matuska 
453*716fd348SMartin Matuska 	error = vdev_draid_generate_perms(map, &map->dm_perms);
454*716fd348SMartin Matuska 	if (error) {
455*716fd348SMartin Matuska 		free(map);
456*716fd348SMartin Matuska 		return (error);
457*716fd348SMartin Matuska 	}
458*716fd348SMartin Matuska 
459*716fd348SMartin Matuska 	*mapp = map;
460*716fd348SMartin Matuska 
461*716fd348SMartin Matuska 	return (0);
462*716fd348SMartin Matuska }
463*716fd348SMartin Matuska 
464*716fd348SMartin Matuska /*
465*716fd348SMartin Matuska  * Free a permutation map.
466*716fd348SMartin Matuska  */
467*716fd348SMartin Matuska static void
468*716fd348SMartin Matuska free_map(draid_map_t *map)
469*716fd348SMartin Matuska {
470*716fd348SMartin Matuska 	free(map->dm_perms);
471*716fd348SMartin Matuska 	free(map);
472*716fd348SMartin Matuska }
473*716fd348SMartin Matuska 
474*716fd348SMartin Matuska /*
475*716fd348SMartin Matuska  * Check if dev is in the provided list of faulted devices.
476*716fd348SMartin Matuska  */
477*716fd348SMartin Matuska static inline boolean_t
478*716fd348SMartin Matuska is_faulted(int *faulted_devs, int nfaulted, int dev)
479*716fd348SMartin Matuska {
480*716fd348SMartin Matuska 	for (int i = 0; i < nfaulted; i++)
481*716fd348SMartin Matuska 		if (faulted_devs[i] == dev)
482*716fd348SMartin Matuska 			return (B_TRUE);
483*716fd348SMartin Matuska 
484*716fd348SMartin Matuska 	return (B_FALSE);
485*716fd348SMartin Matuska }
486*716fd348SMartin Matuska 
487*716fd348SMartin Matuska /*
488*716fd348SMartin Matuska  * Evaluate how resilvering I/O will be distributed given a list of faulted
489*716fd348SMartin Matuska  * vdevs.  As a simplification we assume one IO is sufficient to repair each
490*716fd348SMartin Matuska  * damaged device in a group.
491*716fd348SMartin Matuska  */
492*716fd348SMartin Matuska static double
493*716fd348SMartin Matuska eval_resilver(draid_map_t *map, uint64_t groupwidth, uint64_t nspares,
494*716fd348SMartin Matuska     int *faulted_devs, int nfaulted, int *min_child_ios, int *max_child_ios)
495*716fd348SMartin Matuska {
496*716fd348SMartin Matuska 	uint64_t children = map->dm_children;
497*716fd348SMartin Matuska 	uint64_t ngroups = 1;
498*716fd348SMartin Matuska 	uint64_t ndisks = children - nspares;
499*716fd348SMartin Matuska 
500*716fd348SMartin Matuska 	/*
501*716fd348SMartin Matuska 	 * Calculate the minimum number of groups required to fill a slice.
502*716fd348SMartin Matuska 	 */
503*716fd348SMartin Matuska 	while (ngroups * (groupwidth) % (children - nspares) != 0)
504*716fd348SMartin Matuska 		ngroups++;
505*716fd348SMartin Matuska 
506*716fd348SMartin Matuska 	int *ios = calloc(map->dm_children, sizeof (uint64_t));
507*716fd348SMartin Matuska 
508*716fd348SMartin Matuska 	/* Resilver all rows */
509*716fd348SMartin Matuska 	for (int i = 0; i < map->dm_nperms; i++) {
510*716fd348SMartin Matuska 		uint8_t *row = &map->dm_perms[i * map->dm_children];
511*716fd348SMartin Matuska 
512*716fd348SMartin Matuska 		/* Resilver all groups with faulted drives */
513*716fd348SMartin Matuska 		for (int j = 0; j < ngroups; j++) {
514*716fd348SMartin Matuska 			uint64_t spareidx = map->dm_children - nspares;
515*716fd348SMartin Matuska 			boolean_t repair_needed = B_FALSE;
516*716fd348SMartin Matuska 
517*716fd348SMartin Matuska 			/* See if any devices in this group are faulted */
518*716fd348SMartin Matuska 			uint64_t groupstart = (j * groupwidth) % ndisks;
519*716fd348SMartin Matuska 
520*716fd348SMartin Matuska 			for (int k = 0; k < groupwidth; k++) {
521*716fd348SMartin Matuska 				uint64_t groupidx = (groupstart + k) % ndisks;
522*716fd348SMartin Matuska 
523*716fd348SMartin Matuska 				repair_needed = is_faulted(faulted_devs,
524*716fd348SMartin Matuska 				    nfaulted, row[groupidx]);
525*716fd348SMartin Matuska 				if (repair_needed)
526*716fd348SMartin Matuska 					break;
527*716fd348SMartin Matuska 			}
528*716fd348SMartin Matuska 
529*716fd348SMartin Matuska 			if (repair_needed == B_FALSE)
530*716fd348SMartin Matuska 				continue;
531*716fd348SMartin Matuska 
532*716fd348SMartin Matuska 			/*
533*716fd348SMartin Matuska 			 * This group is degraded. Calculate the number of
534*716fd348SMartin Matuska 			 * reads the non-faulted drives require and the number
535*716fd348SMartin Matuska 			 * of writes to the distributed hot spare for this row.
536*716fd348SMartin Matuska 			 */
537*716fd348SMartin Matuska 			for (int k = 0; k < groupwidth; k++) {
538*716fd348SMartin Matuska 				uint64_t groupidx = (groupstart + k) % ndisks;
539*716fd348SMartin Matuska 
540*716fd348SMartin Matuska 				if (!is_faulted(faulted_devs, nfaulted,
541*716fd348SMartin Matuska 				    row[groupidx])) {
542*716fd348SMartin Matuska 					ios[row[groupidx]]++;
543*716fd348SMartin Matuska 				} else if (nspares > 0) {
544*716fd348SMartin Matuska 					while (is_faulted(faulted_devs,
545*716fd348SMartin Matuska 					    nfaulted, row[spareidx])) {
546*716fd348SMartin Matuska 						spareidx++;
547*716fd348SMartin Matuska 					}
548*716fd348SMartin Matuska 
549*716fd348SMartin Matuska 					ASSERT3U(spareidx, <, map->dm_children);
550*716fd348SMartin Matuska 					ios[row[spareidx]]++;
551*716fd348SMartin Matuska 					spareidx++;
552*716fd348SMartin Matuska 				}
553*716fd348SMartin Matuska 			}
554*716fd348SMartin Matuska 		}
555*716fd348SMartin Matuska 	}
556*716fd348SMartin Matuska 
557*716fd348SMartin Matuska 	*min_child_ios = INT_MAX;
558*716fd348SMartin Matuska 	*max_child_ios = 0;
559*716fd348SMartin Matuska 
560*716fd348SMartin Matuska 	/*
561*716fd348SMartin Matuska 	 * Find the drives with fewest and most required I/O.  These values
562*716fd348SMartin Matuska 	 * are used to calculate the imbalance ratio.  To avoid returning an
563*716fd348SMartin Matuska 	 * infinite value for permutations which have children that perform
564*716fd348SMartin Matuska 	 * no IO a floor of 1 IO per child is set.  This ensures a meaningful
565*716fd348SMartin Matuska 	 * ratio is returned for comparison and it is not an uncommon when
566*716fd348SMartin Matuska 	 * there are a large number of children.
567*716fd348SMartin Matuska 	 */
568*716fd348SMartin Matuska 	for (int i = 0; i < map->dm_children; i++) {
569*716fd348SMartin Matuska 
570*716fd348SMartin Matuska 		if (is_faulted(faulted_devs, nfaulted, i)) {
571*716fd348SMartin Matuska 			ASSERT0(ios[i]);
572*716fd348SMartin Matuska 			continue;
573*716fd348SMartin Matuska 		}
574*716fd348SMartin Matuska 
575*716fd348SMartin Matuska 		if (ios[i] == 0)
576*716fd348SMartin Matuska 			ios[i] = 1;
577*716fd348SMartin Matuska 
578*716fd348SMartin Matuska 		if (ios[i] < *min_child_ios)
579*716fd348SMartin Matuska 			*min_child_ios = ios[i];
580*716fd348SMartin Matuska 
581*716fd348SMartin Matuska 		if (ios[i] > *max_child_ios)
582*716fd348SMartin Matuska 			*max_child_ios = ios[i];
583*716fd348SMartin Matuska 	}
584*716fd348SMartin Matuska 
585*716fd348SMartin Matuska 	ASSERT3S(*min_child_ios, !=, INT_MAX);
586*716fd348SMartin Matuska 	ASSERT3S(*max_child_ios, !=, 0);
587*716fd348SMartin Matuska 
588*716fd348SMartin Matuska 	double ratio = (double)(*max_child_ios) / (double)(*min_child_ios);
589*716fd348SMartin Matuska 
590*716fd348SMartin Matuska 	free(ios);
591*716fd348SMartin Matuska 
592*716fd348SMartin Matuska 	return (ratio);
593*716fd348SMartin Matuska }
594*716fd348SMartin Matuska 
/*
 * Evaluate the quality of the permutation mapping by considering possible
 * device failures.  Returns the imbalance ratio for the worst mapping which
 * is defined to be the largest number of child IOs over the fewest number
 * of child IOs.  A value of 1.0 indicates the mapping is perfectly balanced
 * and all children perform an equal amount of work during reconstruction.
 */
602*716fd348SMartin Matuska static void
603*716fd348SMartin Matuska eval_decluster(draid_map_t *map, double *worst_ratiop, double *avg_ratiop)
604*716fd348SMartin Matuska {
605*716fd348SMartin Matuska 	uint64_t children = map->dm_children;
606*716fd348SMartin Matuska 	double worst_ratio = 1.0;
607*716fd348SMartin Matuska 	double sum = 0;
608*716fd348SMartin Matuska 	int worst_min_ios = 0, worst_max_ios = 0;
609*716fd348SMartin Matuska 	int n = 0;
610*716fd348SMartin Matuska 
611*716fd348SMartin Matuska 	/*
612*716fd348SMartin Matuska 	 * When there are only 2 children there can be no distributed
613*716fd348SMartin Matuska 	 * spare and no resilver to evaluate.  Default to a ratio of 1.0
614*716fd348SMartin Matuska 	 * for this degenerate case.
615*716fd348SMartin Matuska 	 */
616*716fd348SMartin Matuska 	if (children == VDEV_DRAID_MIN_CHILDREN) {
617*716fd348SMartin Matuska 		*worst_ratiop = 1.0;
618*716fd348SMartin Matuska 		*avg_ratiop = 1.0;
619*716fd348SMartin Matuska 		return;
620*716fd348SMartin Matuska 	}
621*716fd348SMartin Matuska 
622*716fd348SMartin Matuska 	/*
623*716fd348SMartin Matuska 	 * Score the mapping as if it had either 1 or 2 distributed spares.
624*716fd348SMartin Matuska 	 */
625*716fd348SMartin Matuska 	for (int nspares = 1; nspares <= 2; nspares++) {
626*716fd348SMartin Matuska 		uint64_t faults = nspares;
627*716fd348SMartin Matuska 
628*716fd348SMartin Matuska 		/*
629*716fd348SMartin Matuska 		 * Score groupwidths up to 19.  This value was chosen as the
630*716fd348SMartin Matuska 		 * largest reasonable width (16d+3p).  dRAID pools may be still
631*716fd348SMartin Matuska 		 * be created with wider stripes but they are not considered in
632*716fd348SMartin Matuska 		 * this analysis in order to optimize for the most common cases.
633*716fd348SMartin Matuska 		 */
634*716fd348SMartin Matuska 		for (uint64_t groupwidth = 2;
635*716fd348SMartin Matuska 		    groupwidth <= MIN(children - nspares, 19);
636*716fd348SMartin Matuska 		    groupwidth++) {
637*716fd348SMartin Matuska 			int faulted_devs[2];
638*716fd348SMartin Matuska 			int min_ios, max_ios;
639*716fd348SMartin Matuska 
640*716fd348SMartin Matuska 			/*
641*716fd348SMartin Matuska 			 * Score possible devices faults.  This is limited
642*716fd348SMartin Matuska 			 * to exactly one fault per distributed spare for
643*716fd348SMartin Matuska 			 * the purposes of this similation.
644*716fd348SMartin Matuska 			 */
645*716fd348SMartin Matuska 			for (int f1 = 0; f1 < children; f1++) {
646*716fd348SMartin Matuska 				faulted_devs[0] = f1;
647*716fd348SMartin Matuska 				double ratio;
648*716fd348SMartin Matuska 
649*716fd348SMartin Matuska 				if (faults == 1) {
650*716fd348SMartin Matuska 					ratio = eval_resilver(map, groupwidth,
651*716fd348SMartin Matuska 					    nspares, faulted_devs, faults,
652*716fd348SMartin Matuska 					    &min_ios, &max_ios);
653*716fd348SMartin Matuska 
654*716fd348SMartin Matuska 					if (ratio > worst_ratio) {
655*716fd348SMartin Matuska 						worst_ratio = ratio;
656*716fd348SMartin Matuska 						worst_min_ios = min_ios;
657*716fd348SMartin Matuska 						worst_max_ios = max_ios;
658*716fd348SMartin Matuska 					}
659*716fd348SMartin Matuska 
660*716fd348SMartin Matuska 					sum += ratio;
661*716fd348SMartin Matuska 					n++;
662*716fd348SMartin Matuska 				} else if (faults == 2) {
663*716fd348SMartin Matuska 					for (int f2 = f1 + 1; f2 < children;
664*716fd348SMartin Matuska 					    f2++) {
665*716fd348SMartin Matuska 						faulted_devs[1] = f2;
666*716fd348SMartin Matuska 
667*716fd348SMartin Matuska 						ratio = eval_resilver(map,
668*716fd348SMartin Matuska 						    groupwidth, nspares,
669*716fd348SMartin Matuska 						    faulted_devs, faults,
670*716fd348SMartin Matuska 						    &min_ios, &max_ios);
671*716fd348SMartin Matuska 
672*716fd348SMartin Matuska 						if (ratio > worst_ratio) {
673*716fd348SMartin Matuska 							worst_ratio = ratio;
674*716fd348SMartin Matuska 							worst_min_ios = min_ios;
675*716fd348SMartin Matuska 							worst_max_ios = max_ios;
676*716fd348SMartin Matuska 						}
677*716fd348SMartin Matuska 
678*716fd348SMartin Matuska 						sum += ratio;
679*716fd348SMartin Matuska 						n++;
680*716fd348SMartin Matuska 					}
681*716fd348SMartin Matuska 				}
682*716fd348SMartin Matuska 			}
683*716fd348SMartin Matuska 		}
684*716fd348SMartin Matuska 	}
685*716fd348SMartin Matuska 
686*716fd348SMartin Matuska 	*worst_ratiop = worst_ratio;
687*716fd348SMartin Matuska 	*avg_ratiop = sum / n;
688*716fd348SMartin Matuska 
689*716fd348SMartin Matuska 	/*
690*716fd348SMartin Matuska 	 * Log the min/max io values for particularly unbalanced maps.
691*716fd348SMartin Matuska 	 * Since the maps are generated entirely randomly these are possible
692*716fd348SMartin Matuska 	 * be exceedingly unlikely.  We log it for possible investigation.
693*716fd348SMartin Matuska 	 */
694*716fd348SMartin Matuska 	if (worst_ratio > 100.0) {
695*716fd348SMartin Matuska 		dump_map(map, "DEBUG", worst_ratio, *avg_ratiop, 2);
696*716fd348SMartin Matuska 		printf("worst_min_ios=%d worst_max_ios=%d\n",
697*716fd348SMartin Matuska 		    worst_min_ios, worst_max_ios);
698*716fd348SMartin Matuska 	}
699*716fd348SMartin Matuska }
700*716fd348SMartin Matuska 
701*716fd348SMartin Matuska static int
702*716fd348SMartin Matuska eval_maps(uint64_t children, int passes, uint64_t *map_seed,
703*716fd348SMartin Matuska     draid_map_t **best_mapp, double *best_ratiop, double *avg_ratiop)
704*716fd348SMartin Matuska {
705*716fd348SMartin Matuska 	draid_map_t *best_map = NULL;
706*716fd348SMartin Matuska 	double best_worst_ratio = 1000.0;
707*716fd348SMartin Matuska 	double best_avg_ratio = 1000.0;
708*716fd348SMartin Matuska 
709*716fd348SMartin Matuska 	/*
710*716fd348SMartin Matuska 	 * Perform the requested number of passes evaluating randomly
711*716fd348SMartin Matuska 	 * generated permutation maps.  Only the best version is kept.
712*716fd348SMartin Matuska 	 */
713*716fd348SMartin Matuska 	for (int i = 0; i < passes; i++) {
714*716fd348SMartin Matuska 		double worst_ratio, avg_ratio;
715*716fd348SMartin Matuska 		draid_map_t *map;
716*716fd348SMartin Matuska 		int error;
717*716fd348SMartin Matuska 
718*716fd348SMartin Matuska 		/*
719*716fd348SMartin Matuska 		 * Calculate the next seed and generate a new candidate map.
720*716fd348SMartin Matuska 		 */
721*716fd348SMartin Matuska 		error = alloc_new_map(children, MAP_ROWS_DEFAULT,
722*716fd348SMartin Matuska 		    vdev_draid_rand(map_seed), &map);
723*716fd348SMartin Matuska 		if (error)
724*716fd348SMartin Matuska 			return (error);
725*716fd348SMartin Matuska 
726*716fd348SMartin Matuska 		/*
727*716fd348SMartin Matuska 		 * Consider maps with a lower worst_ratio to be of higher
728*716fd348SMartin Matuska 		 * quality.  Some maps may have a lower avg_ratio but they
729*716fd348SMartin Matuska 		 * are discarded since they might include some particularly
730*716fd348SMartin Matuska 		 * imbalanced permutations.  The average is tracked to in
731*716fd348SMartin Matuska 		 * order to get a sense of the average permutation quality.
732*716fd348SMartin Matuska 		 */
733*716fd348SMartin Matuska 		eval_decluster(map, &worst_ratio, &avg_ratio);
734*716fd348SMartin Matuska 
735*716fd348SMartin Matuska 		if (best_map == NULL || worst_ratio < best_worst_ratio) {
736*716fd348SMartin Matuska 
737*716fd348SMartin Matuska 			if (best_map != NULL)
738*716fd348SMartin Matuska 				free_map(best_map);
739*716fd348SMartin Matuska 
740*716fd348SMartin Matuska 			best_map = map;
741*716fd348SMartin Matuska 			best_worst_ratio = worst_ratio;
742*716fd348SMartin Matuska 			best_avg_ratio = avg_ratio;
743*716fd348SMartin Matuska 		} else {
744*716fd348SMartin Matuska 			free_map(map);
745*716fd348SMartin Matuska 		}
746*716fd348SMartin Matuska 	}
747*716fd348SMartin Matuska 
748*716fd348SMartin Matuska 	/*
749*716fd348SMartin Matuska 	 * After determining the best map generate a checksum over the full
750*716fd348SMartin Matuska 	 * permutation array.  This checksum is verified when opening a dRAID
751*716fd348SMartin Matuska 	 * pool to ensure the generated in memory permutations are correct.
752*716fd348SMartin Matuska 	 */
753*716fd348SMartin Matuska 	zio_cksum_t cksum;
754*716fd348SMartin Matuska 	fletcher_4_native_varsize(best_map->dm_perms,
755*716fd348SMartin Matuska 	    sizeof (uint8_t) * best_map->dm_children * best_map->dm_nperms,
756*716fd348SMartin Matuska 	    &cksum);
757*716fd348SMartin Matuska 	best_map->dm_checksum = cksum.zc_word[0];
758*716fd348SMartin Matuska 
759*716fd348SMartin Matuska 	*best_mapp = best_map;
760*716fd348SMartin Matuska 	*best_ratiop = best_worst_ratio;
761*716fd348SMartin Matuska 	*avg_ratiop = best_avg_ratio;
762*716fd348SMartin Matuska 
763*716fd348SMartin Matuska 	return (0);
764*716fd348SMartin Matuska }
765*716fd348SMartin Matuska 
766*716fd348SMartin Matuska static int
767*716fd348SMartin Matuska draid_generate(int argc, char *argv[])
768*716fd348SMartin Matuska {
769*716fd348SMartin Matuska 	char filename[MAXPATHLEN] = {0};
770*716fd348SMartin Matuska 	uint64_t map_seed;
771*716fd348SMartin Matuska 	int c, fd, error, verbose = 0, passes = 1, continuous = 0;
772*716fd348SMartin Matuska 	int min_children = VDEV_DRAID_MIN_CHILDREN;
773*716fd348SMartin Matuska 	int max_children = VDEV_DRAID_MAX_CHILDREN;
774*716fd348SMartin Matuska 	int restarts = 0;
775*716fd348SMartin Matuska 
776*716fd348SMartin Matuska 	while ((c = getopt(argc, argv, ":cm:n:p:v")) != -1) {
777*716fd348SMartin Matuska 		switch (c) {
778*716fd348SMartin Matuska 		case 'c':
779*716fd348SMartin Matuska 			continuous++;
780*716fd348SMartin Matuska 			break;
781*716fd348SMartin Matuska 		case 'm':
782*716fd348SMartin Matuska 			min_children = (int)strtol(optarg, NULL, 0);
783*716fd348SMartin Matuska 			if (min_children < VDEV_DRAID_MIN_CHILDREN) {
784*716fd348SMartin Matuska 				(void) fprintf(stderr, "A minimum of 2 "
785*716fd348SMartin Matuska 				    "children are required.\n");
786*716fd348SMartin Matuska 				return (1);
787*716fd348SMartin Matuska 			}
788*716fd348SMartin Matuska 
789*716fd348SMartin Matuska 			break;
790*716fd348SMartin Matuska 		case 'n':
791*716fd348SMartin Matuska 			max_children = (int)strtol(optarg, NULL, 0);
792*716fd348SMartin Matuska 			if (max_children > VDEV_DRAID_MAX_CHILDREN) {
793*716fd348SMartin Matuska 				(void) fprintf(stderr, "A maximum of %d "
794*716fd348SMartin Matuska 				    "children are allowed.\n",
795*716fd348SMartin Matuska 				    VDEV_DRAID_MAX_CHILDREN);
796*716fd348SMartin Matuska 				return (1);
797*716fd348SMartin Matuska 			}
798*716fd348SMartin Matuska 			break;
799*716fd348SMartin Matuska 		case 'p':
800*716fd348SMartin Matuska 			passes = (int)strtol(optarg, NULL, 0);
801*716fd348SMartin Matuska 			break;
802*716fd348SMartin Matuska 		case 'v':
803*716fd348SMartin Matuska 			/*
804*716fd348SMartin Matuska 			 * 0 - Only log when a better map is added to the file.
805*716fd348SMartin Matuska 			 * 1 - Log the current best map for each child count.
806*716fd348SMartin Matuska 			 *     Minimal output on a single summary line.
807*716fd348SMartin Matuska 			 * 2 - Log the current best map for each child count.
808*716fd348SMartin Matuska 			 *     More verbose includes most map fields.
809*716fd348SMartin Matuska 			 * 3 - Log the current best map for each child count.
810*716fd348SMartin Matuska 			 *     Very verbose all fields including the full map.
811*716fd348SMartin Matuska 			 */
812*716fd348SMartin Matuska 			verbose++;
813*716fd348SMartin Matuska 			break;
814*716fd348SMartin Matuska 		case ':':
815*716fd348SMartin Matuska 			(void) fprintf(stderr,
816*716fd348SMartin Matuska 			    "missing argument for '%c' option\n", optopt);
817*716fd348SMartin Matuska 			draid_usage();
818*716fd348SMartin Matuska 			break;
819*716fd348SMartin Matuska 		case '?':
820*716fd348SMartin Matuska 			(void) fprintf(stderr, "invalid option '%c'\n",
821*716fd348SMartin Matuska 			    optopt);
822*716fd348SMartin Matuska 			draid_usage();
823*716fd348SMartin Matuska 			break;
824*716fd348SMartin Matuska 		}
825*716fd348SMartin Matuska 	}
826*716fd348SMartin Matuska 
827*716fd348SMartin Matuska 	if (argc > optind)
828*716fd348SMartin Matuska 		strncpy(filename, argv[optind], MAXPATHLEN - 1);
829*716fd348SMartin Matuska 	else {
830*716fd348SMartin Matuska 		(void) fprintf(stderr, "A FILE must be specified.\n");
831*716fd348SMartin Matuska 		return (1);
832*716fd348SMartin Matuska 	}
833*716fd348SMartin Matuska 
834*716fd348SMartin Matuska restart:
835*716fd348SMartin Matuska 	/*
836*716fd348SMartin Matuska 	 * Start with a fresh seed from /dev/urandom.
837*716fd348SMartin Matuska 	 */
838*716fd348SMartin Matuska 	fd = open("/dev/urandom", O_RDONLY);
839*716fd348SMartin Matuska 	if (fd < 0) {
840*716fd348SMartin Matuska 		printf("Unable to open /dev/urandom: %s\n:", strerror(errno));
841*716fd348SMartin Matuska 		return (1);
842*716fd348SMartin Matuska 	} else {
843*716fd348SMartin Matuska 		ssize_t bytes = sizeof (map_seed);
844*716fd348SMartin Matuska 		ssize_t bytes_read = 0;
845*716fd348SMartin Matuska 
846*716fd348SMartin Matuska 		while (bytes_read < bytes) {
847*716fd348SMartin Matuska 			ssize_t rc = read(fd, ((char *)&map_seed) + bytes_read,
848*716fd348SMartin Matuska 			    bytes - bytes_read);
849*716fd348SMartin Matuska 			if (rc < 0) {
850*716fd348SMartin Matuska 				printf("Unable to read /dev/urandom: %s\n:",
851*716fd348SMartin Matuska 				    strerror(errno));
852*716fd348SMartin Matuska 				return (1);
853*716fd348SMartin Matuska 			}
854*716fd348SMartin Matuska 			bytes_read += rc;
855*716fd348SMartin Matuska 		}
856*716fd348SMartin Matuska 
857*716fd348SMartin Matuska 		(void) close(fd);
858*716fd348SMartin Matuska 	}
859*716fd348SMartin Matuska 
860*716fd348SMartin Matuska 	if (restarts == 0)
861*716fd348SMartin Matuska 		printf("Writing generated mappings to '%s':\n", filename);
862*716fd348SMartin Matuska 
863*716fd348SMartin Matuska 	/*
864*716fd348SMartin Matuska 	 * Generate maps for all requested child counts. The best map for
865*716fd348SMartin Matuska 	 * each child count is written out to the specified file.  If the file
866*716fd348SMartin Matuska 	 * already contains a better mapping this map will not be added.
867*716fd348SMartin Matuska 	 */
868*716fd348SMartin Matuska 	for (uint64_t children = min_children;
869*716fd348SMartin Matuska 	    children <= max_children; children++) {
870*716fd348SMartin Matuska 		char key[8] = { 0 };
871*716fd348SMartin Matuska 		draid_map_t *map;
872*716fd348SMartin Matuska 		double worst_ratio = 1000.0;
873*716fd348SMartin Matuska 		double avg_ratio = 1000.0;
874*716fd348SMartin Matuska 
875*716fd348SMartin Matuska 		error = eval_maps(children, passes, &map_seed, &map,
876*716fd348SMartin Matuska 		    &worst_ratio, &avg_ratio);
877*716fd348SMartin Matuska 		if (error) {
878*716fd348SMartin Matuska 			printf("Error eval_maps(): %s\n", strerror(error));
879*716fd348SMartin Matuska 			return (1);
880*716fd348SMartin Matuska 		}
881*716fd348SMartin Matuska 
882*716fd348SMartin Matuska 		if (worst_ratio < 1.0 || avg_ratio < 1.0) {
883*716fd348SMartin Matuska 			printf("Error ratio < 1.0: worst_ratio = %2.03f "
884*716fd348SMartin Matuska 			    "avg_ratio = %2.03f\n", worst_ratio, avg_ratio);
885*716fd348SMartin Matuska 			return (1);
886*716fd348SMartin Matuska 		}
887*716fd348SMartin Matuska 
888*716fd348SMartin Matuska 		snprintf(key, 7, "%llu", (u_longlong_t)children);
889*716fd348SMartin Matuska 		error = write_map_key(filename, key, map, worst_ratio,
890*716fd348SMartin Matuska 		    avg_ratio);
891*716fd348SMartin Matuska 		if (error == 0) {
892*716fd348SMartin Matuska 			/* The new map was added to the file. */
893*716fd348SMartin Matuska 			dump_map(map, key, worst_ratio, avg_ratio,
894*716fd348SMartin Matuska 			    MAX(verbose, 1));
895*716fd348SMartin Matuska 		} else if (error == EEXIST) {
896*716fd348SMartin Matuska 			/* The existing map was preferable and kept. */
897*716fd348SMartin Matuska 			if (verbose > 0)
898*716fd348SMartin Matuska 				dump_map_key(filename, key, verbose);
899*716fd348SMartin Matuska 		} else {
900*716fd348SMartin Matuska 			printf("Error write_map_key(): %s\n", strerror(error));
901*716fd348SMartin Matuska 			return (1);
902*716fd348SMartin Matuska 		}
903*716fd348SMartin Matuska 
904*716fd348SMartin Matuska 		free_map(map);
905*716fd348SMartin Matuska 	}
906*716fd348SMartin Matuska 
907*716fd348SMartin Matuska 	/*
908*716fd348SMartin Matuska 	 * When the continuous option is set restart at the minimum number of
909*716fd348SMartin Matuska 	 * children instead of exiting. This option is useful as a mechanism
910*716fd348SMartin Matuska 	 * to continuous try and refine the discovered permutations.
911*716fd348SMartin Matuska 	 */
912*716fd348SMartin Matuska 	if (continuous) {
913*716fd348SMartin Matuska 		restarts++;
914*716fd348SMartin Matuska 		printf("Restarting by request (-c): %d\n", restarts);
915*716fd348SMartin Matuska 		goto restart;
916*716fd348SMartin Matuska 	}
917*716fd348SMartin Matuska 
918*716fd348SMartin Matuska 	return (0);
919*716fd348SMartin Matuska }
920*716fd348SMartin Matuska 
921*716fd348SMartin Matuska /*
922*716fd348SMartin Matuska  * Verify each map in the file by generating its in-memory permutation array
923*716fd348SMartin Matuska  * and comfirming its checksum is correct.
924*716fd348SMartin Matuska  */
925*716fd348SMartin Matuska static int
926*716fd348SMartin Matuska draid_verify(int argc, char *argv[])
927*716fd348SMartin Matuska {
928*716fd348SMartin Matuska 	char filename[MAXPATHLEN] = {0};
929*716fd348SMartin Matuska 	int n = 0, c, error, verbose = 1;
930*716fd348SMartin Matuska 	int check_ratios = 0;
931*716fd348SMartin Matuska 
932*716fd348SMartin Matuska 	while ((c = getopt(argc, argv, ":rv")) != -1) {
933*716fd348SMartin Matuska 		switch (c) {
934*716fd348SMartin Matuska 		case 'r':
935*716fd348SMartin Matuska 			check_ratios++;
936*716fd348SMartin Matuska 			break;
937*716fd348SMartin Matuska 		case 'v':
938*716fd348SMartin Matuska 			verbose++;
939*716fd348SMartin Matuska 			break;
940*716fd348SMartin Matuska 		case ':':
941*716fd348SMartin Matuska 			(void) fprintf(stderr,
942*716fd348SMartin Matuska 			    "missing argument for '%c' option\n", optopt);
943*716fd348SMartin Matuska 			draid_usage();
944*716fd348SMartin Matuska 			break;
945*716fd348SMartin Matuska 		case '?':
946*716fd348SMartin Matuska 			(void) fprintf(stderr, "invalid option '%c'\n",
947*716fd348SMartin Matuska 			    optopt);
948*716fd348SMartin Matuska 			draid_usage();
949*716fd348SMartin Matuska 			break;
950*716fd348SMartin Matuska 		}
951*716fd348SMartin Matuska 	}
952*716fd348SMartin Matuska 
953*716fd348SMartin Matuska 	if (argc > optind) {
954*716fd348SMartin Matuska 		char *abspath = malloc(MAXPATHLEN);
955*716fd348SMartin Matuska 		if (abspath == NULL)
956*716fd348SMartin Matuska 			return (ENOMEM);
957*716fd348SMartin Matuska 
958*716fd348SMartin Matuska 		if (realpath(argv[optind], abspath) != NULL)
959*716fd348SMartin Matuska 			strncpy(filename, abspath, MAXPATHLEN - 1);
960*716fd348SMartin Matuska 		else
961*716fd348SMartin Matuska 			strncpy(filename, argv[optind], MAXPATHLEN - 1);
962*716fd348SMartin Matuska 
963*716fd348SMartin Matuska 		free(abspath);
964*716fd348SMartin Matuska 	} else {
965*716fd348SMartin Matuska 		(void) fprintf(stderr, "A FILE must be specified.\n");
966*716fd348SMartin Matuska 		return (1);
967*716fd348SMartin Matuska 	}
968*716fd348SMartin Matuska 
969*716fd348SMartin Matuska 	printf("Verifying permutation maps: '%s'\n", filename);
970*716fd348SMartin Matuska 
971*716fd348SMartin Matuska 	/*
972*716fd348SMartin Matuska 	 * Lookup hardcoded permutation map for each valid number of children
973*716fd348SMartin Matuska 	 * and verify a generated map has the correct checksum.  Then compare
974*716fd348SMartin Matuska 	 * the generated map values with the nvlist map values read from the
975*716fd348SMartin Matuska 	 * reference file to cross-check the permutation.
976*716fd348SMartin Matuska 	 */
977*716fd348SMartin Matuska 	for (uint64_t children = VDEV_DRAID_MIN_CHILDREN;
978*716fd348SMartin Matuska 	    children <= VDEV_DRAID_MAX_CHILDREN;
979*716fd348SMartin Matuska 	    children++) {
980*716fd348SMartin Matuska 		draid_map_t *map;
981*716fd348SMartin Matuska 		char key[8] = {0};
982*716fd348SMartin Matuska 
983*716fd348SMartin Matuska 		snprintf(key, 8, "%llu", (u_longlong_t)children);
984*716fd348SMartin Matuska 
985*716fd348SMartin Matuska 		error = alloc_fixed_map(children, &map);
986*716fd348SMartin Matuska 		if (error) {
987*716fd348SMartin Matuska 			printf("Error alloc_fixed_map() failed: %s\n",
988*716fd348SMartin Matuska 			    error == ECKSUM ? "Invalid checksum" :
989*716fd348SMartin Matuska 			    strerror(error));
990*716fd348SMartin Matuska 			return (1);
991*716fd348SMartin Matuska 		}
992*716fd348SMartin Matuska 
993*716fd348SMartin Matuska 		uint64_t nv_seed, nv_checksum, nv_children, nv_nperms;
994*716fd348SMartin Matuska 		uint8_t *nv_perms;
995*716fd348SMartin Matuska 		nvlist_t *cfg;
996*716fd348SMartin Matuska 		uint_t c;
997*716fd348SMartin Matuska 
998*716fd348SMartin Matuska 		error = read_map_key(filename, key, &cfg);
999*716fd348SMartin Matuska 		if (error != 0) {
1000*716fd348SMartin Matuska 			printf("Error read_map_key() failed: %s\n",
1001*716fd348SMartin Matuska 			    strerror(error));
1002*716fd348SMartin Matuska 			free_map(map);
1003*716fd348SMartin Matuska 			return (1);
1004*716fd348SMartin Matuska 		}
1005*716fd348SMartin Matuska 
1006*716fd348SMartin Matuska 		nv_seed = fnvlist_lookup_uint64(cfg, MAP_SEED);
1007*716fd348SMartin Matuska 		nv_checksum = fnvlist_lookup_uint64(cfg, MAP_CHECKSUM);
1008*716fd348SMartin Matuska 		nv_children = fnvlist_lookup_uint64(cfg, MAP_CHILDREN);
1009*716fd348SMartin Matuska 		nv_nperms = fnvlist_lookup_uint64(cfg, MAP_NPERMS);
1010*716fd348SMartin Matuska 		nvlist_lookup_uint8_array(cfg, MAP_PERMS, &nv_perms, &c);
1011*716fd348SMartin Matuska 
1012*716fd348SMartin Matuska 		/*
1013*716fd348SMartin Matuska 		 * Compare draid_map_t and nvlist reference values.
1014*716fd348SMartin Matuska 		 */
1015*716fd348SMartin Matuska 		if (map->dm_seed != nv_seed) {
1016*716fd348SMartin Matuska 			printf("Error different seeds: 0x%016llx != "
1017*716fd348SMartin Matuska 			    "0x%016llx\n", (u_longlong_t)map->dm_seed,
1018*716fd348SMartin Matuska 			    (u_longlong_t)nv_seed);
1019*716fd348SMartin Matuska 			error = EINVAL;
1020*716fd348SMartin Matuska 		}
1021*716fd348SMartin Matuska 
1022*716fd348SMartin Matuska 		if (map->dm_checksum != nv_checksum) {
1023*716fd348SMartin Matuska 			printf("Error different checksums: 0x%016llx "
1024*716fd348SMartin Matuska 			    "!= 0x%016llx\n",
1025*716fd348SMartin Matuska 			    (u_longlong_t)map->dm_checksum,
1026*716fd348SMartin Matuska 			    (u_longlong_t)nv_checksum);
1027*716fd348SMartin Matuska 			error = EINVAL;
1028*716fd348SMartin Matuska 		}
1029*716fd348SMartin Matuska 
1030*716fd348SMartin Matuska 		if (map->dm_children != nv_children) {
1031*716fd348SMartin Matuska 			printf("Error different children: %llu "
1032*716fd348SMartin Matuska 			    "!= %llu\n", (u_longlong_t)map->dm_children,
1033*716fd348SMartin Matuska 			    (u_longlong_t)nv_children);
1034*716fd348SMartin Matuska 			error = EINVAL;
1035*716fd348SMartin Matuska 		}
1036*716fd348SMartin Matuska 
1037*716fd348SMartin Matuska 		if (map->dm_nperms != nv_nperms) {
1038*716fd348SMartin Matuska 			printf("Error different nperms: %llu "
1039*716fd348SMartin Matuska 			    "!= %llu\n", (u_longlong_t)map->dm_nperms,
1040*716fd348SMartin Matuska 			    (u_longlong_t)nv_nperms);
1041*716fd348SMartin Matuska 			error = EINVAL;
1042*716fd348SMartin Matuska 		}
1043*716fd348SMartin Matuska 
1044*716fd348SMartin Matuska 		for (uint64_t i = 0; i < nv_children * nv_nperms; i++) {
1045*716fd348SMartin Matuska 			if (map->dm_perms[i] != nv_perms[i]) {
1046*716fd348SMartin Matuska 				printf("Error different perms[%llu]: "
1047*716fd348SMartin Matuska 				    "%d != %d\n", (u_longlong_t)i,
1048*716fd348SMartin Matuska 				    (int)map->dm_perms[i],
1049*716fd348SMartin Matuska 				    (int)nv_perms[i]);
1050*716fd348SMartin Matuska 				error = EINVAL;
1051*716fd348SMartin Matuska 				break;
1052*716fd348SMartin Matuska 			}
1053*716fd348SMartin Matuska 		}
1054*716fd348SMartin Matuska 
1055*716fd348SMartin Matuska 		/*
1056*716fd348SMartin Matuska 		 * For good measure recalculate the worst and average
1057*716fd348SMartin Matuska 		 * ratios and confirm they match the nvlist values.
1058*716fd348SMartin Matuska 		 */
1059*716fd348SMartin Matuska 		if (check_ratios) {
1060*716fd348SMartin Matuska 			uint64_t nv_worst_ratio, nv_avg_ratio;
1061*716fd348SMartin Matuska 			double worst_ratio, avg_ratio;
1062*716fd348SMartin Matuska 
1063*716fd348SMartin Matuska 			eval_decluster(map, &worst_ratio, &avg_ratio);
1064*716fd348SMartin Matuska 
1065*716fd348SMartin Matuska 			nv_worst_ratio = fnvlist_lookup_uint64(cfg,
1066*716fd348SMartin Matuska 			    MAP_WORST_RATIO);
1067*716fd348SMartin Matuska 			nv_avg_ratio = fnvlist_lookup_uint64(cfg,
1068*716fd348SMartin Matuska 			    MAP_AVG_RATIO);
1069*716fd348SMartin Matuska 
1070*716fd348SMartin Matuska 			if (worst_ratio < 1.0 || avg_ratio < 1.0) {
1071*716fd348SMartin Matuska 				printf("Error ratio out of range %2.03f, "
1072*716fd348SMartin Matuska 				    "%2.03f\n", worst_ratio, avg_ratio);
1073*716fd348SMartin Matuska 				error = EINVAL;
1074*716fd348SMartin Matuska 			}
1075*716fd348SMartin Matuska 
1076*716fd348SMartin Matuska 			if ((uint64_t)(worst_ratio * 1000.0) !=
1077*716fd348SMartin Matuska 			    nv_worst_ratio) {
1078*716fd348SMartin Matuska 				printf("Error different worst_ratio %2.03f "
1079*716fd348SMartin Matuska 				    "!= %2.03f\n", (double)nv_worst_ratio /
1080*716fd348SMartin Matuska 				    1000.0, worst_ratio);
1081*716fd348SMartin Matuska 				error = EINVAL;
1082*716fd348SMartin Matuska 			}
1083*716fd348SMartin Matuska 
1084*716fd348SMartin Matuska 			if ((uint64_t)(avg_ratio * 1000.0) != nv_avg_ratio) {
1085*716fd348SMartin Matuska 				printf("Error different average_ratio %2.03f "
1086*716fd348SMartin Matuska 				    "!= %2.03f\n", (double)nv_avg_ratio /
1087*716fd348SMartin Matuska 				    1000.0, avg_ratio);
1088*716fd348SMartin Matuska 				error = EINVAL;
1089*716fd348SMartin Matuska 			}
1090*716fd348SMartin Matuska 		}
1091*716fd348SMartin Matuska 
1092*716fd348SMartin Matuska 		if (error) {
1093*716fd348SMartin Matuska 			free_map(map);
1094*716fd348SMartin Matuska 			nvlist_free(cfg);
1095*716fd348SMartin Matuska 			return (1);
1096*716fd348SMartin Matuska 		}
1097*716fd348SMartin Matuska 
1098*716fd348SMartin Matuska 		if (verbose > 0) {
1099*716fd348SMartin Matuska 			printf("- %llu children: good\n",
1100*716fd348SMartin Matuska 			    (u_longlong_t)children);
1101*716fd348SMartin Matuska 		}
1102*716fd348SMartin Matuska 		n++;
1103*716fd348SMartin Matuska 
1104*716fd348SMartin Matuska 		free_map(map);
1105*716fd348SMartin Matuska 		nvlist_free(cfg);
1106*716fd348SMartin Matuska 	}
1107*716fd348SMartin Matuska 
1108*716fd348SMartin Matuska 	if (n != (VDEV_DRAID_MAX_CHILDREN - 1)) {
1109*716fd348SMartin Matuska 		printf("Error permutation maps missing: %d / %d checked\n",
1110*716fd348SMartin Matuska 		    n, VDEV_DRAID_MAX_CHILDREN - 1);
1111*716fd348SMartin Matuska 		return (1);
1112*716fd348SMartin Matuska 	}
1113*716fd348SMartin Matuska 
1114*716fd348SMartin Matuska 	printf("Successfully verified %d / %d permutation maps\n",
1115*716fd348SMartin Matuska 	    n, VDEV_DRAID_MAX_CHILDREN - 1);
1116*716fd348SMartin Matuska 
1117*716fd348SMartin Matuska 	return (0);
1118*716fd348SMartin Matuska }
1119*716fd348SMartin Matuska 
1120*716fd348SMartin Matuska /*
1121*716fd348SMartin Matuska  * Dump the contents of the specified mapping(s) for inspection.
1122*716fd348SMartin Matuska  */
1123*716fd348SMartin Matuska static int
1124*716fd348SMartin Matuska draid_dump(int argc, char *argv[])
1125*716fd348SMartin Matuska {
1126*716fd348SMartin Matuska 	char filename[MAXPATHLEN] = {0};
1127*716fd348SMartin Matuska 	int c, error, verbose = 1;
1128*716fd348SMartin Matuska 	int min_children = VDEV_DRAID_MIN_CHILDREN;
1129*716fd348SMartin Matuska 	int max_children = VDEV_DRAID_MAX_CHILDREN;
1130*716fd348SMartin Matuska 
1131*716fd348SMartin Matuska 	while ((c = getopt(argc, argv, ":vm:n:")) != -1) {
1132*716fd348SMartin Matuska 		switch (c) {
1133*716fd348SMartin Matuska 		case 'm':
1134*716fd348SMartin Matuska 			min_children = (int)strtol(optarg, NULL, 0);
1135*716fd348SMartin Matuska 			if (min_children < 2) {
1136*716fd348SMartin Matuska 				(void) fprintf(stderr, "A minimum of 2 "
1137*716fd348SMartin Matuska 				    "children are required.\n");
1138*716fd348SMartin Matuska 				return (1);
1139*716fd348SMartin Matuska 			}
1140*716fd348SMartin Matuska 
1141*716fd348SMartin Matuska 			break;
1142*716fd348SMartin Matuska 		case 'n':
1143*716fd348SMartin Matuska 			max_children = (int)strtol(optarg, NULL, 0);
1144*716fd348SMartin Matuska 			if (max_children > VDEV_DRAID_MAX_CHILDREN) {
1145*716fd348SMartin Matuska 				(void) fprintf(stderr, "A maximum of %d "
1146*716fd348SMartin Matuska 				    "children are allowed.\n",
1147*716fd348SMartin Matuska 				    VDEV_DRAID_MAX_CHILDREN);
1148*716fd348SMartin Matuska 				return (1);
1149*716fd348SMartin Matuska 			}
1150*716fd348SMartin Matuska 			break;
1151*716fd348SMartin Matuska 		case 'v':
1152*716fd348SMartin Matuska 			verbose++;
1153*716fd348SMartin Matuska 			break;
1154*716fd348SMartin Matuska 		case ':':
1155*716fd348SMartin Matuska 			(void) fprintf(stderr,
1156*716fd348SMartin Matuska 			    "missing argument for '%c' option\n", optopt);
1157*716fd348SMartin Matuska 			draid_usage();
1158*716fd348SMartin Matuska 			break;
1159*716fd348SMartin Matuska 		case '?':
1160*716fd348SMartin Matuska 			(void) fprintf(stderr, "invalid option '%c'\n",
1161*716fd348SMartin Matuska 			    optopt);
1162*716fd348SMartin Matuska 			draid_usage();
1163*716fd348SMartin Matuska 			break;
1164*716fd348SMartin Matuska 		}
1165*716fd348SMartin Matuska 	}
1166*716fd348SMartin Matuska 
1167*716fd348SMartin Matuska 	if (argc > optind)
1168*716fd348SMartin Matuska 		strncpy(filename, argv[optind], MAXPATHLEN - 1);
1169*716fd348SMartin Matuska 	else {
1170*716fd348SMartin Matuska 		(void) fprintf(stderr, "A FILE must be specified.\n");
1171*716fd348SMartin Matuska 		return (1);
1172*716fd348SMartin Matuska 	}
1173*716fd348SMartin Matuska 
1174*716fd348SMartin Matuska 	/*
1175*716fd348SMartin Matuska 	 * Dump maps for the requested child counts.
1176*716fd348SMartin Matuska 	 */
1177*716fd348SMartin Matuska 	for (uint64_t children = min_children;
1178*716fd348SMartin Matuska 	    children <= max_children; children++) {
1179*716fd348SMartin Matuska 		char key[8] = { 0 };
1180*716fd348SMartin Matuska 
1181*716fd348SMartin Matuska 		snprintf(key, 7, "%llu", (u_longlong_t)children);
1182*716fd348SMartin Matuska 		error = dump_map_key(filename, key, verbose);
1183*716fd348SMartin Matuska 		if (error) {
1184*716fd348SMartin Matuska 			printf("Error dump_map_key(): %s\n", strerror(error));
1185*716fd348SMartin Matuska 			return (1);
1186*716fd348SMartin Matuska 		}
1187*716fd348SMartin Matuska 	}
1188*716fd348SMartin Matuska 
1189*716fd348SMartin Matuska 	return (0);
1190*716fd348SMartin Matuska }
1191*716fd348SMartin Matuska 
1192*716fd348SMartin Matuska /*
1193*716fd348SMartin Matuska  * Print all of the mappings as a C formatted draid_map_t array.  This table
1194*716fd348SMartin Matuska  * is found in the module/zcommon/zfs_draid.c file and is the definitive
1195*716fd348SMartin Matuska  * source for all mapping used by dRAID.  It cannot be updated without
1196*716fd348SMartin Matuska  * changing the dRAID on disk format.
1197*716fd348SMartin Matuska  */
1198*716fd348SMartin Matuska static int
1199*716fd348SMartin Matuska draid_table(int argc, char *argv[])
1200*716fd348SMartin Matuska {
1201*716fd348SMartin Matuska 	char filename[MAXPATHLEN] = {0};
1202*716fd348SMartin Matuska 	int error;
1203*716fd348SMartin Matuska 
1204*716fd348SMartin Matuska 	if (argc > optind)
1205*716fd348SMartin Matuska 		strncpy(filename, argv[optind], MAXPATHLEN - 1);
1206*716fd348SMartin Matuska 	else {
1207*716fd348SMartin Matuska 		(void) fprintf(stderr, "A FILE must be specified.\n");
1208*716fd348SMartin Matuska 		return (1);
1209*716fd348SMartin Matuska 	}
1210*716fd348SMartin Matuska 
1211*716fd348SMartin Matuska 	printf("static const draid_map_t "
1212*716fd348SMartin Matuska 	    "draid_maps[VDEV_DRAID_MAX_MAPS] = {\n");
1213*716fd348SMartin Matuska 
1214*716fd348SMartin Matuska 	for (uint64_t children = VDEV_DRAID_MIN_CHILDREN;
1215*716fd348SMartin Matuska 	    children <= VDEV_DRAID_MAX_CHILDREN;
1216*716fd348SMartin Matuska 	    children++) {
1217*716fd348SMartin Matuska 		uint64_t seed, checksum, nperms, avg_ratio;
1218*716fd348SMartin Matuska 		nvlist_t *cfg;
1219*716fd348SMartin Matuska 		char key[8] = {0};
1220*716fd348SMartin Matuska 
1221*716fd348SMartin Matuska 		snprintf(key, 8, "%llu", (u_longlong_t)children);
1222*716fd348SMartin Matuska 
1223*716fd348SMartin Matuska 		error = read_map_key(filename, key, &cfg);
1224*716fd348SMartin Matuska 		if (error != 0) {
1225*716fd348SMartin Matuska 			printf("Error read_map_key() failed: %s\n",
1226*716fd348SMartin Matuska 			    strerror(error));
1227*716fd348SMartin Matuska 			return (1);
1228*716fd348SMartin Matuska 		}
1229*716fd348SMartin Matuska 
1230*716fd348SMartin Matuska 		seed = fnvlist_lookup_uint64(cfg, MAP_SEED);
1231*716fd348SMartin Matuska 		checksum = fnvlist_lookup_uint64(cfg, MAP_CHECKSUM);
1232*716fd348SMartin Matuska 		children = fnvlist_lookup_uint64(cfg, MAP_CHILDREN);
1233*716fd348SMartin Matuska 		nperms = fnvlist_lookup_uint64(cfg, MAP_NPERMS);
1234*716fd348SMartin Matuska 		avg_ratio = fnvlist_lookup_uint64(cfg, MAP_AVG_RATIO);
1235*716fd348SMartin Matuska 
1236*716fd348SMartin Matuska 		printf("\t{ %3llu, %3llu, 0x%016llx, 0x%016llx },\t"
1237*716fd348SMartin Matuska 		    "/* %2.03f */\n", (u_longlong_t)children,
1238*716fd348SMartin Matuska 		    (u_longlong_t)nperms, (u_longlong_t)seed,
1239*716fd348SMartin Matuska 		    (u_longlong_t)checksum, (double)avg_ratio / 1000.0);
1240*716fd348SMartin Matuska 
1241*716fd348SMartin Matuska 		nvlist_free(cfg);
1242*716fd348SMartin Matuska 	}
1243*716fd348SMartin Matuska 
1244*716fd348SMartin Matuska 	printf("};\n");
1245*716fd348SMartin Matuska 
1246*716fd348SMartin Matuska 	return (0);
1247*716fd348SMartin Matuska }
1248*716fd348SMartin Matuska 
1249*716fd348SMartin Matuska static int
1250*716fd348SMartin Matuska draid_merge_impl(nvlist_t *allcfgs, const char *srcfilename, int *mergedp)
1251*716fd348SMartin Matuska {
1252*716fd348SMartin Matuska 	nvlist_t *srccfgs;
1253*716fd348SMartin Matuska 	nvpair_t *elem = NULL;
1254*716fd348SMartin Matuska 	int error, merged = 0;
1255*716fd348SMartin Matuska 
1256*716fd348SMartin Matuska 	error = read_map(srcfilename, &srccfgs);
1257*716fd348SMartin Matuska 	if (error != 0)
1258*716fd348SMartin Matuska 		return (error);
1259*716fd348SMartin Matuska 
1260*716fd348SMartin Matuska 	while ((elem = nvlist_next_nvpair(srccfgs, elem)) != NULL) {
1261*716fd348SMartin Matuska 		uint64_t nv_worst_ratio;
1262*716fd348SMartin Matuska 		uint64_t allcfg_worst_ratio;
1263*716fd348SMartin Matuska 		nvlist_t *cfg, *allcfg;
1264*716fd348SMartin Matuska 		char *key;
1265*716fd348SMartin Matuska 
1266*716fd348SMartin Matuska 		switch (nvpair_type(elem)) {
1267*716fd348SMartin Matuska 		case DATA_TYPE_NVLIST:
1268*716fd348SMartin Matuska 
1269*716fd348SMartin Matuska 			(void) nvpair_value_nvlist(elem, &cfg);
1270*716fd348SMartin Matuska 			key = nvpair_name(elem);
1271*716fd348SMartin Matuska 
1272*716fd348SMartin Matuska 			nv_worst_ratio = fnvlist_lookup_uint64(cfg,
1273*716fd348SMartin Matuska 			    MAP_WORST_RATIO);
1274*716fd348SMartin Matuska 
1275*716fd348SMartin Matuska 			error = nvlist_lookup_nvlist(allcfgs, key, &allcfg);
1276*716fd348SMartin Matuska 			if (error == 0) {
1277*716fd348SMartin Matuska 				allcfg_worst_ratio = fnvlist_lookup_uint64(
1278*716fd348SMartin Matuska 				    allcfg, MAP_WORST_RATIO);
1279*716fd348SMartin Matuska 
1280*716fd348SMartin Matuska 				if (nv_worst_ratio < allcfg_worst_ratio) {
1281*716fd348SMartin Matuska 					fnvlist_remove(allcfgs, key);
1282*716fd348SMartin Matuska 					error = nvlist_add_nvlist(allcfgs,
1283*716fd348SMartin Matuska 					    key, cfg);
1284*716fd348SMartin Matuska 					merged++;
1285*716fd348SMartin Matuska 				}
1286*716fd348SMartin Matuska 			} else if (error == ENOENT) {
1287*716fd348SMartin Matuska 				error = nvlist_add_nvlist(allcfgs, key, cfg);
1288*716fd348SMartin Matuska 				merged++;
1289*716fd348SMartin Matuska 			} else {
1290*716fd348SMartin Matuska 				return (error);
1291*716fd348SMartin Matuska 			}
1292*716fd348SMartin Matuska 
1293*716fd348SMartin Matuska 			break;
1294*716fd348SMartin Matuska 		default:
1295*716fd348SMartin Matuska 			continue;
1296*716fd348SMartin Matuska 		}
1297*716fd348SMartin Matuska 	}
1298*716fd348SMartin Matuska 
1299*716fd348SMartin Matuska 	nvlist_free(srccfgs);
1300*716fd348SMartin Matuska 
1301*716fd348SMartin Matuska 	*mergedp = merged;
1302*716fd348SMartin Matuska 
1303*716fd348SMartin Matuska 	return (0);
1304*716fd348SMartin Matuska }
1305*716fd348SMartin Matuska 
1306*716fd348SMartin Matuska /*
1307*716fd348SMartin Matuska  * Merge the best map for each child count found in the listed files into
1308*716fd348SMartin Matuska  * a new file.  This allows 'draid generate' to be run in parallel and for
1309*716fd348SMartin Matuska  * the results maps to be combined.
1310*716fd348SMartin Matuska  */
1311*716fd348SMartin Matuska static int
1312*716fd348SMartin Matuska draid_merge(int argc, char *argv[])
1313*716fd348SMartin Matuska {
1314*716fd348SMartin Matuska 	char filename[MAXPATHLEN] = {0};
1315*716fd348SMartin Matuska 	int c, error, total_merged = 0;
1316*716fd348SMartin Matuska 	nvlist_t *allcfgs;
1317*716fd348SMartin Matuska 
1318*716fd348SMartin Matuska 	while ((c = getopt(argc, argv, ":")) != -1) {
1319*716fd348SMartin Matuska 		switch (c) {
1320*716fd348SMartin Matuska 		case ':':
1321*716fd348SMartin Matuska 			(void) fprintf(stderr,
1322*716fd348SMartin Matuska 			    "missing argument for '%c' option\n", optopt);
1323*716fd348SMartin Matuska 			draid_usage();
1324*716fd348SMartin Matuska 			break;
1325*716fd348SMartin Matuska 		case '?':
1326*716fd348SMartin Matuska 			(void) fprintf(stderr, "invalid option '%c'\n",
1327*716fd348SMartin Matuska 			    optopt);
1328*716fd348SMartin Matuska 			draid_usage();
1329*716fd348SMartin Matuska 			break;
1330*716fd348SMartin Matuska 		}
1331*716fd348SMartin Matuska 	}
1332*716fd348SMartin Matuska 
1333*716fd348SMartin Matuska 	if (argc < 4) {
1334*716fd348SMartin Matuska 		(void) fprintf(stderr,
1335*716fd348SMartin Matuska 		    "A FILE and multiple SRCs must be specified.\n");
1336*716fd348SMartin Matuska 		return (1);
1337*716fd348SMartin Matuska 	}
1338*716fd348SMartin Matuska 
1339*716fd348SMartin Matuska 	strncpy(filename, argv[optind], MAXPATHLEN - 1);
1340*716fd348SMartin Matuska 	optind++;
1341*716fd348SMartin Matuska 
1342*716fd348SMartin Matuska 	error = read_map(filename, &allcfgs);
1343*716fd348SMartin Matuska 	if (error == ENOENT) {
1344*716fd348SMartin Matuska 		allcfgs = fnvlist_alloc();
1345*716fd348SMartin Matuska 	} else if (error != 0) {
1346*716fd348SMartin Matuska 		printf("Error read_map(): %s\n", strerror(error));
1347*716fd348SMartin Matuska 		return (error);
1348*716fd348SMartin Matuska 	}
1349*716fd348SMartin Matuska 
1350*716fd348SMartin Matuska 	while (optind < argc) {
1351*716fd348SMartin Matuska 		char srcfilename[MAXPATHLEN] = {0};
1352*716fd348SMartin Matuska 		int merged = 0;
1353*716fd348SMartin Matuska 
1354*716fd348SMartin Matuska 		strncpy(srcfilename, argv[optind], MAXPATHLEN - 1);
1355*716fd348SMartin Matuska 
1356*716fd348SMartin Matuska 		error = draid_merge_impl(allcfgs, srcfilename, &merged);
1357*716fd348SMartin Matuska 		if (error) {
1358*716fd348SMartin Matuska 			printf("Error draid_merge_impl(): %s\n",
1359*716fd348SMartin Matuska 			    strerror(error));
1360*716fd348SMartin Matuska 			nvlist_free(allcfgs);
1361*716fd348SMartin Matuska 			return (1);
1362*716fd348SMartin Matuska 		}
1363*716fd348SMartin Matuska 
1364*716fd348SMartin Matuska 		total_merged += merged;
1365*716fd348SMartin Matuska 		printf("Merged %d key(s) from '%s' into '%s'\n", merged,
1366*716fd348SMartin Matuska 		    srcfilename, filename);
1367*716fd348SMartin Matuska 
1368*716fd348SMartin Matuska 		optind++;
1369*716fd348SMartin Matuska 	}
1370*716fd348SMartin Matuska 
1371*716fd348SMartin Matuska 	if (total_merged > 0)
1372*716fd348SMartin Matuska 		write_map(filename, allcfgs);
1373*716fd348SMartin Matuska 
1374*716fd348SMartin Matuska 	printf("Merged a total of %d key(s) into '%s'\n", total_merged,
1375*716fd348SMartin Matuska 	    filename);
1376*716fd348SMartin Matuska 
1377*716fd348SMartin Matuska 	nvlist_free(allcfgs);
1378*716fd348SMartin Matuska 
1379*716fd348SMartin Matuska 	return (0);
1380*716fd348SMartin Matuska }
1381*716fd348SMartin Matuska 
1382*716fd348SMartin Matuska int
1383*716fd348SMartin Matuska main(int argc, char *argv[])
1384*716fd348SMartin Matuska {
1385*716fd348SMartin Matuska 	if (argc < 2)
1386*716fd348SMartin Matuska 		draid_usage();
1387*716fd348SMartin Matuska 
1388*716fd348SMartin Matuska 	char *subcommand = argv[1];
1389*716fd348SMartin Matuska 
1390*716fd348SMartin Matuska 	if (strcmp(subcommand, "generate") == 0) {
1391*716fd348SMartin Matuska 		return (draid_generate(argc - 1, argv + 1));
1392*716fd348SMartin Matuska 	} else if (strcmp(subcommand, "verify") == 0) {
1393*716fd348SMartin Matuska 		return (draid_verify(argc - 1, argv + 1));
1394*716fd348SMartin Matuska 	} else if (strcmp(subcommand, "dump") == 0) {
1395*716fd348SMartin Matuska 		return (draid_dump(argc - 1, argv + 1));
1396*716fd348SMartin Matuska 	} else if (strcmp(subcommand, "table") == 0) {
1397*716fd348SMartin Matuska 		return (draid_table(argc - 1, argv + 1));
1398*716fd348SMartin Matuska 	} else if (strcmp(subcommand, "merge") == 0) {
1399*716fd348SMartin Matuska 		return (draid_merge(argc - 1, argv + 1));
1400*716fd348SMartin Matuska 	} else {
1401*716fd348SMartin Matuska 		draid_usage();
1402*716fd348SMartin Matuska 	}
1403*716fd348SMartin Matuska }
1404