xref: /freebsd-src/sys/contrib/openzfs/tests/zfs-tests/cmd/draid.c (revision 2a58b312b62f908ec92311d1bd8536dbaeb8e55b)
1716fd348SMartin Matuska /*
2716fd348SMartin Matuska  * CDDL HEADER START
3716fd348SMartin Matuska  *
4716fd348SMartin Matuska  * The contents of this file are subject to the terms of the
5716fd348SMartin Matuska  * Common Development and Distribution License (the "License").
6716fd348SMartin Matuska  * You may not use this file except in compliance with the License.
7716fd348SMartin Matuska  *
8716fd348SMartin Matuska  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9271171e0SMartin Matuska  * or https://opensource.org/licenses/CDDL-1.0.
10716fd348SMartin Matuska  * See the License for the specific language governing permissions
11716fd348SMartin Matuska  * and limitations under the License.
12716fd348SMartin Matuska  *
13716fd348SMartin Matuska  * When distributing Covered Code, include this CDDL HEADER in each
14716fd348SMartin Matuska  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15716fd348SMartin Matuska  * If applicable, add the following below this CDDL HEADER, with the
16716fd348SMartin Matuska  * fields enclosed by brackets "[]" replaced with your own identifying
17716fd348SMartin Matuska  * information: Portions Copyright [yyyy] [name of copyright owner]
18716fd348SMartin Matuska  *
19716fd348SMartin Matuska  * CDDL HEADER END
20716fd348SMartin Matuska  */
21716fd348SMartin Matuska /*
22716fd348SMartin Matuska  * Copyright (c) 2018 Intel Corporation.
23716fd348SMartin Matuska  * Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
24716fd348SMartin Matuska  */
25716fd348SMartin Matuska 
26716fd348SMartin Matuska #include <stdio.h>
27716fd348SMartin Matuska #include <zlib.h>
28716fd348SMartin Matuska #include <zfs_fletcher.h>
29716fd348SMartin Matuska #include <sys/vdev_draid.h>
30716fd348SMartin Matuska #include <sys/nvpair.h>
31716fd348SMartin Matuska #include <sys/stat.h>
32716fd348SMartin Matuska 
/*
 * The number of rows to generate for new permutation maps.
 */
#define	MAP_ROWS_DEFAULT	256

/*
 * Key values for dRAID maps when stored as nvlists.  Each map is an nvlist
 * holding these keys; the ratio keys store the floating point ratio
 * multiplied by 1000 as a uint64.
 */
#define	MAP_SEED		"seed"
#define	MAP_CHECKSUM		"checksum"
#define	MAP_WORST_RATIO		"worst_ratio"
#define	MAP_AVG_RATIO		"avg_ratio"
#define	MAP_CHILDREN		"children"
#define	MAP_NPERMS		"nperms"
#define	MAP_PERMS		"perms"
48716fd348SMartin Matuska 
/*
 * Print the list of supported sub-commands to stderr and terminate the
 * process with exit status 1.  This function does not return.
 */
static void
draid_usage(void)
{
	static const char usage_text[] =
	    "usage: draid command args ...\n"
	    "Available commands are:\n"
	    "\n"
	    "\tdraid generate [-cv] [-m min] [-n max] [-p passes] FILE\n"
	    "\tdraid verify [-rv] FILE\n"
	    "\tdraid dump [-v] [-m min] [-n max] FILE\n"
	    "\tdraid table FILE\n"
	    "\tdraid merge FILE SRC SRC...\n";

	(void) fputs(usage_text, stderr);
	exit(1);
}
63716fd348SMartin Matuska 
64716fd348SMartin Matuska static int
read_map(const char * filename,nvlist_t ** allcfgs)65716fd348SMartin Matuska read_map(const char *filename, nvlist_t **allcfgs)
66716fd348SMartin Matuska {
67716fd348SMartin Matuska 	int block_size = 131072;
68716fd348SMartin Matuska 	int buf_size = 131072;
69716fd348SMartin Matuska 	int tmp_size, error;
70716fd348SMartin Matuska 	char *tmp_buf;
71716fd348SMartin Matuska 
72716fd348SMartin Matuska 	struct stat64 stat;
73716fd348SMartin Matuska 	if (lstat64(filename, &stat) != 0)
74716fd348SMartin Matuska 		return (errno);
75716fd348SMartin Matuska 
76716fd348SMartin Matuska 	if (stat.st_size == 0 ||
77716fd348SMartin Matuska 	    !(S_ISREG(stat.st_mode) || S_ISLNK(stat.st_mode))) {
78716fd348SMartin Matuska 		return (EINVAL);
79716fd348SMartin Matuska 	}
80716fd348SMartin Matuska 
81716fd348SMartin Matuska 	gzFile fp = gzopen(filename, "rb");
82716fd348SMartin Matuska 	if (fp == Z_NULL)
83716fd348SMartin Matuska 		return (errno);
84716fd348SMartin Matuska 
85716fd348SMartin Matuska 	char *buf = malloc(buf_size);
86716fd348SMartin Matuska 	if (buf == NULL) {
87716fd348SMartin Matuska 		(void) gzclose(fp);
88716fd348SMartin Matuska 		return (ENOMEM);
89716fd348SMartin Matuska 	}
90716fd348SMartin Matuska 
91716fd348SMartin Matuska 	ssize_t rc, bytes = 0;
92716fd348SMartin Matuska 	while (!gzeof(fp)) {
93716fd348SMartin Matuska 		rc = gzread(fp, buf + bytes, block_size);
94716fd348SMartin Matuska 		if ((rc < 0) || (rc == 0 && !gzeof(fp))) {
95716fd348SMartin Matuska 			free(buf);
96716fd348SMartin Matuska 			(void) gzerror(fp, &error);
97be181ee2SMartin Matuska 			(void) gzclose(fp);
98716fd348SMartin Matuska 			return (error);
99716fd348SMartin Matuska 		} else {
100716fd348SMartin Matuska 			bytes += rc;
101716fd348SMartin Matuska 
102716fd348SMartin Matuska 			if (bytes + block_size >= buf_size) {
103716fd348SMartin Matuska 				tmp_size = 2 * buf_size;
104716fd348SMartin Matuska 				tmp_buf = malloc(tmp_size);
105716fd348SMartin Matuska 				if (tmp_buf == NULL) {
106716fd348SMartin Matuska 					free(buf);
107716fd348SMartin Matuska 					(void) gzclose(fp);
108716fd348SMartin Matuska 					return (ENOMEM);
109716fd348SMartin Matuska 				}
110716fd348SMartin Matuska 
111716fd348SMartin Matuska 				memcpy(tmp_buf, buf, bytes);
112716fd348SMartin Matuska 				free(buf);
113716fd348SMartin Matuska 				buf = tmp_buf;
114716fd348SMartin Matuska 				buf_size = tmp_size;
115716fd348SMartin Matuska 			}
116716fd348SMartin Matuska 		}
117716fd348SMartin Matuska 	}
118716fd348SMartin Matuska 
119716fd348SMartin Matuska 	(void) gzclose(fp);
120716fd348SMartin Matuska 
121716fd348SMartin Matuska 	error = nvlist_unpack(buf, bytes, allcfgs, 0);
122716fd348SMartin Matuska 	free(buf);
123716fd348SMartin Matuska 
124716fd348SMartin Matuska 	return (error);
125716fd348SMartin Matuska }
126716fd348SMartin Matuska 
127716fd348SMartin Matuska /*
128716fd348SMartin Matuska  * Read a map from the specified filename.  A file contains multiple maps
129716fd348SMartin Matuska  * which are indexed by the number of children. The caller is responsible
130716fd348SMartin Matuska  * for freeing the configuration returned.
131716fd348SMartin Matuska  */
132716fd348SMartin Matuska static int
read_map_key(const char * filename,const char * key,nvlist_t ** cfg)133a0b956f5SMartin Matuska read_map_key(const char *filename, const char *key, nvlist_t **cfg)
134716fd348SMartin Matuska {
135716fd348SMartin Matuska 	nvlist_t *allcfgs, *foundcfg = NULL;
136716fd348SMartin Matuska 	int error;
137716fd348SMartin Matuska 
138716fd348SMartin Matuska 	error = read_map(filename, &allcfgs);
139716fd348SMartin Matuska 	if (error != 0)
140716fd348SMartin Matuska 		return (error);
141716fd348SMartin Matuska 
142be181ee2SMartin Matuska 	(void) nvlist_lookup_nvlist(allcfgs, key, &foundcfg);
143716fd348SMartin Matuska 	if (foundcfg != NULL) {
144716fd348SMartin Matuska 		nvlist_dup(foundcfg, cfg, KM_SLEEP);
145716fd348SMartin Matuska 		error = 0;
146716fd348SMartin Matuska 	} else {
147716fd348SMartin Matuska 		error = ENOENT;
148716fd348SMartin Matuska 	}
149716fd348SMartin Matuska 
150716fd348SMartin Matuska 	nvlist_free(allcfgs);
151716fd348SMartin Matuska 
152716fd348SMartin Matuska 	return (error);
153716fd348SMartin Matuska }
154716fd348SMartin Matuska 
155716fd348SMartin Matuska /*
156716fd348SMartin Matuska  * Write all mappings to the map file.
157716fd348SMartin Matuska  */
158716fd348SMartin Matuska static int
write_map(const char * filename,nvlist_t * allcfgs)159716fd348SMartin Matuska write_map(const char *filename, nvlist_t *allcfgs)
160716fd348SMartin Matuska {
161716fd348SMartin Matuska 	size_t buflen = 0;
162716fd348SMartin Matuska 	int error;
163716fd348SMartin Matuska 
164716fd348SMartin Matuska 	error = nvlist_size(allcfgs, &buflen, NV_ENCODE_XDR);
165716fd348SMartin Matuska 	if (error)
166716fd348SMartin Matuska 		return (error);
167716fd348SMartin Matuska 
168716fd348SMartin Matuska 	char *buf = malloc(buflen);
169716fd348SMartin Matuska 	if (buf == NULL)
170716fd348SMartin Matuska 		return (ENOMEM);
171716fd348SMartin Matuska 
172716fd348SMartin Matuska 	error = nvlist_pack(allcfgs, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP);
173716fd348SMartin Matuska 	if (error) {
174716fd348SMartin Matuska 		free(buf);
175716fd348SMartin Matuska 		return (error);
176716fd348SMartin Matuska 	}
177716fd348SMartin Matuska 
178716fd348SMartin Matuska 	/*
179716fd348SMartin Matuska 	 * Atomically update the file using a temporary file and the
180716fd348SMartin Matuska 	 * traditional unlink then rename steps.  This code provides
181716fd348SMartin Matuska 	 * no locking, it only guarantees the packed nvlist on disk
182716fd348SMartin Matuska 	 * is updated atomically and is internally consistent.
183716fd348SMartin Matuska 	 */
184716fd348SMartin Matuska 	char *tmpname = calloc(1, MAXPATHLEN);
185716fd348SMartin Matuska 	if (tmpname == NULL) {
186716fd348SMartin Matuska 		free(buf);
187716fd348SMartin Matuska 		return (ENOMEM);
188716fd348SMartin Matuska 	}
189716fd348SMartin Matuska 
190716fd348SMartin Matuska 	snprintf(tmpname, MAXPATHLEN - 1, "%s.XXXXXX", filename);
191716fd348SMartin Matuska 
192716fd348SMartin Matuska 	int fd = mkstemp(tmpname);
193716fd348SMartin Matuska 	if (fd < 0) {
194716fd348SMartin Matuska 		error = errno;
195716fd348SMartin Matuska 		free(buf);
196716fd348SMartin Matuska 		free(tmpname);
197716fd348SMartin Matuska 		return (error);
198716fd348SMartin Matuska 	}
199716fd348SMartin Matuska 	(void) close(fd);
200716fd348SMartin Matuska 
201716fd348SMartin Matuska 	gzFile fp = gzopen(tmpname, "w9b");
202716fd348SMartin Matuska 	if (fp == Z_NULL) {
203716fd348SMartin Matuska 		error = errno;
204716fd348SMartin Matuska 		free(buf);
205716fd348SMartin Matuska 		free(tmpname);
206716fd348SMartin Matuska 		return (errno);
207716fd348SMartin Matuska 	}
208716fd348SMartin Matuska 
209716fd348SMartin Matuska 	ssize_t rc, bytes = 0;
210716fd348SMartin Matuska 	while (bytes < buflen) {
211716fd348SMartin Matuska 		size_t size = MIN(buflen - bytes, 131072);
212716fd348SMartin Matuska 		rc = gzwrite(fp, buf + bytes, size);
213716fd348SMartin Matuska 		if (rc < 0) {
214716fd348SMartin Matuska 			free(buf);
215716fd348SMartin Matuska 			(void) gzerror(fp, &error);
216716fd348SMartin Matuska 			(void) gzclose(fp);
217716fd348SMartin Matuska 			(void) unlink(tmpname);
218716fd348SMartin Matuska 			free(tmpname);
219716fd348SMartin Matuska 			return (error);
220716fd348SMartin Matuska 		} else if (rc == 0) {
221716fd348SMartin Matuska 			break;
222716fd348SMartin Matuska 		} else {
223716fd348SMartin Matuska 			bytes += rc;
224716fd348SMartin Matuska 		}
225716fd348SMartin Matuska 	}
226716fd348SMartin Matuska 
227716fd348SMartin Matuska 	free(buf);
228716fd348SMartin Matuska 	(void) gzclose(fp);
229716fd348SMartin Matuska 
230716fd348SMartin Matuska 	if (bytes != buflen) {
231716fd348SMartin Matuska 		(void) unlink(tmpname);
232716fd348SMartin Matuska 		free(tmpname);
233716fd348SMartin Matuska 		return (EIO);
234716fd348SMartin Matuska 	}
235716fd348SMartin Matuska 
236716fd348SMartin Matuska 	/*
237716fd348SMartin Matuska 	 * Unlink the previous config file and replace it with the updated
238716fd348SMartin Matuska 	 * version.  If we're able to unlink the file then directory is
239716fd348SMartin Matuska 	 * writable by us and the subsequent rename should never fail.
240716fd348SMartin Matuska 	 */
241716fd348SMartin Matuska 	error = unlink(filename);
242716fd348SMartin Matuska 	if (error != 0 && errno != ENOENT) {
243716fd348SMartin Matuska 		error = errno;
244716fd348SMartin Matuska 		(void) unlink(tmpname);
245716fd348SMartin Matuska 		free(tmpname);
246716fd348SMartin Matuska 		return (error);
247716fd348SMartin Matuska 	}
248716fd348SMartin Matuska 
249716fd348SMartin Matuska 	error = rename(tmpname, filename);
250716fd348SMartin Matuska 	if (error != 0) {
251716fd348SMartin Matuska 		error = errno;
252716fd348SMartin Matuska 		(void) unlink(tmpname);
253716fd348SMartin Matuska 		free(tmpname);
254716fd348SMartin Matuska 		return (error);
255716fd348SMartin Matuska 	}
256716fd348SMartin Matuska 
257716fd348SMartin Matuska 	free(tmpname);
258716fd348SMartin Matuska 
259716fd348SMartin Matuska 	return (0);
260716fd348SMartin Matuska }
261716fd348SMartin Matuska 
262716fd348SMartin Matuska /*
263716fd348SMartin Matuska  * Add the dRAID map to the file and write it out.
264716fd348SMartin Matuska  */
265716fd348SMartin Matuska static int
write_map_key(const char * filename,char * key,draid_map_t * map,double worst_ratio,double avg_ratio)266716fd348SMartin Matuska write_map_key(const char *filename, char *key, draid_map_t *map,
267716fd348SMartin Matuska     double worst_ratio, double avg_ratio)
268716fd348SMartin Matuska {
269716fd348SMartin Matuska 	nvlist_t *nv_cfg, *allcfgs;
270716fd348SMartin Matuska 	int error;
271716fd348SMartin Matuska 
272716fd348SMartin Matuska 	/*
273716fd348SMartin Matuska 	 * Add the configuration to an existing or new file.  The new
274716fd348SMartin Matuska 	 * configuration will replace an existing configuration with the
275716fd348SMartin Matuska 	 * same key if it has a lower ratio and is therefore better.
276716fd348SMartin Matuska 	 */
277716fd348SMartin Matuska 	error = read_map(filename, &allcfgs);
278716fd348SMartin Matuska 	if (error == ENOENT) {
279716fd348SMartin Matuska 		allcfgs = fnvlist_alloc();
280716fd348SMartin Matuska 	} else if (error != 0) {
281716fd348SMartin Matuska 		return (error);
282716fd348SMartin Matuska 	}
283716fd348SMartin Matuska 
284716fd348SMartin Matuska 	error = nvlist_lookup_nvlist(allcfgs, key, &nv_cfg);
285716fd348SMartin Matuska 	if (error == 0) {
286716fd348SMartin Matuska 		uint64_t nv_cfg_worst_ratio = fnvlist_lookup_uint64(nv_cfg,
287716fd348SMartin Matuska 		    MAP_WORST_RATIO);
288716fd348SMartin Matuska 		double nv_worst_ratio = (double)nv_cfg_worst_ratio / 1000.0;
289716fd348SMartin Matuska 
290716fd348SMartin Matuska 		if (worst_ratio < nv_worst_ratio) {
291716fd348SMartin Matuska 			/* Replace old map with the more balanced new map. */
292716fd348SMartin Matuska 			fnvlist_remove(allcfgs, key);
293716fd348SMartin Matuska 		} else {
294716fd348SMartin Matuska 			/* The old map is preferable, keep it. */
295716fd348SMartin Matuska 			nvlist_free(allcfgs);
296716fd348SMartin Matuska 			return (EEXIST);
297716fd348SMartin Matuska 		}
298716fd348SMartin Matuska 	}
299716fd348SMartin Matuska 
300716fd348SMartin Matuska 	nvlist_t *cfg = fnvlist_alloc();
301716fd348SMartin Matuska 	fnvlist_add_uint64(cfg, MAP_SEED, map->dm_seed);
302716fd348SMartin Matuska 	fnvlist_add_uint64(cfg, MAP_CHECKSUM, map->dm_checksum);
303716fd348SMartin Matuska 	fnvlist_add_uint64(cfg, MAP_CHILDREN, map->dm_children);
304716fd348SMartin Matuska 	fnvlist_add_uint64(cfg, MAP_NPERMS, map->dm_nperms);
305716fd348SMartin Matuska 	fnvlist_add_uint8_array(cfg, MAP_PERMS,  map->dm_perms,
306716fd348SMartin Matuska 	    map->dm_children * map->dm_nperms * sizeof (uint8_t));
307716fd348SMartin Matuska 
308716fd348SMartin Matuska 	fnvlist_add_uint64(cfg, MAP_WORST_RATIO,
309716fd348SMartin Matuska 	    (uint64_t)(worst_ratio * 1000.0));
310716fd348SMartin Matuska 	fnvlist_add_uint64(cfg, MAP_AVG_RATIO,
311716fd348SMartin Matuska 	    (uint64_t)(avg_ratio * 1000.0));
312716fd348SMartin Matuska 
313716fd348SMartin Matuska 	error = nvlist_add_nvlist(allcfgs, key, cfg);
314716fd348SMartin Matuska 	if (error == 0)
315716fd348SMartin Matuska 		error = write_map(filename, allcfgs);
316716fd348SMartin Matuska 
317716fd348SMartin Matuska 	nvlist_free(cfg);
318716fd348SMartin Matuska 	nvlist_free(allcfgs);
319716fd348SMartin Matuska 	return (error);
320716fd348SMartin Matuska }
321716fd348SMartin Matuska 
322716fd348SMartin Matuska static void
dump_map(draid_map_t * map,const char * key,double worst_ratio,double avg_ratio,int verbose)323a0b956f5SMartin Matuska dump_map(draid_map_t *map, const char *key, double worst_ratio,
324a0b956f5SMartin Matuska     double avg_ratio, int verbose)
325716fd348SMartin Matuska {
326716fd348SMartin Matuska 	if (verbose == 0) {
327716fd348SMartin Matuska 		return;
328716fd348SMartin Matuska 	} else if (verbose == 1) {
329716fd348SMartin Matuska 		printf("    \"%s\": seed: 0x%016llx worst_ratio: %2.03f "
330716fd348SMartin Matuska 		    "avg_ratio: %2.03f\n", key, (u_longlong_t)map->dm_seed,
331716fd348SMartin Matuska 		    worst_ratio, avg_ratio);
332716fd348SMartin Matuska 		return;
333716fd348SMartin Matuska 	} else {
334716fd348SMartin Matuska 		printf("    \"%s\":\n"
335716fd348SMartin Matuska 		    "        seed: 0x%016llx\n"
336716fd348SMartin Matuska 		    "        checksum: 0x%016llx\n"
337716fd348SMartin Matuska 		    "        worst_ratio: %2.03f\n"
338716fd348SMartin Matuska 		    "        avg_ratio: %2.03f\n"
339716fd348SMartin Matuska 		    "        children: %llu\n"
340716fd348SMartin Matuska 		    "        nperms: %llu\n",
341716fd348SMartin Matuska 		    key, (u_longlong_t)map->dm_seed,
342716fd348SMartin Matuska 		    (u_longlong_t)map->dm_checksum, worst_ratio, avg_ratio,
343716fd348SMartin Matuska 		    (u_longlong_t)map->dm_children,
344716fd348SMartin Matuska 		    (u_longlong_t)map->dm_nperms);
345716fd348SMartin Matuska 
346716fd348SMartin Matuska 		if (verbose > 2) {
347716fd348SMartin Matuska 			printf("        perms = {\n");
348716fd348SMartin Matuska 			for (int i = 0; i < map->dm_nperms; i++) {
349716fd348SMartin Matuska 				printf("            { ");
350716fd348SMartin Matuska 				for (int j = 0; j < map->dm_children; j++) {
351716fd348SMartin Matuska 					printf("%3d%s ", map->dm_perms[
352716fd348SMartin Matuska 					    i * map->dm_children + j],
353716fd348SMartin Matuska 					    j < map->dm_children - 1 ?
354716fd348SMartin Matuska 					    "," : "");
355716fd348SMartin Matuska 				}
356716fd348SMartin Matuska 				printf(" },\n");
357716fd348SMartin Matuska 			}
358716fd348SMartin Matuska 			printf("        }\n");
359716fd348SMartin Matuska 		} else if (verbose == 2) {
360716fd348SMartin Matuska 			printf("        draid_perms = <omitted>\n");
361716fd348SMartin Matuska 		}
362716fd348SMartin Matuska 	}
363716fd348SMartin Matuska }
364716fd348SMartin Matuska 
365716fd348SMartin Matuska static void
dump_map_nv(const char * key,nvlist_t * cfg,int verbose)366a0b956f5SMartin Matuska dump_map_nv(const char *key, nvlist_t *cfg, int verbose)
367716fd348SMartin Matuska {
368716fd348SMartin Matuska 	draid_map_t map;
369716fd348SMartin Matuska 	uint_t c;
370716fd348SMartin Matuska 
371716fd348SMartin Matuska 	uint64_t worst_ratio = fnvlist_lookup_uint64(cfg, MAP_WORST_RATIO);
372716fd348SMartin Matuska 	uint64_t avg_ratio = fnvlist_lookup_uint64(cfg, MAP_AVG_RATIO);
373716fd348SMartin Matuska 
374716fd348SMartin Matuska 	map.dm_seed = fnvlist_lookup_uint64(cfg, MAP_SEED);
375716fd348SMartin Matuska 	map.dm_checksum = fnvlist_lookup_uint64(cfg, MAP_CHECKSUM);
376716fd348SMartin Matuska 	map.dm_children = fnvlist_lookup_uint64(cfg, MAP_CHILDREN);
377716fd348SMartin Matuska 	map.dm_nperms = fnvlist_lookup_uint64(cfg, MAP_NPERMS);
378be181ee2SMartin Matuska 	map.dm_perms = fnvlist_lookup_uint8_array(cfg, MAP_PERMS, &c);
379716fd348SMartin Matuska 
380716fd348SMartin Matuska 	dump_map(&map, key, (double)worst_ratio / 1000.0,
381716fd348SMartin Matuska 	    avg_ratio / 1000.0, verbose);
382716fd348SMartin Matuska }
383716fd348SMartin Matuska 
384716fd348SMartin Matuska /*
385716fd348SMartin Matuska  * Print a summary of the mapping.
386716fd348SMartin Matuska  */
387716fd348SMartin Matuska static int
dump_map_key(const char * filename,const char * key,int verbose)388a0b956f5SMartin Matuska dump_map_key(const char *filename, const char *key, int verbose)
389716fd348SMartin Matuska {
390716fd348SMartin Matuska 	nvlist_t *cfg;
391716fd348SMartin Matuska 	int error;
392716fd348SMartin Matuska 
393716fd348SMartin Matuska 	error = read_map_key(filename, key, &cfg);
394716fd348SMartin Matuska 	if (error != 0)
395716fd348SMartin Matuska 		return (error);
396716fd348SMartin Matuska 
397716fd348SMartin Matuska 	dump_map_nv(key, cfg, verbose);
398716fd348SMartin Matuska 
399716fd348SMartin Matuska 	return (0);
400716fd348SMartin Matuska }
401716fd348SMartin Matuska 
402716fd348SMartin Matuska /*
403716fd348SMartin Matuska  * Allocate a new permutation map for evaluation.
404716fd348SMartin Matuska  */
405716fd348SMartin Matuska static int
alloc_new_map(uint64_t children,uint64_t nperms,uint64_t seed,draid_map_t ** mapp)406716fd348SMartin Matuska alloc_new_map(uint64_t children, uint64_t nperms, uint64_t seed,
407716fd348SMartin Matuska     draid_map_t **mapp)
408716fd348SMartin Matuska {
409716fd348SMartin Matuska 	draid_map_t *map;
410716fd348SMartin Matuska 	int error;
411716fd348SMartin Matuska 
412716fd348SMartin Matuska 	map = malloc(sizeof (draid_map_t));
413716fd348SMartin Matuska 	if (map == NULL)
414716fd348SMartin Matuska 		return (ENOMEM);
415716fd348SMartin Matuska 
416716fd348SMartin Matuska 	map->dm_children = children;
417716fd348SMartin Matuska 	map->dm_nperms = nperms;
418716fd348SMartin Matuska 	map->dm_seed = seed;
419716fd348SMartin Matuska 	map->dm_checksum = 0;
420716fd348SMartin Matuska 
421716fd348SMartin Matuska 	error = vdev_draid_generate_perms(map, &map->dm_perms);
422716fd348SMartin Matuska 	if (error) {
423716fd348SMartin Matuska 		free(map);
424716fd348SMartin Matuska 		return (error);
425716fd348SMartin Matuska 	}
426716fd348SMartin Matuska 
427716fd348SMartin Matuska 	*mapp = map;
428716fd348SMartin Matuska 
429716fd348SMartin Matuska 	return (0);
430716fd348SMartin Matuska }
431716fd348SMartin Matuska 
432716fd348SMartin Matuska /*
433716fd348SMartin Matuska  * Allocate the fixed permutation map for N children.
434716fd348SMartin Matuska  */
435716fd348SMartin Matuska static int
alloc_fixed_map(uint64_t children,draid_map_t ** mapp)436716fd348SMartin Matuska alloc_fixed_map(uint64_t children, draid_map_t **mapp)
437716fd348SMartin Matuska {
438716fd348SMartin Matuska 	const draid_map_t *fixed_map;
439716fd348SMartin Matuska 	draid_map_t *map;
440716fd348SMartin Matuska 	int error;
441716fd348SMartin Matuska 
442716fd348SMartin Matuska 	error = vdev_draid_lookup_map(children, &fixed_map);
443716fd348SMartin Matuska 	if (error)
444716fd348SMartin Matuska 		return (error);
445716fd348SMartin Matuska 
446716fd348SMartin Matuska 	map = malloc(sizeof (draid_map_t));
447716fd348SMartin Matuska 	if (map == NULL)
448716fd348SMartin Matuska 		return (ENOMEM);
449716fd348SMartin Matuska 
450716fd348SMartin Matuska 	memcpy(map, fixed_map, sizeof (draid_map_t));
451716fd348SMartin Matuska 	VERIFY3U(map->dm_checksum, !=, 0);
452716fd348SMartin Matuska 
453716fd348SMartin Matuska 	error = vdev_draid_generate_perms(map, &map->dm_perms);
454716fd348SMartin Matuska 	if (error) {
455716fd348SMartin Matuska 		free(map);
456716fd348SMartin Matuska 		return (error);
457716fd348SMartin Matuska 	}
458716fd348SMartin Matuska 
459716fd348SMartin Matuska 	*mapp = map;
460716fd348SMartin Matuska 
461716fd348SMartin Matuska 	return (0);
462716fd348SMartin Matuska }
463716fd348SMartin Matuska 
464716fd348SMartin Matuska /*
465716fd348SMartin Matuska  * Free a permutation map.
466716fd348SMartin Matuska  */
467716fd348SMartin Matuska static void
free_map(draid_map_t * map)468716fd348SMartin Matuska free_map(draid_map_t *map)
469716fd348SMartin Matuska {
470716fd348SMartin Matuska 	free(map->dm_perms);
471716fd348SMartin Matuska 	free(map);
472716fd348SMartin Matuska }
473716fd348SMartin Matuska 
474716fd348SMartin Matuska /*
475716fd348SMartin Matuska  * Check if dev is in the provided list of faulted devices.
476716fd348SMartin Matuska  */
477716fd348SMartin Matuska static inline boolean_t
is_faulted(int * faulted_devs,int nfaulted,int dev)478716fd348SMartin Matuska is_faulted(int *faulted_devs, int nfaulted, int dev)
479716fd348SMartin Matuska {
480716fd348SMartin Matuska 	for (int i = 0; i < nfaulted; i++)
481716fd348SMartin Matuska 		if (faulted_devs[i] == dev)
482716fd348SMartin Matuska 			return (B_TRUE);
483716fd348SMartin Matuska 
484716fd348SMartin Matuska 	return (B_FALSE);
485716fd348SMartin Matuska }
486716fd348SMartin Matuska 
487716fd348SMartin Matuska /*
488716fd348SMartin Matuska  * Evaluate how resilvering I/O will be distributed given a list of faulted
489716fd348SMartin Matuska  * vdevs.  As a simplification we assume one IO is sufficient to repair each
490716fd348SMartin Matuska  * damaged device in a group.
491716fd348SMartin Matuska  */
492716fd348SMartin Matuska static double
eval_resilver(draid_map_t * map,uint64_t groupwidth,uint64_t nspares,int * faulted_devs,int nfaulted,int * min_child_ios,int * max_child_ios)493716fd348SMartin Matuska eval_resilver(draid_map_t *map, uint64_t groupwidth, uint64_t nspares,
494716fd348SMartin Matuska     int *faulted_devs, int nfaulted, int *min_child_ios, int *max_child_ios)
495716fd348SMartin Matuska {
496716fd348SMartin Matuska 	uint64_t children = map->dm_children;
497716fd348SMartin Matuska 	uint64_t ngroups = 1;
498716fd348SMartin Matuska 	uint64_t ndisks = children - nspares;
499716fd348SMartin Matuska 
500716fd348SMartin Matuska 	/*
501716fd348SMartin Matuska 	 * Calculate the minimum number of groups required to fill a slice.
502716fd348SMartin Matuska 	 */
503716fd348SMartin Matuska 	while (ngroups * (groupwidth) % (children - nspares) != 0)
504716fd348SMartin Matuska 		ngroups++;
505716fd348SMartin Matuska 
506716fd348SMartin Matuska 	int *ios = calloc(map->dm_children, sizeof (uint64_t));
507716fd348SMartin Matuska 
508be181ee2SMartin Matuska 	ASSERT3P(ios, !=, NULL);
509be181ee2SMartin Matuska 
510716fd348SMartin Matuska 	/* Resilver all rows */
511716fd348SMartin Matuska 	for (int i = 0; i < map->dm_nperms; i++) {
512716fd348SMartin Matuska 		uint8_t *row = &map->dm_perms[i * map->dm_children];
513716fd348SMartin Matuska 
514716fd348SMartin Matuska 		/* Resilver all groups with faulted drives */
515716fd348SMartin Matuska 		for (int j = 0; j < ngroups; j++) {
516716fd348SMartin Matuska 			uint64_t spareidx = map->dm_children - nspares;
517716fd348SMartin Matuska 			boolean_t repair_needed = B_FALSE;
518716fd348SMartin Matuska 
519716fd348SMartin Matuska 			/* See if any devices in this group are faulted */
520716fd348SMartin Matuska 			uint64_t groupstart = (j * groupwidth) % ndisks;
521716fd348SMartin Matuska 
522716fd348SMartin Matuska 			for (int k = 0; k < groupwidth; k++) {
523716fd348SMartin Matuska 				uint64_t groupidx = (groupstart + k) % ndisks;
524716fd348SMartin Matuska 
525716fd348SMartin Matuska 				repair_needed = is_faulted(faulted_devs,
526716fd348SMartin Matuska 				    nfaulted, row[groupidx]);
527716fd348SMartin Matuska 				if (repair_needed)
528716fd348SMartin Matuska 					break;
529716fd348SMartin Matuska 			}
530716fd348SMartin Matuska 
531716fd348SMartin Matuska 			if (repair_needed == B_FALSE)
532716fd348SMartin Matuska 				continue;
533716fd348SMartin Matuska 
534716fd348SMartin Matuska 			/*
535716fd348SMartin Matuska 			 * This group is degraded. Calculate the number of
536716fd348SMartin Matuska 			 * reads the non-faulted drives require and the number
537716fd348SMartin Matuska 			 * of writes to the distributed hot spare for this row.
538716fd348SMartin Matuska 			 */
539716fd348SMartin Matuska 			for (int k = 0; k < groupwidth; k++) {
540716fd348SMartin Matuska 				uint64_t groupidx = (groupstart + k) % ndisks;
541716fd348SMartin Matuska 
542716fd348SMartin Matuska 				if (!is_faulted(faulted_devs, nfaulted,
543716fd348SMartin Matuska 				    row[groupidx])) {
544716fd348SMartin Matuska 					ios[row[groupidx]]++;
545716fd348SMartin Matuska 				} else if (nspares > 0) {
546716fd348SMartin Matuska 					while (is_faulted(faulted_devs,
547716fd348SMartin Matuska 					    nfaulted, row[spareidx])) {
548716fd348SMartin Matuska 						spareidx++;
549716fd348SMartin Matuska 					}
550716fd348SMartin Matuska 
551716fd348SMartin Matuska 					ASSERT3U(spareidx, <, map->dm_children);
552716fd348SMartin Matuska 					ios[row[spareidx]]++;
553716fd348SMartin Matuska 					spareidx++;
554716fd348SMartin Matuska 				}
555716fd348SMartin Matuska 			}
556716fd348SMartin Matuska 		}
557716fd348SMartin Matuska 	}
558716fd348SMartin Matuska 
559716fd348SMartin Matuska 	*min_child_ios = INT_MAX;
560716fd348SMartin Matuska 	*max_child_ios = 0;
561716fd348SMartin Matuska 
562716fd348SMartin Matuska 	/*
563716fd348SMartin Matuska 	 * Find the drives with fewest and most required I/O.  These values
564716fd348SMartin Matuska 	 * are used to calculate the imbalance ratio.  To avoid returning an
565716fd348SMartin Matuska 	 * infinite value for permutations which have children that perform
566716fd348SMartin Matuska 	 * no IO a floor of 1 IO per child is set.  This ensures a meaningful
567716fd348SMartin Matuska 	 * ratio is returned for comparison and it is not an uncommon when
568716fd348SMartin Matuska 	 * there are a large number of children.
569716fd348SMartin Matuska 	 */
570716fd348SMartin Matuska 	for (int i = 0; i < map->dm_children; i++) {
571716fd348SMartin Matuska 
572716fd348SMartin Matuska 		if (is_faulted(faulted_devs, nfaulted, i)) {
573716fd348SMartin Matuska 			ASSERT0(ios[i]);
574716fd348SMartin Matuska 			continue;
575716fd348SMartin Matuska 		}
576716fd348SMartin Matuska 
577716fd348SMartin Matuska 		if (ios[i] == 0)
578716fd348SMartin Matuska 			ios[i] = 1;
579716fd348SMartin Matuska 
580716fd348SMartin Matuska 		if (ios[i] < *min_child_ios)
581716fd348SMartin Matuska 			*min_child_ios = ios[i];
582716fd348SMartin Matuska 
583716fd348SMartin Matuska 		if (ios[i] > *max_child_ios)
584716fd348SMartin Matuska 			*max_child_ios = ios[i];
585716fd348SMartin Matuska 	}
586716fd348SMartin Matuska 
587716fd348SMartin Matuska 	ASSERT3S(*min_child_ios, !=, INT_MAX);
588716fd348SMartin Matuska 	ASSERT3S(*max_child_ios, !=, 0);
589716fd348SMartin Matuska 
590716fd348SMartin Matuska 	double ratio = (double)(*max_child_ios) / (double)(*min_child_ios);
591716fd348SMartin Matuska 
592716fd348SMartin Matuska 	free(ios);
593716fd348SMartin Matuska 
594716fd348SMartin Matuska 	return (ratio);
595716fd348SMartin Matuska }
596716fd348SMartin Matuska 
/*
 * Evaluate the quality of the permutation mapping by considering possible
 * device failures.  Returns the imbalance ratio for the worst mapping, which
 * is defined to be the largest number of child IOs over the fewest number
 * of child IOs.  A value of 1.0 indicates the mapping is perfectly balanced
 * and all children perform an equal amount of work during reconstruction.
 */
604716fd348SMartin Matuska static void
eval_decluster(draid_map_t * map,double * worst_ratiop,double * avg_ratiop)605716fd348SMartin Matuska eval_decluster(draid_map_t *map, double *worst_ratiop, double *avg_ratiop)
606716fd348SMartin Matuska {
607716fd348SMartin Matuska 	uint64_t children = map->dm_children;
608716fd348SMartin Matuska 	double worst_ratio = 1.0;
609716fd348SMartin Matuska 	double sum = 0;
610716fd348SMartin Matuska 	int worst_min_ios = 0, worst_max_ios = 0;
611716fd348SMartin Matuska 	int n = 0;
612716fd348SMartin Matuska 
613716fd348SMartin Matuska 	/*
614716fd348SMartin Matuska 	 * When there are only 2 children there can be no distributed
615716fd348SMartin Matuska 	 * spare and no resilver to evaluate.  Default to a ratio of 1.0
616716fd348SMartin Matuska 	 * for this degenerate case.
617716fd348SMartin Matuska 	 */
618716fd348SMartin Matuska 	if (children == VDEV_DRAID_MIN_CHILDREN) {
619716fd348SMartin Matuska 		*worst_ratiop = 1.0;
620716fd348SMartin Matuska 		*avg_ratiop = 1.0;
621716fd348SMartin Matuska 		return;
622716fd348SMartin Matuska 	}
623716fd348SMartin Matuska 
624716fd348SMartin Matuska 	/*
625716fd348SMartin Matuska 	 * Score the mapping as if it had either 1 or 2 distributed spares.
626716fd348SMartin Matuska 	 */
627716fd348SMartin Matuska 	for (int nspares = 1; nspares <= 2; nspares++) {
628716fd348SMartin Matuska 		uint64_t faults = nspares;
629716fd348SMartin Matuska 
630716fd348SMartin Matuska 		/*
631716fd348SMartin Matuska 		 * Score groupwidths up to 19.  This value was chosen as the
632716fd348SMartin Matuska 		 * largest reasonable width (16d+3p).  dRAID pools may be still
633716fd348SMartin Matuska 		 * be created with wider stripes but they are not considered in
634716fd348SMartin Matuska 		 * this analysis in order to optimize for the most common cases.
635716fd348SMartin Matuska 		 */
636716fd348SMartin Matuska 		for (uint64_t groupwidth = 2;
637716fd348SMartin Matuska 		    groupwidth <= MIN(children - nspares, 19);
638716fd348SMartin Matuska 		    groupwidth++) {
639716fd348SMartin Matuska 			int faulted_devs[2];
640716fd348SMartin Matuska 			int min_ios, max_ios;
641716fd348SMartin Matuska 
642716fd348SMartin Matuska 			/*
643716fd348SMartin Matuska 			 * Score possible devices faults.  This is limited
644716fd348SMartin Matuska 			 * to exactly one fault per distributed spare for
645716fd348SMartin Matuska 			 * the purposes of this similation.
646716fd348SMartin Matuska 			 */
647716fd348SMartin Matuska 			for (int f1 = 0; f1 < children; f1++) {
648716fd348SMartin Matuska 				faulted_devs[0] = f1;
649716fd348SMartin Matuska 				double ratio;
650716fd348SMartin Matuska 
651716fd348SMartin Matuska 				if (faults == 1) {
652716fd348SMartin Matuska 					ratio = eval_resilver(map, groupwidth,
653716fd348SMartin Matuska 					    nspares, faulted_devs, faults,
654716fd348SMartin Matuska 					    &min_ios, &max_ios);
655716fd348SMartin Matuska 
656716fd348SMartin Matuska 					if (ratio > worst_ratio) {
657716fd348SMartin Matuska 						worst_ratio = ratio;
658716fd348SMartin Matuska 						worst_min_ios = min_ios;
659716fd348SMartin Matuska 						worst_max_ios = max_ios;
660716fd348SMartin Matuska 					}
661716fd348SMartin Matuska 
662716fd348SMartin Matuska 					sum += ratio;
663716fd348SMartin Matuska 					n++;
664716fd348SMartin Matuska 				} else if (faults == 2) {
665716fd348SMartin Matuska 					for (int f2 = f1 + 1; f2 < children;
666716fd348SMartin Matuska 					    f2++) {
667716fd348SMartin Matuska 						faulted_devs[1] = f2;
668716fd348SMartin Matuska 
669716fd348SMartin Matuska 						ratio = eval_resilver(map,
670716fd348SMartin Matuska 						    groupwidth, nspares,
671716fd348SMartin Matuska 						    faulted_devs, faults,
672716fd348SMartin Matuska 						    &min_ios, &max_ios);
673716fd348SMartin Matuska 
674716fd348SMartin Matuska 						if (ratio > worst_ratio) {
675716fd348SMartin Matuska 							worst_ratio = ratio;
676716fd348SMartin Matuska 							worst_min_ios = min_ios;
677716fd348SMartin Matuska 							worst_max_ios = max_ios;
678716fd348SMartin Matuska 						}
679716fd348SMartin Matuska 
680716fd348SMartin Matuska 						sum += ratio;
681716fd348SMartin Matuska 						n++;
682716fd348SMartin Matuska 					}
683716fd348SMartin Matuska 				}
684716fd348SMartin Matuska 			}
685716fd348SMartin Matuska 		}
686716fd348SMartin Matuska 	}
687716fd348SMartin Matuska 
688716fd348SMartin Matuska 	*worst_ratiop = worst_ratio;
689716fd348SMartin Matuska 	*avg_ratiop = sum / n;
690716fd348SMartin Matuska 
691716fd348SMartin Matuska 	/*
692716fd348SMartin Matuska 	 * Log the min/max io values for particularly unbalanced maps.
693716fd348SMartin Matuska 	 * Since the maps are generated entirely randomly these are possible
694716fd348SMartin Matuska 	 * be exceedingly unlikely.  We log it for possible investigation.
695716fd348SMartin Matuska 	 */
696716fd348SMartin Matuska 	if (worst_ratio > 100.0) {
697716fd348SMartin Matuska 		dump_map(map, "DEBUG", worst_ratio, *avg_ratiop, 2);
698716fd348SMartin Matuska 		printf("worst_min_ios=%d worst_max_ios=%d\n",
699716fd348SMartin Matuska 		    worst_min_ios, worst_max_ios);
700716fd348SMartin Matuska 	}
701716fd348SMartin Matuska }
702716fd348SMartin Matuska 
703716fd348SMartin Matuska static int
eval_maps(uint64_t children,int passes,uint64_t * map_seed,draid_map_t ** best_mapp,double * best_ratiop,double * avg_ratiop)704716fd348SMartin Matuska eval_maps(uint64_t children, int passes, uint64_t *map_seed,
705716fd348SMartin Matuska     draid_map_t **best_mapp, double *best_ratiop, double *avg_ratiop)
706716fd348SMartin Matuska {
707716fd348SMartin Matuska 	draid_map_t *best_map = NULL;
708716fd348SMartin Matuska 	double best_worst_ratio = 1000.0;
709716fd348SMartin Matuska 	double best_avg_ratio = 1000.0;
710716fd348SMartin Matuska 
711716fd348SMartin Matuska 	/*
712716fd348SMartin Matuska 	 * Perform the requested number of passes evaluating randomly
713716fd348SMartin Matuska 	 * generated permutation maps.  Only the best version is kept.
714716fd348SMartin Matuska 	 */
715716fd348SMartin Matuska 	for (int i = 0; i < passes; i++) {
716716fd348SMartin Matuska 		double worst_ratio, avg_ratio;
717716fd348SMartin Matuska 		draid_map_t *map;
718716fd348SMartin Matuska 		int error;
719716fd348SMartin Matuska 
720716fd348SMartin Matuska 		/*
721716fd348SMartin Matuska 		 * Calculate the next seed and generate a new candidate map.
722716fd348SMartin Matuska 		 */
723716fd348SMartin Matuska 		error = alloc_new_map(children, MAP_ROWS_DEFAULT,
724716fd348SMartin Matuska 		    vdev_draid_rand(map_seed), &map);
725be181ee2SMartin Matuska 		if (error) {
726be181ee2SMartin Matuska 			if (best_map != NULL)
727be181ee2SMartin Matuska 				free_map(best_map);
728716fd348SMartin Matuska 			return (error);
729be181ee2SMartin Matuska 		}
730716fd348SMartin Matuska 
731716fd348SMartin Matuska 		/*
732716fd348SMartin Matuska 		 * Consider maps with a lower worst_ratio to be of higher
733716fd348SMartin Matuska 		 * quality.  Some maps may have a lower avg_ratio but they
734716fd348SMartin Matuska 		 * are discarded since they might include some particularly
735716fd348SMartin Matuska 		 * imbalanced permutations.  The average is tracked to in
736716fd348SMartin Matuska 		 * order to get a sense of the average permutation quality.
737716fd348SMartin Matuska 		 */
738716fd348SMartin Matuska 		eval_decluster(map, &worst_ratio, &avg_ratio);
739716fd348SMartin Matuska 
740716fd348SMartin Matuska 		if (best_map == NULL || worst_ratio < best_worst_ratio) {
741716fd348SMartin Matuska 
742716fd348SMartin Matuska 			if (best_map != NULL)
743716fd348SMartin Matuska 				free_map(best_map);
744716fd348SMartin Matuska 
745716fd348SMartin Matuska 			best_map = map;
746716fd348SMartin Matuska 			best_worst_ratio = worst_ratio;
747716fd348SMartin Matuska 			best_avg_ratio = avg_ratio;
748716fd348SMartin Matuska 		} else {
749716fd348SMartin Matuska 			free_map(map);
750716fd348SMartin Matuska 		}
751716fd348SMartin Matuska 	}
752716fd348SMartin Matuska 
753716fd348SMartin Matuska 	/*
754716fd348SMartin Matuska 	 * After determining the best map generate a checksum over the full
755716fd348SMartin Matuska 	 * permutation array.  This checksum is verified when opening a dRAID
756716fd348SMartin Matuska 	 * pool to ensure the generated in memory permutations are correct.
757716fd348SMartin Matuska 	 */
758716fd348SMartin Matuska 	zio_cksum_t cksum;
759716fd348SMartin Matuska 	fletcher_4_native_varsize(best_map->dm_perms,
760716fd348SMartin Matuska 	    sizeof (uint8_t) * best_map->dm_children * best_map->dm_nperms,
761716fd348SMartin Matuska 	    &cksum);
762716fd348SMartin Matuska 	best_map->dm_checksum = cksum.zc_word[0];
763716fd348SMartin Matuska 
764716fd348SMartin Matuska 	*best_mapp = best_map;
765716fd348SMartin Matuska 	*best_ratiop = best_worst_ratio;
766716fd348SMartin Matuska 	*avg_ratiop = best_avg_ratio;
767716fd348SMartin Matuska 
768716fd348SMartin Matuska 	return (0);
769716fd348SMartin Matuska }
770716fd348SMartin Matuska 
771716fd348SMartin Matuska static int
draid_generate(int argc,char * argv[])772716fd348SMartin Matuska draid_generate(int argc, char *argv[])
773716fd348SMartin Matuska {
774716fd348SMartin Matuska 	char filename[MAXPATHLEN] = {0};
775be181ee2SMartin Matuska 	uint64_t map_seed[2];
776716fd348SMartin Matuska 	int c, fd, error, verbose = 0, passes = 1, continuous = 0;
777716fd348SMartin Matuska 	int min_children = VDEV_DRAID_MIN_CHILDREN;
778716fd348SMartin Matuska 	int max_children = VDEV_DRAID_MAX_CHILDREN;
779716fd348SMartin Matuska 	int restarts = 0;
780716fd348SMartin Matuska 
781716fd348SMartin Matuska 	while ((c = getopt(argc, argv, ":cm:n:p:v")) != -1) {
782716fd348SMartin Matuska 		switch (c) {
783716fd348SMartin Matuska 		case 'c':
784716fd348SMartin Matuska 			continuous++;
785716fd348SMartin Matuska 			break;
786716fd348SMartin Matuska 		case 'm':
787716fd348SMartin Matuska 			min_children = (int)strtol(optarg, NULL, 0);
788716fd348SMartin Matuska 			if (min_children < VDEV_DRAID_MIN_CHILDREN) {
789716fd348SMartin Matuska 				(void) fprintf(stderr, "A minimum of 2 "
790716fd348SMartin Matuska 				    "children are required.\n");
791716fd348SMartin Matuska 				return (1);
792716fd348SMartin Matuska 			}
793716fd348SMartin Matuska 
794716fd348SMartin Matuska 			break;
795716fd348SMartin Matuska 		case 'n':
796716fd348SMartin Matuska 			max_children = (int)strtol(optarg, NULL, 0);
797716fd348SMartin Matuska 			if (max_children > VDEV_DRAID_MAX_CHILDREN) {
798716fd348SMartin Matuska 				(void) fprintf(stderr, "A maximum of %d "
799716fd348SMartin Matuska 				    "children are allowed.\n",
800716fd348SMartin Matuska 				    VDEV_DRAID_MAX_CHILDREN);
801716fd348SMartin Matuska 				return (1);
802716fd348SMartin Matuska 			}
803716fd348SMartin Matuska 			break;
804716fd348SMartin Matuska 		case 'p':
805716fd348SMartin Matuska 			passes = (int)strtol(optarg, NULL, 0);
806716fd348SMartin Matuska 			break;
807716fd348SMartin Matuska 		case 'v':
808716fd348SMartin Matuska 			/*
809716fd348SMartin Matuska 			 * 0 - Only log when a better map is added to the file.
810716fd348SMartin Matuska 			 * 1 - Log the current best map for each child count.
811716fd348SMartin Matuska 			 *     Minimal output on a single summary line.
812716fd348SMartin Matuska 			 * 2 - Log the current best map for each child count.
813716fd348SMartin Matuska 			 *     More verbose includes most map fields.
814716fd348SMartin Matuska 			 * 3 - Log the current best map for each child count.
815716fd348SMartin Matuska 			 *     Very verbose all fields including the full map.
816716fd348SMartin Matuska 			 */
817716fd348SMartin Matuska 			verbose++;
818716fd348SMartin Matuska 			break;
819716fd348SMartin Matuska 		case ':':
820716fd348SMartin Matuska 			(void) fprintf(stderr,
821716fd348SMartin Matuska 			    "missing argument for '%c' option\n", optopt);
822716fd348SMartin Matuska 			draid_usage();
823716fd348SMartin Matuska 			break;
824716fd348SMartin Matuska 		case '?':
825716fd348SMartin Matuska 			(void) fprintf(stderr, "invalid option '%c'\n",
826716fd348SMartin Matuska 			    optopt);
827716fd348SMartin Matuska 			draid_usage();
828716fd348SMartin Matuska 			break;
829716fd348SMartin Matuska 		}
830716fd348SMartin Matuska 	}
831716fd348SMartin Matuska 
832716fd348SMartin Matuska 	if (argc > optind)
833be181ee2SMartin Matuska 		strlcpy(filename, argv[optind], sizeof (filename));
834716fd348SMartin Matuska 	else {
835716fd348SMartin Matuska 		(void) fprintf(stderr, "A FILE must be specified.\n");
836716fd348SMartin Matuska 		return (1);
837716fd348SMartin Matuska 	}
838716fd348SMartin Matuska 
839716fd348SMartin Matuska restart:
840716fd348SMartin Matuska 	/*
841716fd348SMartin Matuska 	 * Start with a fresh seed from /dev/urandom.
842716fd348SMartin Matuska 	 */
843716fd348SMartin Matuska 	fd = open("/dev/urandom", O_RDONLY);
844716fd348SMartin Matuska 	if (fd < 0) {
845716fd348SMartin Matuska 		printf("Unable to open /dev/urandom: %s\n:", strerror(errno));
846716fd348SMartin Matuska 		return (1);
847716fd348SMartin Matuska 	} else {
848716fd348SMartin Matuska 		ssize_t bytes = sizeof (map_seed);
849716fd348SMartin Matuska 		ssize_t bytes_read = 0;
850716fd348SMartin Matuska 
851716fd348SMartin Matuska 		while (bytes_read < bytes) {
852be181ee2SMartin Matuska 			ssize_t rc = read(fd, ((char *)map_seed) + bytes_read,
853716fd348SMartin Matuska 			    bytes - bytes_read);
854716fd348SMartin Matuska 			if (rc < 0) {
855716fd348SMartin Matuska 				printf("Unable to read /dev/urandom: %s\n:",
856716fd348SMartin Matuska 				    strerror(errno));
857be181ee2SMartin Matuska 				close(fd);
858716fd348SMartin Matuska 				return (1);
859716fd348SMartin Matuska 			}
860716fd348SMartin Matuska 			bytes_read += rc;
861716fd348SMartin Matuska 		}
862716fd348SMartin Matuska 
863716fd348SMartin Matuska 		(void) close(fd);
864716fd348SMartin Matuska 	}
865716fd348SMartin Matuska 
866716fd348SMartin Matuska 	if (restarts == 0)
867716fd348SMartin Matuska 		printf("Writing generated mappings to '%s':\n", filename);
868716fd348SMartin Matuska 
869716fd348SMartin Matuska 	/*
870716fd348SMartin Matuska 	 * Generate maps for all requested child counts. The best map for
871716fd348SMartin Matuska 	 * each child count is written out to the specified file.  If the file
872716fd348SMartin Matuska 	 * already contains a better mapping this map will not be added.
873716fd348SMartin Matuska 	 */
874716fd348SMartin Matuska 	for (uint64_t children = min_children;
875716fd348SMartin Matuska 	    children <= max_children; children++) {
876716fd348SMartin Matuska 		char key[8] = { 0 };
877716fd348SMartin Matuska 		draid_map_t *map;
878716fd348SMartin Matuska 		double worst_ratio = 1000.0;
879716fd348SMartin Matuska 		double avg_ratio = 1000.0;
880716fd348SMartin Matuska 
881be181ee2SMartin Matuska 		error = eval_maps(children, passes, map_seed, &map,
882716fd348SMartin Matuska 		    &worst_ratio, &avg_ratio);
883716fd348SMartin Matuska 		if (error) {
884716fd348SMartin Matuska 			printf("Error eval_maps(): %s\n", strerror(error));
885716fd348SMartin Matuska 			return (1);
886716fd348SMartin Matuska 		}
887716fd348SMartin Matuska 
888716fd348SMartin Matuska 		if (worst_ratio < 1.0 || avg_ratio < 1.0) {
889716fd348SMartin Matuska 			printf("Error ratio < 1.0: worst_ratio = %2.03f "
890716fd348SMartin Matuska 			    "avg_ratio = %2.03f\n", worst_ratio, avg_ratio);
891716fd348SMartin Matuska 			return (1);
892716fd348SMartin Matuska 		}
893716fd348SMartin Matuska 
894716fd348SMartin Matuska 		snprintf(key, 7, "%llu", (u_longlong_t)children);
895716fd348SMartin Matuska 		error = write_map_key(filename, key, map, worst_ratio,
896716fd348SMartin Matuska 		    avg_ratio);
897716fd348SMartin Matuska 		if (error == 0) {
898716fd348SMartin Matuska 			/* The new map was added to the file. */
899716fd348SMartin Matuska 			dump_map(map, key, worst_ratio, avg_ratio,
900716fd348SMartin Matuska 			    MAX(verbose, 1));
901716fd348SMartin Matuska 		} else if (error == EEXIST) {
902716fd348SMartin Matuska 			/* The existing map was preferable and kept. */
903716fd348SMartin Matuska 			if (verbose > 0)
904716fd348SMartin Matuska 				dump_map_key(filename, key, verbose);
905716fd348SMartin Matuska 		} else {
906716fd348SMartin Matuska 			printf("Error write_map_key(): %s\n", strerror(error));
907716fd348SMartin Matuska 			return (1);
908716fd348SMartin Matuska 		}
909716fd348SMartin Matuska 
910716fd348SMartin Matuska 		free_map(map);
911716fd348SMartin Matuska 	}
912716fd348SMartin Matuska 
913716fd348SMartin Matuska 	/*
914716fd348SMartin Matuska 	 * When the continuous option is set restart at the minimum number of
915716fd348SMartin Matuska 	 * children instead of exiting. This option is useful as a mechanism
916716fd348SMartin Matuska 	 * to continuous try and refine the discovered permutations.
917716fd348SMartin Matuska 	 */
918716fd348SMartin Matuska 	if (continuous) {
919716fd348SMartin Matuska 		restarts++;
920716fd348SMartin Matuska 		printf("Restarting by request (-c): %d\n", restarts);
921716fd348SMartin Matuska 		goto restart;
922716fd348SMartin Matuska 	}
923716fd348SMartin Matuska 
924716fd348SMartin Matuska 	return (0);
925716fd348SMartin Matuska }
926716fd348SMartin Matuska 
927716fd348SMartin Matuska /*
928716fd348SMartin Matuska  * Verify each map in the file by generating its in-memory permutation array
929716fd348SMartin Matuska  * and comfirming its checksum is correct.
930716fd348SMartin Matuska  */
931716fd348SMartin Matuska static int
draid_verify(int argc,char * argv[])932716fd348SMartin Matuska draid_verify(int argc, char *argv[])
933716fd348SMartin Matuska {
934716fd348SMartin Matuska 	char filename[MAXPATHLEN] = {0};
935716fd348SMartin Matuska 	int n = 0, c, error, verbose = 1;
936716fd348SMartin Matuska 	int check_ratios = 0;
937716fd348SMartin Matuska 
938716fd348SMartin Matuska 	while ((c = getopt(argc, argv, ":rv")) != -1) {
939716fd348SMartin Matuska 		switch (c) {
940716fd348SMartin Matuska 		case 'r':
941716fd348SMartin Matuska 			check_ratios++;
942716fd348SMartin Matuska 			break;
943716fd348SMartin Matuska 		case 'v':
944716fd348SMartin Matuska 			verbose++;
945716fd348SMartin Matuska 			break;
946716fd348SMartin Matuska 		case ':':
947716fd348SMartin Matuska 			(void) fprintf(stderr,
948716fd348SMartin Matuska 			    "missing argument for '%c' option\n", optopt);
949716fd348SMartin Matuska 			draid_usage();
950716fd348SMartin Matuska 			break;
951716fd348SMartin Matuska 		case '?':
952716fd348SMartin Matuska 			(void) fprintf(stderr, "invalid option '%c'\n",
953716fd348SMartin Matuska 			    optopt);
954716fd348SMartin Matuska 			draid_usage();
955716fd348SMartin Matuska 			break;
956716fd348SMartin Matuska 		}
957716fd348SMartin Matuska 	}
958716fd348SMartin Matuska 
959716fd348SMartin Matuska 	if (argc > optind) {
960716fd348SMartin Matuska 		char *abspath = malloc(MAXPATHLEN);
961716fd348SMartin Matuska 		if (abspath == NULL)
962716fd348SMartin Matuska 			return (ENOMEM);
963716fd348SMartin Matuska 
964716fd348SMartin Matuska 		if (realpath(argv[optind], abspath) != NULL)
965be181ee2SMartin Matuska 			strlcpy(filename, abspath, sizeof (filename));
966716fd348SMartin Matuska 		else
967be181ee2SMartin Matuska 			strlcpy(filename, argv[optind], sizeof (filename));
968716fd348SMartin Matuska 
969716fd348SMartin Matuska 		free(abspath);
970716fd348SMartin Matuska 	} else {
971716fd348SMartin Matuska 		(void) fprintf(stderr, "A FILE must be specified.\n");
972716fd348SMartin Matuska 		return (1);
973716fd348SMartin Matuska 	}
974716fd348SMartin Matuska 
975716fd348SMartin Matuska 	printf("Verifying permutation maps: '%s'\n", filename);
976716fd348SMartin Matuska 
977716fd348SMartin Matuska 	/*
978716fd348SMartin Matuska 	 * Lookup hardcoded permutation map for each valid number of children
979716fd348SMartin Matuska 	 * and verify a generated map has the correct checksum.  Then compare
980716fd348SMartin Matuska 	 * the generated map values with the nvlist map values read from the
981716fd348SMartin Matuska 	 * reference file to cross-check the permutation.
982716fd348SMartin Matuska 	 */
983716fd348SMartin Matuska 	for (uint64_t children = VDEV_DRAID_MIN_CHILDREN;
984716fd348SMartin Matuska 	    children <= VDEV_DRAID_MAX_CHILDREN;
985716fd348SMartin Matuska 	    children++) {
986716fd348SMartin Matuska 		draid_map_t *map;
987716fd348SMartin Matuska 		char key[8] = {0};
988716fd348SMartin Matuska 
989716fd348SMartin Matuska 		snprintf(key, 8, "%llu", (u_longlong_t)children);
990716fd348SMartin Matuska 
991716fd348SMartin Matuska 		error = alloc_fixed_map(children, &map);
992716fd348SMartin Matuska 		if (error) {
993716fd348SMartin Matuska 			printf("Error alloc_fixed_map() failed: %s\n",
994716fd348SMartin Matuska 			    error == ECKSUM ? "Invalid checksum" :
995716fd348SMartin Matuska 			    strerror(error));
996716fd348SMartin Matuska 			return (1);
997716fd348SMartin Matuska 		}
998716fd348SMartin Matuska 
999716fd348SMartin Matuska 		uint64_t nv_seed, nv_checksum, nv_children, nv_nperms;
1000716fd348SMartin Matuska 		uint8_t *nv_perms;
1001716fd348SMartin Matuska 		nvlist_t *cfg;
1002716fd348SMartin Matuska 		uint_t c;
1003716fd348SMartin Matuska 
1004716fd348SMartin Matuska 		error = read_map_key(filename, key, &cfg);
1005716fd348SMartin Matuska 		if (error != 0) {
1006716fd348SMartin Matuska 			printf("Error read_map_key() failed: %s\n",
1007716fd348SMartin Matuska 			    strerror(error));
1008716fd348SMartin Matuska 			free_map(map);
1009716fd348SMartin Matuska 			return (1);
1010716fd348SMartin Matuska 		}
1011716fd348SMartin Matuska 
1012716fd348SMartin Matuska 		nv_seed = fnvlist_lookup_uint64(cfg, MAP_SEED);
1013716fd348SMartin Matuska 		nv_checksum = fnvlist_lookup_uint64(cfg, MAP_CHECKSUM);
1014716fd348SMartin Matuska 		nv_children = fnvlist_lookup_uint64(cfg, MAP_CHILDREN);
1015716fd348SMartin Matuska 		nv_nperms = fnvlist_lookup_uint64(cfg, MAP_NPERMS);
1016716fd348SMartin Matuska 		nvlist_lookup_uint8_array(cfg, MAP_PERMS, &nv_perms, &c);
1017716fd348SMartin Matuska 
1018716fd348SMartin Matuska 		/*
1019716fd348SMartin Matuska 		 * Compare draid_map_t and nvlist reference values.
1020716fd348SMartin Matuska 		 */
1021716fd348SMartin Matuska 		if (map->dm_seed != nv_seed) {
1022716fd348SMartin Matuska 			printf("Error different seeds: 0x%016llx != "
1023716fd348SMartin Matuska 			    "0x%016llx\n", (u_longlong_t)map->dm_seed,
1024716fd348SMartin Matuska 			    (u_longlong_t)nv_seed);
1025716fd348SMartin Matuska 			error = EINVAL;
1026716fd348SMartin Matuska 		}
1027716fd348SMartin Matuska 
1028716fd348SMartin Matuska 		if (map->dm_checksum != nv_checksum) {
1029716fd348SMartin Matuska 			printf("Error different checksums: 0x%016llx "
1030716fd348SMartin Matuska 			    "!= 0x%016llx\n",
1031716fd348SMartin Matuska 			    (u_longlong_t)map->dm_checksum,
1032716fd348SMartin Matuska 			    (u_longlong_t)nv_checksum);
1033716fd348SMartin Matuska 			error = EINVAL;
1034716fd348SMartin Matuska 		}
1035716fd348SMartin Matuska 
1036716fd348SMartin Matuska 		if (map->dm_children != nv_children) {
1037716fd348SMartin Matuska 			printf("Error different children: %llu "
1038716fd348SMartin Matuska 			    "!= %llu\n", (u_longlong_t)map->dm_children,
1039716fd348SMartin Matuska 			    (u_longlong_t)nv_children);
1040716fd348SMartin Matuska 			error = EINVAL;
1041716fd348SMartin Matuska 		}
1042716fd348SMartin Matuska 
1043716fd348SMartin Matuska 		if (map->dm_nperms != nv_nperms) {
1044716fd348SMartin Matuska 			printf("Error different nperms: %llu "
1045716fd348SMartin Matuska 			    "!= %llu\n", (u_longlong_t)map->dm_nperms,
1046716fd348SMartin Matuska 			    (u_longlong_t)nv_nperms);
1047716fd348SMartin Matuska 			error = EINVAL;
1048716fd348SMartin Matuska 		}
1049716fd348SMartin Matuska 
1050716fd348SMartin Matuska 		for (uint64_t i = 0; i < nv_children * nv_nperms; i++) {
1051716fd348SMartin Matuska 			if (map->dm_perms[i] != nv_perms[i]) {
1052716fd348SMartin Matuska 				printf("Error different perms[%llu]: "
1053716fd348SMartin Matuska 				    "%d != %d\n", (u_longlong_t)i,
1054716fd348SMartin Matuska 				    (int)map->dm_perms[i],
1055716fd348SMartin Matuska 				    (int)nv_perms[i]);
1056716fd348SMartin Matuska 				error = EINVAL;
1057716fd348SMartin Matuska 				break;
1058716fd348SMartin Matuska 			}
1059716fd348SMartin Matuska 		}
1060716fd348SMartin Matuska 
1061716fd348SMartin Matuska 		/*
1062716fd348SMartin Matuska 		 * For good measure recalculate the worst and average
1063716fd348SMartin Matuska 		 * ratios and confirm they match the nvlist values.
1064716fd348SMartin Matuska 		 */
1065716fd348SMartin Matuska 		if (check_ratios) {
1066716fd348SMartin Matuska 			uint64_t nv_worst_ratio, nv_avg_ratio;
1067716fd348SMartin Matuska 			double worst_ratio, avg_ratio;
1068716fd348SMartin Matuska 
1069716fd348SMartin Matuska 			eval_decluster(map, &worst_ratio, &avg_ratio);
1070716fd348SMartin Matuska 
1071716fd348SMartin Matuska 			nv_worst_ratio = fnvlist_lookup_uint64(cfg,
1072716fd348SMartin Matuska 			    MAP_WORST_RATIO);
1073716fd348SMartin Matuska 			nv_avg_ratio = fnvlist_lookup_uint64(cfg,
1074716fd348SMartin Matuska 			    MAP_AVG_RATIO);
1075716fd348SMartin Matuska 
1076716fd348SMartin Matuska 			if (worst_ratio < 1.0 || avg_ratio < 1.0) {
1077716fd348SMartin Matuska 				printf("Error ratio out of range %2.03f, "
1078716fd348SMartin Matuska 				    "%2.03f\n", worst_ratio, avg_ratio);
1079716fd348SMartin Matuska 				error = EINVAL;
1080716fd348SMartin Matuska 			}
1081716fd348SMartin Matuska 
1082716fd348SMartin Matuska 			if ((uint64_t)(worst_ratio * 1000.0) !=
1083716fd348SMartin Matuska 			    nv_worst_ratio) {
1084716fd348SMartin Matuska 				printf("Error different worst_ratio %2.03f "
1085716fd348SMartin Matuska 				    "!= %2.03f\n", (double)nv_worst_ratio /
1086716fd348SMartin Matuska 				    1000.0, worst_ratio);
1087716fd348SMartin Matuska 				error = EINVAL;
1088716fd348SMartin Matuska 			}
1089716fd348SMartin Matuska 
1090716fd348SMartin Matuska 			if ((uint64_t)(avg_ratio * 1000.0) != nv_avg_ratio) {
1091716fd348SMartin Matuska 				printf("Error different average_ratio %2.03f "
1092716fd348SMartin Matuska 				    "!= %2.03f\n", (double)nv_avg_ratio /
1093716fd348SMartin Matuska 				    1000.0, avg_ratio);
1094716fd348SMartin Matuska 				error = EINVAL;
1095716fd348SMartin Matuska 			}
1096716fd348SMartin Matuska 		}
1097716fd348SMartin Matuska 
1098716fd348SMartin Matuska 		if (error) {
1099716fd348SMartin Matuska 			free_map(map);
1100716fd348SMartin Matuska 			nvlist_free(cfg);
1101716fd348SMartin Matuska 			return (1);
1102716fd348SMartin Matuska 		}
1103716fd348SMartin Matuska 
1104716fd348SMartin Matuska 		if (verbose > 0) {
1105716fd348SMartin Matuska 			printf("- %llu children: good\n",
1106716fd348SMartin Matuska 			    (u_longlong_t)children);
1107716fd348SMartin Matuska 		}
1108716fd348SMartin Matuska 		n++;
1109716fd348SMartin Matuska 
1110716fd348SMartin Matuska 		free_map(map);
1111716fd348SMartin Matuska 		nvlist_free(cfg);
1112716fd348SMartin Matuska 	}
1113716fd348SMartin Matuska 
1114716fd348SMartin Matuska 	if (n != (VDEV_DRAID_MAX_CHILDREN - 1)) {
1115716fd348SMartin Matuska 		printf("Error permutation maps missing: %d / %d checked\n",
1116716fd348SMartin Matuska 		    n, VDEV_DRAID_MAX_CHILDREN - 1);
1117716fd348SMartin Matuska 		return (1);
1118716fd348SMartin Matuska 	}
1119716fd348SMartin Matuska 
1120716fd348SMartin Matuska 	printf("Successfully verified %d / %d permutation maps\n",
1121716fd348SMartin Matuska 	    n, VDEV_DRAID_MAX_CHILDREN - 1);
1122716fd348SMartin Matuska 
1123716fd348SMartin Matuska 	return (0);
1124716fd348SMartin Matuska }
1125716fd348SMartin Matuska 
1126716fd348SMartin Matuska /*
1127716fd348SMartin Matuska  * Dump the contents of the specified mapping(s) for inspection.
1128716fd348SMartin Matuska  */
1129716fd348SMartin Matuska static int
draid_dump(int argc,char * argv[])1130716fd348SMartin Matuska draid_dump(int argc, char *argv[])
1131716fd348SMartin Matuska {
1132716fd348SMartin Matuska 	char filename[MAXPATHLEN] = {0};
1133716fd348SMartin Matuska 	int c, error, verbose = 1;
1134716fd348SMartin Matuska 	int min_children = VDEV_DRAID_MIN_CHILDREN;
1135716fd348SMartin Matuska 	int max_children = VDEV_DRAID_MAX_CHILDREN;
1136716fd348SMartin Matuska 
1137716fd348SMartin Matuska 	while ((c = getopt(argc, argv, ":vm:n:")) != -1) {
1138716fd348SMartin Matuska 		switch (c) {
1139716fd348SMartin Matuska 		case 'm':
1140716fd348SMartin Matuska 			min_children = (int)strtol(optarg, NULL, 0);
1141716fd348SMartin Matuska 			if (min_children < 2) {
1142716fd348SMartin Matuska 				(void) fprintf(stderr, "A minimum of 2 "
1143716fd348SMartin Matuska 				    "children are required.\n");
1144716fd348SMartin Matuska 				return (1);
1145716fd348SMartin Matuska 			}
1146716fd348SMartin Matuska 
1147716fd348SMartin Matuska 			break;
1148716fd348SMartin Matuska 		case 'n':
1149716fd348SMartin Matuska 			max_children = (int)strtol(optarg, NULL, 0);
1150716fd348SMartin Matuska 			if (max_children > VDEV_DRAID_MAX_CHILDREN) {
1151716fd348SMartin Matuska 				(void) fprintf(stderr, "A maximum of %d "
1152716fd348SMartin Matuska 				    "children are allowed.\n",
1153716fd348SMartin Matuska 				    VDEV_DRAID_MAX_CHILDREN);
1154716fd348SMartin Matuska 				return (1);
1155716fd348SMartin Matuska 			}
1156716fd348SMartin Matuska 			break;
1157716fd348SMartin Matuska 		case 'v':
1158716fd348SMartin Matuska 			verbose++;
1159716fd348SMartin Matuska 			break;
1160716fd348SMartin Matuska 		case ':':
1161716fd348SMartin Matuska 			(void) fprintf(stderr,
1162716fd348SMartin Matuska 			    "missing argument for '%c' option\n", optopt);
1163716fd348SMartin Matuska 			draid_usage();
1164716fd348SMartin Matuska 			break;
1165716fd348SMartin Matuska 		case '?':
1166716fd348SMartin Matuska 			(void) fprintf(stderr, "invalid option '%c'\n",
1167716fd348SMartin Matuska 			    optopt);
1168716fd348SMartin Matuska 			draid_usage();
1169716fd348SMartin Matuska 			break;
1170716fd348SMartin Matuska 		}
1171716fd348SMartin Matuska 	}
1172716fd348SMartin Matuska 
1173716fd348SMartin Matuska 	if (argc > optind)
1174be181ee2SMartin Matuska 		strlcpy(filename, argv[optind], sizeof (filename));
1175716fd348SMartin Matuska 	else {
1176716fd348SMartin Matuska 		(void) fprintf(stderr, "A FILE must be specified.\n");
1177716fd348SMartin Matuska 		return (1);
1178716fd348SMartin Matuska 	}
1179716fd348SMartin Matuska 
1180716fd348SMartin Matuska 	/*
1181716fd348SMartin Matuska 	 * Dump maps for the requested child counts.
1182716fd348SMartin Matuska 	 */
1183716fd348SMartin Matuska 	for (uint64_t children = min_children;
1184716fd348SMartin Matuska 	    children <= max_children; children++) {
1185716fd348SMartin Matuska 		char key[8] = { 0 };
1186716fd348SMartin Matuska 
1187716fd348SMartin Matuska 		snprintf(key, 7, "%llu", (u_longlong_t)children);
1188716fd348SMartin Matuska 		error = dump_map_key(filename, key, verbose);
1189716fd348SMartin Matuska 		if (error) {
1190716fd348SMartin Matuska 			printf("Error dump_map_key(): %s\n", strerror(error));
1191716fd348SMartin Matuska 			return (1);
1192716fd348SMartin Matuska 		}
1193716fd348SMartin Matuska 	}
1194716fd348SMartin Matuska 
1195716fd348SMartin Matuska 	return (0);
1196716fd348SMartin Matuska }
1197716fd348SMartin Matuska 
1198716fd348SMartin Matuska /*
1199716fd348SMartin Matuska  * Print all of the mappings as a C formatted draid_map_t array.  This table
1200716fd348SMartin Matuska  * is found in the module/zcommon/zfs_draid.c file and is the definitive
1201716fd348SMartin Matuska  * source for all mapping used by dRAID.  It cannot be updated without
1202716fd348SMartin Matuska  * changing the dRAID on disk format.
1203716fd348SMartin Matuska  */
1204716fd348SMartin Matuska static int
draid_table(int argc,char * argv[])1205716fd348SMartin Matuska draid_table(int argc, char *argv[])
1206716fd348SMartin Matuska {
1207716fd348SMartin Matuska 	char filename[MAXPATHLEN] = {0};
1208716fd348SMartin Matuska 	int error;
1209716fd348SMartin Matuska 
1210716fd348SMartin Matuska 	if (argc > optind)
1211be181ee2SMartin Matuska 		strlcpy(filename, argv[optind], sizeof (filename));
1212716fd348SMartin Matuska 	else {
1213716fd348SMartin Matuska 		(void) fprintf(stderr, "A FILE must be specified.\n");
1214716fd348SMartin Matuska 		return (1);
1215716fd348SMartin Matuska 	}
1216716fd348SMartin Matuska 
1217716fd348SMartin Matuska 	printf("static const draid_map_t "
1218716fd348SMartin Matuska 	    "draid_maps[VDEV_DRAID_MAX_MAPS] = {\n");
1219716fd348SMartin Matuska 
1220716fd348SMartin Matuska 	for (uint64_t children = VDEV_DRAID_MIN_CHILDREN;
1221716fd348SMartin Matuska 	    children <= VDEV_DRAID_MAX_CHILDREN;
1222716fd348SMartin Matuska 	    children++) {
1223716fd348SMartin Matuska 		uint64_t seed, checksum, nperms, avg_ratio;
1224716fd348SMartin Matuska 		nvlist_t *cfg;
1225716fd348SMartin Matuska 		char key[8] = {0};
1226716fd348SMartin Matuska 
1227716fd348SMartin Matuska 		snprintf(key, 8, "%llu", (u_longlong_t)children);
1228716fd348SMartin Matuska 
1229716fd348SMartin Matuska 		error = read_map_key(filename, key, &cfg);
1230716fd348SMartin Matuska 		if (error != 0) {
1231716fd348SMartin Matuska 			printf("Error read_map_key() failed: %s\n",
1232716fd348SMartin Matuska 			    strerror(error));
1233716fd348SMartin Matuska 			return (1);
1234716fd348SMartin Matuska 		}
1235716fd348SMartin Matuska 
1236716fd348SMartin Matuska 		seed = fnvlist_lookup_uint64(cfg, MAP_SEED);
1237716fd348SMartin Matuska 		checksum = fnvlist_lookup_uint64(cfg, MAP_CHECKSUM);
1238716fd348SMartin Matuska 		children = fnvlist_lookup_uint64(cfg, MAP_CHILDREN);
1239716fd348SMartin Matuska 		nperms = fnvlist_lookup_uint64(cfg, MAP_NPERMS);
1240716fd348SMartin Matuska 		avg_ratio = fnvlist_lookup_uint64(cfg, MAP_AVG_RATIO);
1241716fd348SMartin Matuska 
1242716fd348SMartin Matuska 		printf("\t{ %3llu, %3llu, 0x%016llx, 0x%016llx },\t"
1243716fd348SMartin Matuska 		    "/* %2.03f */\n", (u_longlong_t)children,
1244716fd348SMartin Matuska 		    (u_longlong_t)nperms, (u_longlong_t)seed,
1245716fd348SMartin Matuska 		    (u_longlong_t)checksum, (double)avg_ratio / 1000.0);
1246716fd348SMartin Matuska 
1247716fd348SMartin Matuska 		nvlist_free(cfg);
1248716fd348SMartin Matuska 	}
1249716fd348SMartin Matuska 
1250716fd348SMartin Matuska 	printf("};\n");
1251716fd348SMartin Matuska 
1252716fd348SMartin Matuska 	return (0);
1253716fd348SMartin Matuska }
1254716fd348SMartin Matuska 
1255716fd348SMartin Matuska static int
draid_merge_impl(nvlist_t * allcfgs,const char * srcfilename,int * mergedp)1256716fd348SMartin Matuska draid_merge_impl(nvlist_t *allcfgs, const char *srcfilename, int *mergedp)
1257716fd348SMartin Matuska {
1258716fd348SMartin Matuska 	nvlist_t *srccfgs;
1259716fd348SMartin Matuska 	nvpair_t *elem = NULL;
1260716fd348SMartin Matuska 	int error, merged = 0;
1261716fd348SMartin Matuska 
1262716fd348SMartin Matuska 	error = read_map(srcfilename, &srccfgs);
1263716fd348SMartin Matuska 	if (error != 0)
1264716fd348SMartin Matuska 		return (error);
1265716fd348SMartin Matuska 
1266716fd348SMartin Matuska 	while ((elem = nvlist_next_nvpair(srccfgs, elem)) != NULL) {
1267716fd348SMartin Matuska 		uint64_t nv_worst_ratio;
1268716fd348SMartin Matuska 		uint64_t allcfg_worst_ratio;
1269716fd348SMartin Matuska 		nvlist_t *cfg, *allcfg;
1270*2a58b312SMartin Matuska 		const char *key;
1271716fd348SMartin Matuska 
1272716fd348SMartin Matuska 		switch (nvpair_type(elem)) {
1273716fd348SMartin Matuska 		case DATA_TYPE_NVLIST:
1274716fd348SMartin Matuska 
1275716fd348SMartin Matuska 			(void) nvpair_value_nvlist(elem, &cfg);
1276716fd348SMartin Matuska 			key = nvpair_name(elem);
1277716fd348SMartin Matuska 
1278716fd348SMartin Matuska 			nv_worst_ratio = fnvlist_lookup_uint64(cfg,
1279716fd348SMartin Matuska 			    MAP_WORST_RATIO);
1280716fd348SMartin Matuska 
1281716fd348SMartin Matuska 			error = nvlist_lookup_nvlist(allcfgs, key, &allcfg);
1282716fd348SMartin Matuska 			if (error == 0) {
1283716fd348SMartin Matuska 				allcfg_worst_ratio = fnvlist_lookup_uint64(
1284716fd348SMartin Matuska 				    allcfg, MAP_WORST_RATIO);
1285716fd348SMartin Matuska 
1286716fd348SMartin Matuska 				if (nv_worst_ratio < allcfg_worst_ratio) {
1287716fd348SMartin Matuska 					fnvlist_remove(allcfgs, key);
1288dbd5678dSMartin Matuska 					fnvlist_add_nvlist(allcfgs, key, cfg);
1289716fd348SMartin Matuska 					merged++;
1290716fd348SMartin Matuska 				}
1291716fd348SMartin Matuska 			} else if (error == ENOENT) {
1292dbd5678dSMartin Matuska 				fnvlist_add_nvlist(allcfgs, key, cfg);
1293716fd348SMartin Matuska 				merged++;
1294716fd348SMartin Matuska 			} else {
1295716fd348SMartin Matuska 				return (error);
1296716fd348SMartin Matuska 			}
1297716fd348SMartin Matuska 
1298716fd348SMartin Matuska 			break;
1299716fd348SMartin Matuska 		default:
1300716fd348SMartin Matuska 			continue;
1301716fd348SMartin Matuska 		}
1302716fd348SMartin Matuska 	}
1303716fd348SMartin Matuska 
1304716fd348SMartin Matuska 	nvlist_free(srccfgs);
1305716fd348SMartin Matuska 
1306716fd348SMartin Matuska 	*mergedp = merged;
1307716fd348SMartin Matuska 
1308716fd348SMartin Matuska 	return (0);
1309716fd348SMartin Matuska }
1310716fd348SMartin Matuska 
1311716fd348SMartin Matuska /*
1312716fd348SMartin Matuska  * Merge the best map for each child count found in the listed files into
1313716fd348SMartin Matuska  * a new file.  This allows 'draid generate' to be run in parallel and for
1314716fd348SMartin Matuska  * the results maps to be combined.
1315716fd348SMartin Matuska  */
1316716fd348SMartin Matuska static int
draid_merge(int argc,char * argv[])1317716fd348SMartin Matuska draid_merge(int argc, char *argv[])
1318716fd348SMartin Matuska {
1319716fd348SMartin Matuska 	char filename[MAXPATHLEN] = {0};
1320716fd348SMartin Matuska 	int c, error, total_merged = 0;
1321716fd348SMartin Matuska 	nvlist_t *allcfgs;
1322716fd348SMartin Matuska 
1323716fd348SMartin Matuska 	while ((c = getopt(argc, argv, ":")) != -1) {
1324716fd348SMartin Matuska 		switch (c) {
1325716fd348SMartin Matuska 		case ':':
1326716fd348SMartin Matuska 			(void) fprintf(stderr,
1327716fd348SMartin Matuska 			    "missing argument for '%c' option\n", optopt);
1328716fd348SMartin Matuska 			draid_usage();
1329716fd348SMartin Matuska 			break;
1330716fd348SMartin Matuska 		case '?':
1331716fd348SMartin Matuska 			(void) fprintf(stderr, "invalid option '%c'\n",
1332716fd348SMartin Matuska 			    optopt);
1333716fd348SMartin Matuska 			draid_usage();
1334716fd348SMartin Matuska 			break;
1335716fd348SMartin Matuska 		}
1336716fd348SMartin Matuska 	}
1337716fd348SMartin Matuska 
1338716fd348SMartin Matuska 	if (argc < 4) {
1339716fd348SMartin Matuska 		(void) fprintf(stderr,
1340716fd348SMartin Matuska 		    "A FILE and multiple SRCs must be specified.\n");
1341716fd348SMartin Matuska 		return (1);
1342716fd348SMartin Matuska 	}
1343716fd348SMartin Matuska 
1344be181ee2SMartin Matuska 	strlcpy(filename, argv[optind], sizeof (filename));
1345716fd348SMartin Matuska 	optind++;
1346716fd348SMartin Matuska 
1347716fd348SMartin Matuska 	error = read_map(filename, &allcfgs);
1348716fd348SMartin Matuska 	if (error == ENOENT) {
1349716fd348SMartin Matuska 		allcfgs = fnvlist_alloc();
1350716fd348SMartin Matuska 	} else if (error != 0) {
1351716fd348SMartin Matuska 		printf("Error read_map(): %s\n", strerror(error));
1352716fd348SMartin Matuska 		return (error);
1353716fd348SMartin Matuska 	}
1354716fd348SMartin Matuska 
1355716fd348SMartin Matuska 	while (optind < argc) {
1356716fd348SMartin Matuska 		char srcfilename[MAXPATHLEN] = {0};
1357716fd348SMartin Matuska 		int merged = 0;
1358716fd348SMartin Matuska 
1359be181ee2SMartin Matuska 		strlcpy(srcfilename, argv[optind], sizeof (srcfilename));
1360716fd348SMartin Matuska 
1361716fd348SMartin Matuska 		error = draid_merge_impl(allcfgs, srcfilename, &merged);
1362716fd348SMartin Matuska 		if (error) {
1363716fd348SMartin Matuska 			printf("Error draid_merge_impl(): %s\n",
1364716fd348SMartin Matuska 			    strerror(error));
1365716fd348SMartin Matuska 			nvlist_free(allcfgs);
1366716fd348SMartin Matuska 			return (1);
1367716fd348SMartin Matuska 		}
1368716fd348SMartin Matuska 
1369716fd348SMartin Matuska 		total_merged += merged;
1370716fd348SMartin Matuska 		printf("Merged %d key(s) from '%s' into '%s'\n", merged,
1371716fd348SMartin Matuska 		    srcfilename, filename);
1372716fd348SMartin Matuska 
1373716fd348SMartin Matuska 		optind++;
1374716fd348SMartin Matuska 	}
1375716fd348SMartin Matuska 
1376716fd348SMartin Matuska 	if (total_merged > 0)
1377716fd348SMartin Matuska 		write_map(filename, allcfgs);
1378716fd348SMartin Matuska 
1379716fd348SMartin Matuska 	printf("Merged a total of %d key(s) into '%s'\n", total_merged,
1380716fd348SMartin Matuska 	    filename);
1381716fd348SMartin Matuska 
1382716fd348SMartin Matuska 	nvlist_free(allcfgs);
1383716fd348SMartin Matuska 
1384716fd348SMartin Matuska 	return (0);
1385716fd348SMartin Matuska }
1386716fd348SMartin Matuska 
/*
 * Program entry point.  argv[1] selects the subcommand ("generate",
 * "verify", "dump", "table" or "merge"); its handler receives the argument
 * vector shifted by one so that the subcommand name becomes argv[0], and
 * the handler's return value is returned from main().  A missing or unknown
 * subcommand results in a call to draid_usage().
 *
 * NOTE(review): draid_usage() is defined outside this chunk; presumably it
 * does not return -- confirm.
 */
int
main(int argc, char *argv[])
{
	if (argc < 2)
		draid_usage();

	const char *cmd = argv[1];
	int sub_argc = argc - 1;
	char **sub_argv = argv + 1;

	if (strcmp(cmd, "generate") == 0)
		return (draid_generate(sub_argc, sub_argv));

	if (strcmp(cmd, "verify") == 0)
		return (draid_verify(sub_argc, sub_argv));

	if (strcmp(cmd, "dump") == 0)
		return (draid_dump(sub_argc, sub_argv));

	if (strcmp(cmd, "table") == 0)
		return (draid_table(sub_argc, sub_argv));

	if (strcmp(cmd, "merge") == 0)
		return (draid_merge(sub_argc, sub_argv));

	/* Unrecognized subcommand. */
	draid_usage();
}
1409